From: Greg Kroah-Hartman Date: Sun, 3 Dec 2023 13:59:36 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v4.14.332~28 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=9fa8f556466b86dd91c6528f91770a271f51713c;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: acpi-video-use-acpi_video_device-for-cooling-dev-driver-data.patch bcache-revert-replacing-is_err_or_null-with-is_err.patch btrfs-add-dmesg-output-for-first-mount-and-last-unmount-of-a-filesystem.patch btrfs-fix-64bit-compat-send-ioctl-arguments-not-initializing-version-member.patch btrfs-fix-off-by-one-when-checking-chunk-map-includes-logical-address.patch btrfs-free-the-allocated-memory-if-btrfs_alloc_page_array-fails.patch btrfs-make-error-messages-more-clear-when-getting-a-chunk-map.patch btrfs-ref-verify-fix-memory-leaks-in-btrfs_ref_tree_mod.patch btrfs-send-ensure-send_fd-is-writable.patch cpufreq-amd-pstate-fix-the-return-value-of-amd_pstate_fast_switch.patch dma-buf-fix-check-in-dma_resv_add_fence.patch ext2-fix-ki_pos-update-for-dio-buffered-io-fallback-case.patch io_uring-don-t-allow-discontig-pages-for-ioring_setup_no_mmap.patch io_uring-don-t-guard-ioring_off_pbuf_ring-with-setup_no_mmap.patch io_uring-free-io_buffer_list-entries-via-rcu.patch iommu-avoid-more-races-around-device-probe.patch iommu-vt-d-add-mtl-to-quirk-list-to-skip-te-disabling.patch iommu-vt-d-fix-incorrect-cache-invalidation-for-mm-notification.patch kvm-ppc-book3s-hv-fix-kvm_run-clobbering-fp-vec-user-registers.patch nouveau-find-the-smallest-page-allocation-to-cover-a-buffer-alloc.patch parisc-drop-the-hp-ux-enosym-and-eremoterelease-error-codes.patch parisc-ensure-32-bit-alignment-on-parisc-unwind-section.patch parisc-mark-altinstructions-read-only-and-32-bit-aligned.patch parisc-mark-ex_table-entries-32-bit-aligned-in-assembly.h.patch parisc-mark-ex_table-entries-32-bit-aligned-in-uaccess.h.patch parisc-mark-jump_table-naturally-aligned.patch 
parisc-mark-lock_aligned-variables-16-byte-aligned-on-smp.patch parisc-use-natural-cpu-alignment-for-bug_table.patch powercap-dtpm-fix-unneeded-conversions-to-micro-watts.patch powerpc-don-t-clobber-f0-vs0-during-fp-altivec-register-save.patch r8169-fix-deadlock-on-rtl8125-in-jumbo-mtu-mode.patch --- diff --git a/queue-6.6/acpi-video-use-acpi_video_device-for-cooling-dev-driver-data.patch b/queue-6.6/acpi-video-use-acpi_video_device-for-cooling-dev-driver-data.patch new file mode 100644 index 00000000000..d167e60e517 --- /dev/null +++ b/queue-6.6/acpi-video-use-acpi_video_device-for-cooling-dev-driver-data.patch @@ -0,0 +1,115 @@ +From 172c48caed91a978bca078042222d09baea13717 Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Mon, 27 Nov 2023 15:37:41 +0100 +Subject: ACPI: video: Use acpi_video_device for cooling-dev driver data + +From: Hans de Goede + +commit 172c48caed91a978bca078042222d09baea13717 upstream. + +The acpi_video code was storing the acpi_video_device as driver_data +in the acpi_device children of the acpi_video_bus acpi_device. + +But the acpi_video driver only binds to the bus acpi_device. +It uses, but does not bind to, the children. Since it is not +the driver it should not be using the driver_data of the children's +acpi_device-s. + +Since commit 0d16710146a1 ("ACPI: bus: Set driver_data to NULL every +time .add() fails") the childen's driver_data ends up getting set +to NULL after a driver fails to bind to the children leading to a NULL +pointer deref in video_get_max_state when registering the cooling-dev: + +[ 3.148958] BUG: kernel NULL pointer dereference, address: 0000000000000090 + +[ 3.149015] Hardware name: Sony Corporation VPCSB2X9R/VAIO, BIOS R2087H4 06/15/2012 +[ 3.149021] RIP: 0010:video_get_max_state+0x17/0x30 [video] + +[ 3.149105] Call Trace: +[ 3.149110] +[ 3.149114] ? __die+0x23/0x70 +[ 3.149126] ? page_fault_oops+0x171/0x4e0 +[ 3.149137] ? exc_page_fault+0x7f/0x180 +[ 3.149147] ? asm_exc_page_fault+0x26/0x30 +[ 3.149158] ? 
video_get_max_state+0x17/0x30 [video 9b6f3f0d19d7b4a0e2df17a2d8b43bc19c2ed71f] +[ 3.149176] ? __pfx_video_get_max_state+0x10/0x10 [video 9b6f3f0d19d7b4a0e2df17a2d8b43bc19c2ed71f] +[ 3.149192] __thermal_cooling_device_register.part.0+0xf2/0x2f0 +[ 3.149205] acpi_video_bus_register_backlight.part.0.isra.0+0x414/0x570 [video 9b6f3f0d19d7b4a0e2df17a2d8b43bc19c2ed71f] +[ 3.149227] acpi_video_register_backlight+0x57/0x80 [video 9b6f3f0d19d7b4a0e2df17a2d8b43bc19c2ed71f] +[ 3.149245] intel_acpi_video_register+0x68/0x90 [i915 1f3a758130b32ef13d301d4f8f78c7d766d57f2a] +[ 3.149669] intel_display_driver_register+0x28/0x50 [i915 1f3a758130b32ef13d301d4f8f78c7d766d57f2a] +[ 3.150064] i915_driver_probe+0x790/0xb90 [i915 1f3a758130b32ef13d301d4f8f78c7d766d57f2a] +[ 3.150402] local_pci_probe+0x45/0xa0 +[ 3.150412] pci_device_probe+0xc1/0x260 + + +Fix this by directly using the acpi_video_device as devdata for +the cooling-device, which avoids the need to set driver-data on +the children at all. + +Fixes: 0d16710146a1 ("ACPI: bus: Set driver_data to NULL every time .add() fails") +Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9718 +Cc: 6.6+ # 6.6+ +Signed-off-by: Hans de Goede +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/acpi_video.c | 14 +++++--------- + 1 file changed, 5 insertions(+), 9 deletions(-) + +diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c +index d321ca7160d9..6cee536c229a 100644 +--- a/drivers/acpi/acpi_video.c ++++ b/drivers/acpi/acpi_video.c +@@ -253,8 +253,7 @@ static const struct backlight_ops acpi_backlight_ops = { + static int video_get_max_state(struct thermal_cooling_device *cooling_dev, + unsigned long *state) + { +- struct acpi_device *device = cooling_dev->devdata; +- struct acpi_video_device *video = acpi_driver_data(device); ++ struct acpi_video_device *video = cooling_dev->devdata; + + *state = video->brightness->count - ACPI_VIDEO_FIRST_LEVEL - 1; + return 0; +@@ -263,8 +262,7 @@ static int video_get_max_state(struct thermal_cooling_device *cooling_dev, + static int video_get_cur_state(struct thermal_cooling_device *cooling_dev, + unsigned long *state) + { +- struct acpi_device *device = cooling_dev->devdata; +- struct acpi_video_device *video = acpi_driver_data(device); ++ struct acpi_video_device *video = cooling_dev->devdata; + unsigned long long level; + int offset; + +@@ -283,8 +281,7 @@ static int video_get_cur_state(struct thermal_cooling_device *cooling_dev, + static int + video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long state) + { +- struct acpi_device *device = cooling_dev->devdata; +- struct acpi_video_device *video = acpi_driver_data(device); ++ struct acpi_video_device *video = cooling_dev->devdata; + int level; + + if (state >= video->brightness->count - ACPI_VIDEO_FIRST_LEVEL) +@@ -1125,7 +1122,6 @@ static int acpi_video_bus_get_one_device(struct acpi_device *device, void *arg) + + strcpy(acpi_device_name(device), ACPI_VIDEO_DEVICE_NAME); + strcpy(acpi_device_class(device), ACPI_VIDEO_CLASS); +- device->driver_data = data; + + data->device_id = device_id; + data->video = video; +@@ -1747,8 +1743,8 @@ static void 
acpi_video_dev_register_backlight(struct acpi_video_device *device) + device->backlight->props.brightness = + acpi_video_get_brightness(device->backlight); + +- device->cooling_dev = thermal_cooling_device_register("LCD", +- device->dev, &video_cooling_ops); ++ device->cooling_dev = thermal_cooling_device_register("LCD", device, ++ &video_cooling_ops); + if (IS_ERR(device->cooling_dev)) { + /* + * Set cooling_dev to NULL so we don't crash trying to free it. +-- +2.43.0 + diff --git a/queue-6.6/bcache-revert-replacing-is_err_or_null-with-is_err.patch b/queue-6.6/bcache-revert-replacing-is_err_or_null-with-is_err.patch new file mode 100644 index 00000000000..78ecd0c00ba --- /dev/null +++ b/queue-6.6/bcache-revert-replacing-is_err_or_null-with-is_err.patch @@ -0,0 +1,72 @@ +From bb6cc253861bd5a7cf8439e2118659696df9619f Mon Sep 17 00:00:00 2001 +From: Markus Weippert +Date: Fri, 24 Nov 2023 16:14:37 +0100 +Subject: bcache: revert replacing IS_ERR_OR_NULL with IS_ERR + +From: Markus Weippert + +commit bb6cc253861bd5a7cf8439e2118659696df9619f upstream. + +Commit 028ddcac477b ("bcache: Remove unnecessary NULL point check in +node allocations") replaced IS_ERR_OR_NULL by IS_ERR. This leads to a +NULL pointer dereference. + +BUG: kernel NULL pointer dereference, address: 0000000000000080 +Call Trace: + ? __die_body.cold+0x1a/0x1f + ? page_fault_oops+0xd2/0x2b0 + ? exc_page_fault+0x70/0x170 + ? asm_exc_page_fault+0x22/0x30 + ? btree_node_free+0xf/0x160 [bcache] + ? up_write+0x32/0x60 + btree_gc_coalesce+0x2aa/0x890 [bcache] + ? bch_extent_bad+0x70/0x170 [bcache] + btree_gc_recurse+0x130/0x390 [bcache] + ? btree_gc_mark_node+0x72/0x230 [bcache] + bch_btree_gc+0x5da/0x600 [bcache] + ? cpuusage_read+0x10/0x10 + ? bch_btree_gc+0x600/0x600 [bcache] + bch_gc_thread+0x135/0x180 [bcache] + +The relevant code starts with: + + new_nodes[0] = NULL; + + for (i = 0; i < nodes; i++) { + if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key))) + goto out_nocoalesce; + // ... 
+out_nocoalesce: + // ... + for (i = 0; i < nodes; i++) + if (!IS_ERR(new_nodes[i])) { // IS_ERR_OR_NULL before +028ddcac477b + btree_node_free(new_nodes[i]); // new_nodes[0] is NULL + rw_unlock(true, new_nodes[i]); + } + +This patch replaces IS_ERR() by IS_ERR_OR_NULL() to fix this. + +Fixes: 028ddcac477b ("bcache: Remove unnecessary NULL point check in node allocations") +Link: https://lore.kernel.org/all/3DF4A87A-2AC1-4893-AE5F-E921478419A9@suse.de/ +Cc: stable@vger.kernel.org +Cc: Zheng Wang +Cc: Coly Li +Signed-off-by: Markus Weippert +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/bcache/btree.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -1510,7 +1510,7 @@ out_nocoalesce: + bch_keylist_free(&keylist); + + for (i = 0; i < nodes; i++) +- if (!IS_ERR(new_nodes[i])) { ++ if (!IS_ERR_OR_NULL(new_nodes[i])) { + btree_node_free(new_nodes[i]); + rw_unlock(true, new_nodes[i]); + } diff --git a/queue-6.6/btrfs-add-dmesg-output-for-first-mount-and-last-unmount-of-a-filesystem.patch b/queue-6.6/btrfs-add-dmesg-output-for-first-mount-and-last-unmount-of-a-filesystem.patch new file mode 100644 index 00000000000..1e9bf9220dd --- /dev/null +++ b/queue-6.6/btrfs-add-dmesg-output-for-first-mount-and-last-unmount-of-a-filesystem.patch @@ -0,0 +1,74 @@ +From 2db313205f8b96eea467691917138d646bb50aef Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Thu, 2 Nov 2023 07:54:50 +1030 +Subject: btrfs: add dmesg output for first mount and last unmount of a filesystem + +From: Qu Wenruo + +commit 2db313205f8b96eea467691917138d646bb50aef upstream. + +There is a feature request to add dmesg output when unmounting a btrfs. +There are several alternative methods to do the same thing, but with +their own problems: + +- Use eBPF to watch btrfs_put_super()/open_ctree() + Not end user friendly, they have to dip their head into the source + code. 
+ +- Watch for directory /sys/fs// + This is way more simple, but still requires some simple device -> uuid + lookups. And a script needs to use inotify to watch /sys/fs/. + +Compared to all these, directly outputting the information into dmesg +would be the most simple one, with both device and UUID included. + +And since we're here, also add the output when mounting a filesystem for +the first time for parity. A more fine grained monitoring of subvolume +mounts should be done by another layer, like audit. + +Now mounting a btrfs with all default mkfs options would look like this: + + [81.906566] BTRFS info (device dm-8): first mount of filesystem 633b5c16-afe3-4b79-b195-138fe145e4f2 + [81.907494] BTRFS info (device dm-8): using crc32c (crc32c-intel) checksum algorithm + [81.908258] BTRFS info (device dm-8): using free space tree + [81.912644] BTRFS info (device dm-8): auto enabling async discard + [81.913277] BTRFS info (device dm-8): checking UUID tree + [91.668256] BTRFS info (device dm-8): last unmount of filesystem 633b5c16-afe3-4b79-b195-138fe145e4f2 + +CC: stable@vger.kernel.org # 5.4+ +Link: https://github.com/kdave/btrfs-progs/issues/689 +Reviewed-by: Anand Jain +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +[ update changelog ] +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/disk-io.c | 1 + + fs/btrfs/super.c | 5 ++++- + 2 files changed, 5 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -3197,6 +3197,7 @@ int __cold open_ctree(struct super_block + goto fail_alloc; + } + ++ btrfs_info(fs_info, "first mount of filesystem %pU", disk_super->fsid); + /* + * Verify the type first, if that or the checksum value are + * corrupted, we'll find out +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -79,7 +79,10 @@ static int btrfs_remount(struct super_bl + + static void btrfs_put_super(struct super_block *sb) + { +- close_ctree(btrfs_sb(sb)); ++ struct btrfs_fs_info *fs_info = 
btrfs_sb(sb); ++ ++ btrfs_info(fs_info, "last unmount of filesystem %pU", fs_info->fs_devices->fsid); ++ close_ctree(fs_info); + } + + enum { diff --git a/queue-6.6/btrfs-fix-64bit-compat-send-ioctl-arguments-not-initializing-version-member.patch b/queue-6.6/btrfs-fix-64bit-compat-send-ioctl-arguments-not-initializing-version-member.patch new file mode 100644 index 00000000000..ae355b55bac --- /dev/null +++ b/queue-6.6/btrfs-fix-64bit-compat-send-ioctl-arguments-not-initializing-version-member.patch @@ -0,0 +1,36 @@ +From 5de0434bc064606d6b7467ec3e5ad22963a18c04 Mon Sep 17 00:00:00 2001 +From: David Sterba +Date: Tue, 14 Nov 2023 17:44:11 +0100 +Subject: btrfs: fix 64bit compat send ioctl arguments not initializing version member + +From: David Sterba + +commit 5de0434bc064606d6b7467ec3e5ad22963a18c04 upstream. + +When the send protocol versioning was added in 5.16 e77fbf990316 +("btrfs: send: prepare for v2 protocol"), the 32/64bit compat code was +not updated (added by 2351f431f727 ("btrfs: fix send ioctl on 32bit with +64bit kernel")), missing the version struct member. The compat code is +probably rarely used, nobody reported any bugs. + +Found by tool https://github.com/jirislaby/clang-struct . 
+ +Fixes: e77fbf990316 ("btrfs: send: prepare for v2 protocol") +CC: stable@vger.kernel.org # 6.1+ +Reviewed-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -4351,6 +4351,7 @@ static int _btrfs_ioctl_send(struct inod + arg->clone_sources = compat_ptr(args32.clone_sources); + arg->parent_root = args32.parent_root; + arg->flags = args32.flags; ++ arg->version = args32.version; + memcpy(arg->reserved, args32.reserved, + sizeof(args32.reserved)); + #else diff --git a/queue-6.6/btrfs-fix-off-by-one-when-checking-chunk-map-includes-logical-address.patch b/queue-6.6/btrfs-fix-off-by-one-when-checking-chunk-map-includes-logical-address.patch new file mode 100644 index 00000000000..ae6b3f8ba0b --- /dev/null +++ b/queue-6.6/btrfs-fix-off-by-one-when-checking-chunk-map-includes-logical-address.patch @@ -0,0 +1,43 @@ +From 5fba5a571858ce2d787fdaf55814e42725bfa895 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Tue, 21 Nov 2023 13:38:32 +0000 +Subject: btrfs: fix off-by-one when checking chunk map includes logical address + +From: Filipe Manana + +commit 5fba5a571858ce2d787fdaf55814e42725bfa895 upstream. + +At btrfs_get_chunk_map() we get the extent map for the chunk that contains +the given logical address stored in the 'logical' argument. Then we do +sanity checks to verify the extent map contains the logical address. One +of these checks verifies if the extent map covers a range with an end +offset behind the target logical address - however this check has an +off-by-one error since it will consider an extent map whose start offset +plus its length matches the target logical address as inclusive, while +the fact is that the last byte it covers is behind the target logical +address (by 1). 
+ +So fix this condition by using '<=' rather than '<' when comparing the +extent map's "start + length" against the target logical address. + +CC: stable@vger.kernel.org # 4.14+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/volumes.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -3050,7 +3050,7 @@ struct extent_map *btrfs_get_chunk_map(s + return ERR_PTR(-EINVAL); + } + +- if (em->start > logical || em->start + em->len < logical) { ++ if (em->start > logical || em->start + em->len <= logical) { + btrfs_crit(fs_info, + "found a bad mapping, wanted %llu-%llu, found %llu-%llu", + logical, length, em->start, em->start + em->len); diff --git a/queue-6.6/btrfs-free-the-allocated-memory-if-btrfs_alloc_page_array-fails.patch b/queue-6.6/btrfs-free-the-allocated-memory-if-btrfs_alloc_page_array-fails.patch new file mode 100644 index 00000000000..43e7516460c --- /dev/null +++ b/queue-6.6/btrfs-free-the-allocated-memory-if-btrfs_alloc_page_array-fails.patch @@ -0,0 +1,84 @@ +From 94dbf7c0871f7ae6349ba4b0341ce8f5f98a071d Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Fri, 24 Nov 2023 14:53:50 +1030 +Subject: btrfs: free the allocated memory if btrfs_alloc_page_array() fails + +From: Qu Wenruo + +commit 94dbf7c0871f7ae6349ba4b0341ce8f5f98a071d upstream. + +[BUG] +If btrfs_alloc_page_array() fail to allocate all pages but part of the +slots, then the partially allocated pages would be leaked in function +btrfs_submit_compressed_read(). + +[CAUSE] +As explicitly stated, if btrfs_alloc_page_array() returned -ENOMEM, +caller is responsible to free the partially allocated pages. + +For the existing call sites, most of them are fine: + +- btrfs_raid_bio::stripe_pages + Handled by free_raid_bio(). + +- extent_buffer::pages[] + Handled btrfs_release_extent_buffer_pages(). 
+ +- scrub_stripe::pages[] + Handled by release_scrub_stripe(). + +But there is one exception in btrfs_submit_compressed_read(), if +btrfs_alloc_page_array() failed, we didn't cleanup the array and freed +the array pointer directly. + +Initially there is still the error handling in commit dd137dd1f2d7 +("btrfs: factor out allocating an array of pages"), but later in commit +544fe4a903ce ("btrfs: embed a btrfs_bio into struct compressed_bio"), +the error handling is removed, leading to the possible memory leak. + +[FIX] +This patch would add back the error handling first, then to prevent such +situation from happening again, also +Make btrfs_alloc_page_array() to free the allocated pages as a extra +safety net, then we don't need to add the error handling to +btrfs_submit_compressed_read(). + +Fixes: 544fe4a903ce ("btrfs: embed a btrfs_bio into struct compressed_bio") +CC: stable@vger.kernel.org # 6.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent_io.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -675,8 +675,8 @@ static void end_bio_extent_readpage(stru + * the array will be skipped + * + * Return: 0 if all pages were able to be allocated; +- * -ENOMEM otherwise, and the caller is responsible for freeing all +- * non-null page pointers in the array. ++ * -ENOMEM otherwise, the partially allocated pages would be freed and ++ * the array slots zeroed + */ + int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array) + { +@@ -695,8 +695,13 @@ int btrfs_alloc_page_array(unsigned int + * though alloc_pages_bulk_array() falls back to alloc_page() + * if it could not bulk-allocate. So we must be out of memory. 
+ */ +- if (allocated == last) ++ if (allocated == last) { ++ for (int i = 0; i < allocated; i++) { ++ __free_page(page_array[i]); ++ page_array[i] = NULL; ++ } + return -ENOMEM; ++ } + + memalloc_retry_wait(GFP_NOFS); + } diff --git a/queue-6.6/btrfs-make-error-messages-more-clear-when-getting-a-chunk-map.patch b/queue-6.6/btrfs-make-error-messages-more-clear-when-getting-a-chunk-map.patch new file mode 100644 index 00000000000..229281ac8bd --- /dev/null +++ b/queue-6.6/btrfs-make-error-messages-more-clear-when-getting-a-chunk-map.patch @@ -0,0 +1,50 @@ +From 7d410d5efe04e42a6cd959bfe6d59d559fdf8b25 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Tue, 21 Nov 2023 13:38:33 +0000 +Subject: btrfs: make error messages more clear when getting a chunk map + +From: Filipe Manana + +commit 7d410d5efe04e42a6cd959bfe6d59d559fdf8b25 upstream. + +When getting a chunk map, at btrfs_get_chunk_map(), we do some sanity +checks to verify we found a chunk map and that map found covers the +logical address the caller passed in. However the messages aren't very +clear in the sense that don't mention the issue is with a chunk map and +one of them prints the 'length' argument as if it were the end offset of +the requested range (while the in the string format we use %llu-%llu +which suggests a range, and the second %llu-%llu is actually a range for +the chunk map). So improve these two details in the error messages. 
+ +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/volumes.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -3045,15 +3045,16 @@ struct extent_map *btrfs_get_chunk_map(s + read_unlock(&em_tree->lock); + + if (!em) { +- btrfs_crit(fs_info, "unable to find logical %llu length %llu", ++ btrfs_crit(fs_info, ++ "unable to find chunk map for logical %llu length %llu", + logical, length); + return ERR_PTR(-EINVAL); + } + + if (em->start > logical || em->start + em->len <= logical) { + btrfs_crit(fs_info, +- "found a bad mapping, wanted %llu-%llu, found %llu-%llu", +- logical, length, em->start, em->start + em->len); ++ "found a bad chunk map, wanted %llu-%llu, found %llu-%llu", ++ logical, logical + length, em->start, em->start + em->len); + free_extent_map(em); + return ERR_PTR(-EINVAL); + } diff --git a/queue-6.6/btrfs-ref-verify-fix-memory-leaks-in-btrfs_ref_tree_mod.patch b/queue-6.6/btrfs-ref-verify-fix-memory-leaks-in-btrfs_ref_tree_mod.patch new file mode 100644 index 00000000000..946266c1d24 --- /dev/null +++ b/queue-6.6/btrfs-ref-verify-fix-memory-leaks-in-btrfs_ref_tree_mod.patch @@ -0,0 +1,48 @@ +From f91192cd68591c6b037da345bc9fcd5e50540358 Mon Sep 17 00:00:00 2001 +From: Bragatheswaran Manickavel +Date: Sat, 18 Nov 2023 14:40:12 +0530 +Subject: btrfs: ref-verify: fix memory leaks in btrfs_ref_tree_mod() + +From: Bragatheswaran Manickavel + +commit f91192cd68591c6b037da345bc9fcd5e50540358 upstream. + +In btrfs_ref_tree_mod(), when !parent 're' was allocated through +kmalloc(). In the following code, if an error occurs, the execution will +be redirected to 'out' or 'out_unlock' and the function will be exited. +However, on some of the paths, 're' are not deallocated and may lead to +memory leaks. 
+ +For example: lookup_block_entry() for 'be' returns NULL, the out label +will be invoked. During that flow ref and 'ra' are freed but not 're', +which can potentially lead to a memory leak. + +CC: stable@vger.kernel.org # 5.10+ +Reported-and-tested-by: syzbot+d66de4cbf532749df35f@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=d66de4cbf532749df35f +Signed-off-by: Bragatheswaran Manickavel +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ref-verify.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/btrfs/ref-verify.c ++++ b/fs/btrfs/ref-verify.c +@@ -791,6 +791,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_i + dump_ref_action(fs_info, ra); + kfree(ref); + kfree(ra); ++ kfree(re); + goto out_unlock; + } else if (be->num_refs == 0) { + btrfs_err(fs_info, +@@ -800,6 +801,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_i + dump_ref_action(fs_info, ra); + kfree(ref); + kfree(ra); ++ kfree(re); + goto out_unlock; + } + diff --git a/queue-6.6/btrfs-send-ensure-send_fd-is-writable.patch b/queue-6.6/btrfs-send-ensure-send_fd-is-writable.patch new file mode 100644 index 00000000000..3681c7c7d21 --- /dev/null +++ b/queue-6.6/btrfs-send-ensure-send_fd-is-writable.patch @@ -0,0 +1,44 @@ +From 0ac1d13a55eb37d398b63e6ff6db4a09a2c9128c Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Fri, 24 Nov 2023 17:48:31 +0100 +Subject: btrfs: send: ensure send_fd is writable + +From: Jann Horn + +commit 0ac1d13a55eb37d398b63e6ff6db4a09a2c9128c upstream. + +kernel_write() requires the caller to ensure that the file is writable. +Let's do that directly after looking up the ->send_fd. + +We don't need a separate bailout path because the "out" path already +does fput() if ->send_filp is non-NULL. 
+ +This has no security impact for two reasons: + + - the ioctl requires CAP_SYS_ADMIN + - __kernel_write() bails out on read-only files - but only since 5.8, + see commit a01ac27be472 ("fs: check FMODE_WRITE in __kernel_write") + +Reported-and-tested-by: syzbot+12e098239d20385264d3@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=12e098239d20385264d3 +Fixes: 31db9f7c23fb ("Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive") +CC: stable@vger.kernel.org # 4.14+ +Signed-off-by: Jann Horn +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/send.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -8158,7 +8158,7 @@ long btrfs_ioctl_send(struct inode *inod + } + + sctx->send_filp = fget(arg->send_fd); +- if (!sctx->send_filp) { ++ if (!sctx->send_filp || !(sctx->send_filp->f_mode & FMODE_WRITE)) { + ret = -EBADF; + goto out; + } diff --git a/queue-6.6/cpufreq-amd-pstate-fix-the-return-value-of-amd_pstate_fast_switch.patch b/queue-6.6/cpufreq-amd-pstate-fix-the-return-value-of-amd_pstate_fast_switch.patch new file mode 100644 index 00000000000..a4ed895e371 --- /dev/null +++ b/queue-6.6/cpufreq-amd-pstate-fix-the-return-value-of-amd_pstate_fast_switch.patch @@ -0,0 +1,43 @@ +From bb87be267b8ee9b40917fb5bf51be5ddb33c37c2 Mon Sep 17 00:00:00 2001 +From: "Gautham R. Shenoy" +Date: Mon, 27 Nov 2023 16:41:21 +0530 +Subject: cpufreq/amd-pstate: Fix the return value of amd_pstate_fast_switch() + +From: Gautham R. Shenoy + +commit bb87be267b8ee9b40917fb5bf51be5ddb33c37c2 upstream. + +cpufreq_driver->fast_switch() callback expects a frequency as a return +value. amd_pstate_fast_switch() was returning the return value of +amd_pstate_update_freq(), which only indicates a success or failure. 
+ +Fix this by making amd_pstate_fast_switch() return the target_freq +when the call to amd_pstate_update_freq() is successful, and return +the current frequency from policy->cur when the call to +amd_pstate_update_freq() is unsuccessful. + +Fixes: 4badf2eb1e98 ("cpufreq: amd-pstate: Add ->fast_switch() callback") +Acked-by: Huang Rui +Reviewed-by: Wyes Karny +Reviewed-by: Perry Yuan +Cc: 6.4+ # v6.4+ +Signed-off-by: Gautham R. Shenoy +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpufreq/amd-pstate.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/cpufreq/amd-pstate.c ++++ b/drivers/cpufreq/amd-pstate.c +@@ -518,7 +518,9 @@ static int amd_pstate_target(struct cpuf + static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) + { +- return amd_pstate_update_freq(policy, target_freq, true); ++ if (!amd_pstate_update_freq(policy, target_freq, true)) ++ return target_freq; ++ return policy->cur; + } + + static void amd_pstate_adjust_perf(unsigned int cpu, diff --git a/queue-6.6/dma-buf-fix-check-in-dma_resv_add_fence.patch b/queue-6.6/dma-buf-fix-check-in-dma_resv_add_fence.patch new file mode 100644 index 00000000000..6c04a3fe9dc --- /dev/null +++ b/queue-6.6/dma-buf-fix-check-in-dma_resv_add_fence.patch @@ -0,0 +1,61 @@ +From 95ba893c9f4feb836ddce627efd0bb6af6667031 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= +Date: Tue, 14 Nov 2023 13:37:09 +0100 +Subject: dma-buf: fix check in dma_resv_add_fence +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Christian König + +commit 95ba893c9f4feb836ddce627efd0bb6af6667031 upstream. + +It's valid to add the same fence multiple times to a dma-resv object and +we shouldn't need one extra slot for each. 
+ +Signed-off-by: Christian König +Reviewed-by: Thomas Hellström +Fixes: a3f7c10a269d5 ("dma-buf/dma-resv: check if the new fence is really later") +Cc: stable@vger.kernel.org # v5.19+ +Link: https://patchwork.freedesktop.org/patch/msgid/20231115093035.1889-1-christian.koenig@amd.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/dma-buf/dma-resv.c | 2 +- + include/linux/dma-fence.h | 15 +++++++++++++++ + 2 files changed, 16 insertions(+), 1 deletion(-) + +--- a/drivers/dma-buf/dma-resv.c ++++ b/drivers/dma-buf/dma-resv.c +@@ -301,7 +301,7 @@ void dma_resv_add_fence(struct dma_resv + + dma_resv_list_entry(fobj, i, obj, &old, &old_usage); + if ((old->context == fence->context && old_usage >= usage && +- dma_fence_is_later(fence, old)) || ++ dma_fence_is_later_or_same(fence, old)) || + dma_fence_is_signaled(old)) { + dma_resv_list_set(fobj, i, fence, usage); + dma_fence_put(old); +--- a/include/linux/dma-fence.h ++++ b/include/linux/dma-fence.h +@@ -499,6 +499,21 @@ static inline bool dma_fence_is_later(st + } + + /** ++ * dma_fence_is_later_or_same - return true if f1 is later or same as f2 ++ * @f1: the first fence from the same context ++ * @f2: the second fence from the same context ++ * ++ * Returns true if f1 is chronologically later than f2 or the same fence. Both ++ * fences must be from the same context, since a seqno is not re-used across ++ * contexts. 
++ */ ++static inline bool dma_fence_is_later_or_same(struct dma_fence *f1, ++ struct dma_fence *f2) ++{ ++ return f1 == f2 || dma_fence_is_later(f1, f2); ++} ++ ++/** + * dma_fence_later - return the chronologically later fence + * @f1: the first fence from the same context + * @f2: the second fence from the same context diff --git a/queue-6.6/ext2-fix-ki_pos-update-for-dio-buffered-io-fallback-case.patch b/queue-6.6/ext2-fix-ki_pos-update-for-dio-buffered-io-fallback-case.patch new file mode 100644 index 00000000000..ccc54947110 --- /dev/null +++ b/queue-6.6/ext2-fix-ki_pos-update-for-dio-buffered-io-fallback-case.patch @@ -0,0 +1,42 @@ +From 8abc712ea4867a81c860853048f24e511bbc20f2 Mon Sep 17 00:00:00 2001 +From: "Ritesh Harjani (IBM)" +Date: Wed, 22 Nov 2023 14:32:15 +0530 +Subject: ext2: Fix ki_pos update for DIO buffered-io fallback case + +From: Ritesh Harjani (IBM) + +commit 8abc712ea4867a81c860853048f24e511bbc20f2 upstream. + +Commit "filemap: update ki_pos in generic_perform_write", made updating +of ki_pos into common code in generic_perform_write() function. +This also causes generic/091 to fail. +This happened due to an in-flight collision with: +fb5de4358e1a ("ext2: Move direct-io to use iomap"). I have chosen fixes tag +based on which commit got landed later to upstream kernel. 
+ +Fixes: 182c25e9c157 ("filemap: update ki_pos in generic_perform_write") +Cc: stable@vger.kernel.org +Reviewed-by: Christoph Hellwig +Signed-off-by: Ritesh Harjani (IBM) +Signed-off-by: Jan Kara +Message-Id: +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext2/file.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/fs/ext2/file.c b/fs/ext2/file.c +index 1039e5bf90af..4ddc36f4dbd4 100644 +--- a/fs/ext2/file.c ++++ b/fs/ext2/file.c +@@ -258,7 +258,6 @@ static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) + goto out_unlock; + } + +- iocb->ki_pos += status; + ret += status; + endbyte = pos + status - 1; + ret2 = filemap_write_and_wait_range(inode->i_mapping, pos, +-- +2.43.0 + diff --git a/queue-6.6/io_uring-don-t-allow-discontig-pages-for-ioring_setup_no_mmap.patch b/queue-6.6/io_uring-don-t-allow-discontig-pages-for-ioring_setup_no_mmap.patch new file mode 100644 index 00000000000..d69c4986e13 --- /dev/null +++ b/queue-6.6/io_uring-don-t-allow-discontig-pages-for-ioring_setup_no_mmap.patch @@ -0,0 +1,89 @@ +From 820d070feb668aab5bc9413c285a1dda2a70e076 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Fri, 24 Nov 2023 21:02:01 -0700 +Subject: io_uring: don't allow discontig pages for IORING_SETUP_NO_MMAP + +From: Jens Axboe + +commit 820d070feb668aab5bc9413c285a1dda2a70e076 upstream. + +io_sqes_map() is used rather than io_mem_alloc(), if the application +passes in memory for mapping rather than have the kernel allocate it and +then mmap(2) the ranges. This then calls __io_uaddr_map() to perform the +page mapping and pinning, which checks if we end up with the same pages, +if more than one page is mapped. But this check is incorrect and only +checks if the first and last pages are the same, where it really should +be checking if the mapped pages are contigous. This allows mapping a +single normal page, or a huge page range. 
+ +Down the line we can add support for remapping pages to be virtually +contigous, which is really all that io_uring cares about. + +Cc: stable@vger.kernel.org +Fixes: 03d89a2de25b ("io_uring: support for user allocated memory for rings/sqes") +Reported-by: Jann Horn +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 41 ++++++++++++++++++++++------------------- + 1 file changed, 22 insertions(+), 19 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2690,6 +2690,7 @@ static void *__io_uaddr_map(struct page + { + struct page **page_array; + unsigned int nr_pages; ++ void *page_addr; + int ret, i; + + *npages = 0; +@@ -2711,27 +2712,29 @@ err: + io_pages_free(&page_array, ret > 0 ? ret : 0); + return ret < 0 ? ERR_PTR(ret) : ERR_PTR(-EFAULT); + } +- /* +- * Should be a single page. If the ring is small enough that we can +- * use a normal page, that is fine. If we need multiple pages, then +- * userspace should use a huge page. That's the only way to guarantee +- * that we get contigious memory, outside of just being lucky or +- * (currently) having low memory fragmentation. +- */ +- if (page_array[0] != page_array[ret - 1]) +- goto err; +- +- /* +- * Can't support mapping user allocated ring memory on 32-bit archs +- * where it could potentially reside in highmem. Just fail those with +- * -EINVAL, just like we did on kernels that didn't support this +- * feature. +- */ ++ ++ page_addr = page_address(page_array[0]); + for (i = 0; i < nr_pages; i++) { +- if (PageHighMem(page_array[i])) { +- ret = -EINVAL; ++ ret = -EINVAL; ++ ++ /* ++ * Can't support mapping user allocated ring memory on 32-bit ++ * archs where it could potentially reside in highmem. Just ++ * fail those with -EINVAL, just like we did on kernels that ++ * didn't support this feature. 
++ */ ++ if (PageHighMem(page_array[i])) ++ goto err; ++ ++ /* ++ * No support for discontig pages for now, should either be a ++ * single normal page, or a huge page. Later on we can add ++ * support for remapping discontig pages, for now we will ++ * just fail them with EINVAL. ++ */ ++ if (page_address(page_array[i]) != page_addr) + goto err; +- } ++ page_addr += PAGE_SIZE; + } + + *pages = page_array; diff --git a/queue-6.6/io_uring-don-t-guard-ioring_off_pbuf_ring-with-setup_no_mmap.patch b/queue-6.6/io_uring-don-t-guard-ioring_off_pbuf_ring-with-setup_no_mmap.patch new file mode 100644 index 00000000000..a3c95fb0134 --- /dev/null +++ b/queue-6.6/io_uring-don-t-guard-ioring_off_pbuf_ring-with-setup_no_mmap.patch @@ -0,0 +1,46 @@ +From 6f007b1406637d3d73d42e41d7e8d9b245185e69 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Mon, 27 Nov 2023 17:08:19 -0700 +Subject: io_uring: don't guard IORING_OFF_PBUF_RING with SETUP_NO_MMAP + +From: Jens Axboe + +commit 6f007b1406637d3d73d42e41d7e8d9b245185e69 upstream. + +This flag only applies to the SQ and CQ rings, it's perfectly valid +to use a mmap approach for the provided ring buffers. Move the +check into where it belongs. 
+ +Cc: stable@vger.kernel.org +Fixes: 03d89a2de25b ("io_uring: support for user allocated memory for rings/sqes") +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -3436,16 +3436,18 @@ static void *io_uring_validate_mmap_requ + struct page *page; + void *ptr; + +- /* Don't allow mmap if the ring was setup without it */ +- if (ctx->flags & IORING_SETUP_NO_MMAP) +- return ERR_PTR(-EINVAL); +- + switch (offset & IORING_OFF_MMAP_MASK) { + case IORING_OFF_SQ_RING: + case IORING_OFF_CQ_RING: ++ /* Don't allow mmap if the ring was setup without it */ ++ if (ctx->flags & IORING_SETUP_NO_MMAP) ++ return ERR_PTR(-EINVAL); + ptr = ctx->rings; + break; + case IORING_OFF_SQES: ++ /* Don't allow mmap if the ring was setup without it */ ++ if (ctx->flags & IORING_SETUP_NO_MMAP) ++ return ERR_PTR(-EINVAL); + ptr = ctx->sq_sqes; + break; + case IORING_OFF_PBUF_RING: { diff --git a/queue-6.6/io_uring-free-io_buffer_list-entries-via-rcu.patch b/queue-6.6/io_uring-free-io_buffer_list-entries-via-rcu.patch new file mode 100644 index 00000000000..f56f7f0f4a1 --- /dev/null +++ b/queue-6.6/io_uring-free-io_buffer_list-entries-via-rcu.patch @@ -0,0 +1,267 @@ +From 5cf4f52e6d8aa2d3b7728f568abbf9d42a3af252 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Mon, 27 Nov 2023 17:54:40 -0700 +Subject: io_uring: free io_buffer_list entries via RCU + +From: Jens Axboe + +commit 5cf4f52e6d8aa2d3b7728f568abbf9d42a3af252 upstream. + +mmap_lock nests under uring_lock out of necessity, as we may be doing +user copies with uring_lock held. However, for mmap of provided buffer +rings, we attempt to grab uring_lock with mmap_lock already held from +do_mmap(). 
This makes lockdep, rightfully, complain: + +WARNING: possible circular locking dependency detected +6.7.0-rc1-00009-gff3337ebaf94-dirty #4438 Not tainted +------------------------------------------------------ +buf-ring.t/442 is trying to acquire lock: +ffff00020e1480a8 (&ctx->uring_lock){+.+.}-{3:3}, at: io_uring_validate_mmap_request.isra.0+0x4c/0x140 + +but task is already holding lock: +ffff0000dc226190 (&mm->mmap_lock){++++}-{3:3}, at: vm_mmap_pgoff+0x124/0x264 + +which lock already depends on the new lock. + +the existing dependency chain (in reverse order) is: + +-> #1 (&mm->mmap_lock){++++}-{3:3}: + __might_fault+0x90/0xbc + io_register_pbuf_ring+0x94/0x488 + __arm64_sys_io_uring_register+0x8dc/0x1318 + invoke_syscall+0x5c/0x17c + el0_svc_common.constprop.0+0x108/0x130 + do_el0_svc+0x2c/0x38 + el0_svc+0x4c/0x94 + el0t_64_sync_handler+0x118/0x124 + el0t_64_sync+0x168/0x16c + +-> #0 (&ctx->uring_lock){+.+.}-{3:3}: + __lock_acquire+0x19a0/0x2d14 + lock_acquire+0x2e0/0x44c + __mutex_lock+0x118/0x564 + mutex_lock_nested+0x20/0x28 + io_uring_validate_mmap_request.isra.0+0x4c/0x140 + io_uring_mmu_get_unmapped_area+0x3c/0x98 + get_unmapped_area+0xa4/0x158 + do_mmap+0xec/0x5b4 + vm_mmap_pgoff+0x158/0x264 + ksys_mmap_pgoff+0x1d4/0x254 + __arm64_sys_mmap+0x80/0x9c + invoke_syscall+0x5c/0x17c + el0_svc_common.constprop.0+0x108/0x130 + do_el0_svc+0x2c/0x38 + el0_svc+0x4c/0x94 + el0t_64_sync_handler+0x118/0x124 + el0t_64_sync+0x168/0x16c + +From that mmap(2) path, we really just need to ensure that the buffer +list doesn't go away from underneath us. For the lower indexed entries, +they never go away until the ring is freed and we can always sanely +reference those as long as the caller has a file reference. For the +higher indexed ones in our xarray, we just need to ensure that the +buffer list remains valid while we return the address of it. + +Free the higher indexed io_buffer_list entries via RCU. 
With that we can +avoid needing ->uring_lock inside mmap(2), and simply hold the RCU read +lock around the buffer list lookup and address check. + +To ensure that the arrayed lookup either returns a valid fully formulated +entry via RCU lookup, add an 'is_ready' flag that we access with store +and release memory ordering. This isn't needed for the xarray lookups, +but doesn't hurt either. Since this isn't a fast path, retain it across +both types. Similarly, for the allocated array inside the ctx, ensure +we use the proper load/acquire as setup could in theory be running in +parallel with mmap. + +While in there, add a few lockdep checks for documentation purposes. + +Cc: stable@vger.kernel.org +Fixes: c56e022c0a27 ("io_uring: add support for user mapped provided buffer ring") +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 4 +-- + io_uring/kbuf.c | 64 +++++++++++++++++++++++++++++++++++++++++----------- + io_uring/kbuf.h | 3 ++ + 3 files changed, 56 insertions(+), 15 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -3452,9 +3452,9 @@ static void *io_uring_validate_mmap_requ + unsigned int bgid; + + bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; +- mutex_lock(&ctx->uring_lock); ++ rcu_read_lock(); + ptr = io_pbuf_get_address(ctx, bgid); +- mutex_unlock(&ctx->uring_lock); ++ rcu_read_unlock(); + if (!ptr) + return ERR_PTR(-EINVAL); + break; +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -31,19 +31,35 @@ struct io_provide_buf { + __u16 bid; + }; + ++static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, ++ struct io_buffer_list *bl, ++ unsigned int bgid) ++{ ++ if (bl && bgid < BGID_ARRAY) ++ return &bl[bgid]; ++ ++ return xa_load(&ctx->io_bl_xa, bgid); ++} ++ + static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, + unsigned int bgid) + { +- if (ctx->io_bl && bgid < BGID_ARRAY) +- return &ctx->io_bl[bgid]; ++ 
lockdep_assert_held(&ctx->uring_lock); + +- return xa_load(&ctx->io_bl_xa, bgid); ++ return __io_buffer_get_list(ctx, ctx->io_bl, bgid); + } + + static int io_buffer_add_list(struct io_ring_ctx *ctx, + struct io_buffer_list *bl, unsigned int bgid) + { ++ /* ++ * Store buffer group ID and finally mark the list as visible. ++ * The normal lookup doesn't care about the visibility as we're ++ * always under the ->uring_lock, but the RCU lookup from mmap does. ++ */ + bl->bgid = bgid; ++ smp_store_release(&bl->is_ready, 1); ++ + if (bgid < BGID_ARRAY) + return 0; + +@@ -194,18 +210,19 @@ void __user *io_buffer_select(struct io_ + + static __cold int io_init_bl_list(struct io_ring_ctx *ctx) + { ++ struct io_buffer_list *bl; + int i; + +- ctx->io_bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), +- GFP_KERNEL); +- if (!ctx->io_bl) ++ bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL); ++ if (!bl) + return -ENOMEM; + + for (i = 0; i < BGID_ARRAY; i++) { +- INIT_LIST_HEAD(&ctx->io_bl[i].buf_list); +- ctx->io_bl[i].bgid = i; ++ INIT_LIST_HEAD(&bl[i].buf_list); ++ bl[i].bgid = i; + } + ++ smp_store_release(&ctx->io_bl, bl); + return 0; + } + +@@ -270,7 +287,7 @@ void io_destroy_buffers(struct io_ring_c + xa_for_each(&ctx->io_bl_xa, index, bl) { + xa_erase(&ctx->io_bl_xa, bl->bgid); + __io_remove_buffers(ctx, bl, -1U); +- kfree(bl); ++ kfree_rcu(bl, rcu); + } + + while (!list_empty(&ctx->io_buffers_pages)) { +@@ -455,7 +472,16 @@ int io_provide_buffers(struct io_kiocb * + INIT_LIST_HEAD(&bl->buf_list); + ret = io_buffer_add_list(ctx, bl, p->bgid); + if (ret) { +- kfree(bl); ++ /* ++ * Doesn't need rcu free as it was never visible, but ++ * let's keep it consistent throughout. Also can't ++ * be a lower indexed array group, as adding one ++ * where lookup failed cannot happen. 
++ */ ++ if (p->bgid >= BGID_ARRAY) ++ kfree_rcu(bl, rcu); ++ else ++ WARN_ON_ONCE(1); + goto err; + } + } +@@ -550,6 +576,8 @@ int io_register_pbuf_ring(struct io_ring + struct io_buffer_list *bl, *free_bl = NULL; + int ret; + ++ lockdep_assert_held(&ctx->uring_lock); ++ + if (copy_from_user(&reg, arg, sizeof(reg))) + return -EFAULT; + +@@ -604,7 +632,7 @@ int io_register_pbuf_ring(struct io_ring + return 0; + } + +- kfree(free_bl); ++ kfree_rcu(free_bl, rcu); + return ret; + } + +@@ -613,6 +641,8 @@ int io_unregister_pbuf_ring(struct io_ri + struct io_uring_buf_reg reg; + struct io_buffer_list *bl; + ++ lockdep_assert_held(&ctx->uring_lock); ++ + if (copy_from_user(&reg, arg, sizeof(reg))) + return -EFAULT; + if (reg.resv[0] || reg.resv[1] || reg.resv[2]) +@@ -629,7 +659,7 @@ int io_unregister_pbuf_ring(struct io_ri + __io_remove_buffers(ctx, bl, -1U); + if (bl->bgid >= BGID_ARRAY) { + xa_erase(&ctx->io_bl_xa, bl->bgid); +- kfree(bl); ++ kfree_rcu(bl, rcu); + } + return 0; + } +@@ -638,7 +668,15 @@ void *io_pbuf_get_address(struct io_ring + { + struct io_buffer_list *bl; + +- bl = io_buffer_get_list(ctx, bgid); ++ bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid); ++ ++ /* ++ * Ensure the list is fully setup. Only strictly needed for RCU lookup ++ * via mmap, and in that case only for the array indexed groups. For ++ * the xarray lookups, it's either visible and ready, or not at all. 
++ */ ++ if (!smp_load_acquire(&bl->is_ready)) ++ return NULL; + if (!bl || !bl->is_mmap) + return NULL; + +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -15,6 +15,7 @@ struct io_buffer_list { + struct page **buf_pages; + struct io_uring_buf_ring *buf_ring; + }; ++ struct rcu_head rcu; + }; + __u16 bgid; + +@@ -28,6 +29,8 @@ struct io_buffer_list { + __u8 is_mapped; + /* ring mapped provided buffers, but mmap'ed by application */ + __u8 is_mmap; ++ /* bl is visible from an RCU point of view for lookup */ ++ __u8 is_ready; + }; + + struct io_buffer { diff --git a/queue-6.6/iommu-avoid-more-races-around-device-probe.patch b/queue-6.6/iommu-avoid-more-races-around-device-probe.patch new file mode 100644 index 00000000000..71117df2f56 --- /dev/null +++ b/queue-6.6/iommu-avoid-more-races-around-device-probe.patch @@ -0,0 +1,183 @@ +From a2e7e59a94269484a83386972ca07c22fd188854 Mon Sep 17 00:00:00 2001 +From: Robin Murphy +Date: Wed, 15 Nov 2023 18:25:44 +0000 +Subject: iommu: Avoid more races around device probe +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Robin Murphy + +commit a2e7e59a94269484a83386972ca07c22fd188854 upstream. + +It turns out there are more subtle races beyond just the main part of +__iommu_probe_device() itself running in parallel - the dev_iommu_free() +on the way out of an unsuccessful probe can still manage to trip up +concurrent accesses to a device's fwspec. Thus, extend the scope of +iommu_probe_device_lock() to also serialise fwspec creation and initial +retrieval. 
+ +Reported-by: Zhenhua Huang +Link: https://lore.kernel.org/linux-iommu/e2e20e1c-6450-4ac5-9804-b0000acdf7de@quicinc.com/ +Fixes: 01657bc14a39 ("iommu: Avoid races around device probe") +Signed-off-by: Robin Murphy +Acked-by: Greg Kroah-Hartman +Reviewed-by: André Draszik +Tested-by: André Draszik +Link: https://lore.kernel.org/r/16f433658661d7cadfea51e7c65da95826112a2b.1700071477.git.robin.murphy@arm.com +Cc: stable@vger.kernel.org +Signed-off-by: Joerg Roedel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/scan.c | 7 ++++++- + drivers/iommu/iommu.c | 20 ++++++++++---------- + drivers/iommu/of_iommu.c | 12 +++++++++--- + include/linux/iommu.h | 1 + + 4 files changed, 26 insertions(+), 14 deletions(-) + +--- a/drivers/acpi/scan.c ++++ b/drivers/acpi/scan.c +@@ -1568,17 +1568,22 @@ static const struct iommu_ops *acpi_iomm + int err; + const struct iommu_ops *ops; + ++ /* Serialise to make dev->iommu stable under our potential fwspec */ ++ mutex_lock(&iommu_probe_device_lock); + /* + * If we already translated the fwspec there is nothing left to do, + * return the iommu_ops. 
+ */ + ops = acpi_iommu_fwspec_ops(dev); +- if (ops) ++ if (ops) { ++ mutex_unlock(&iommu_probe_device_lock); + return ops; ++ } + + err = iort_iommu_configure_id(dev, id_in); + if (err && err != -EPROBE_DEFER) + err = viot_iommu_configure(dev); ++ mutex_unlock(&iommu_probe_device_lock); + + /* + * If we have reason to believe the IOMMU driver missed the initial +--- a/drivers/iommu/iommu.c ++++ b/drivers/iommu/iommu.c +@@ -479,11 +479,12 @@ static void iommu_deinit_device(struct d + dev_iommu_free(dev); + } + ++DEFINE_MUTEX(iommu_probe_device_lock); ++ + static int __iommu_probe_device(struct device *dev, struct list_head *group_list) + { + const struct iommu_ops *ops = dev->bus->iommu_ops; + struct iommu_group *group; +- static DEFINE_MUTEX(iommu_probe_device_lock); + struct group_device *gdev; + int ret; + +@@ -496,17 +497,15 @@ static int __iommu_probe_device(struct d + * probably be able to use device_lock() here to minimise the scope, + * but for now enforcing a simple global ordering is fine. 
+ */ +- mutex_lock(&iommu_probe_device_lock); ++ lockdep_assert_held(&iommu_probe_device_lock); + + /* Device is probed already if in a group */ +- if (dev->iommu_group) { +- ret = 0; +- goto out_unlock; +- } ++ if (dev->iommu_group) ++ return 0; + + ret = iommu_init_device(dev, ops); + if (ret) +- goto out_unlock; ++ return ret; + + group = dev->iommu_group; + gdev = iommu_group_alloc_device(group, dev); +@@ -542,7 +541,6 @@ static int __iommu_probe_device(struct d + list_add_tail(&group->entry, group_list); + } + mutex_unlock(&group->mutex); +- mutex_unlock(&iommu_probe_device_lock); + + if (dev_is_pci(dev)) + iommu_dma_set_pci_32bit_workaround(dev); +@@ -556,8 +554,6 @@ err_put_group: + iommu_deinit_device(dev); + mutex_unlock(&group->mutex); + iommu_group_put(group); +-out_unlock: +- mutex_unlock(&iommu_probe_device_lock); + + return ret; + } +@@ -567,7 +563,9 @@ int iommu_probe_device(struct device *de + const struct iommu_ops *ops; + int ret; + ++ mutex_lock(&iommu_probe_device_lock); + ret = __iommu_probe_device(dev, NULL); ++ mutex_unlock(&iommu_probe_device_lock); + if (ret) + return ret; + +@@ -1783,7 +1781,9 @@ static int probe_iommu_group(struct devi + struct list_head *group_list = data; + int ret; + ++ mutex_lock(&iommu_probe_device_lock); + ret = __iommu_probe_device(dev, group_list); ++ mutex_unlock(&iommu_probe_device_lock); + if (ret == -ENODEV) + ret = 0; + +--- a/drivers/iommu/of_iommu.c ++++ b/drivers/iommu/of_iommu.c +@@ -112,16 +112,20 @@ const struct iommu_ops *of_iommu_configu + const u32 *id) + { + const struct iommu_ops *ops = NULL; +- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); ++ struct iommu_fwspec *fwspec; + int err = NO_IOMMU; + + if (!master_np) + return NULL; + ++ /* Serialise to make dev->iommu stable under our potential fwspec */ ++ mutex_lock(&iommu_probe_device_lock); ++ fwspec = dev_iommu_fwspec_get(dev); + if (fwspec) { +- if (fwspec->ops) ++ if (fwspec->ops) { ++ mutex_unlock(&iommu_probe_device_lock); + return 
fwspec->ops; +- ++ } + /* In the deferred case, start again from scratch */ + iommu_fwspec_free(dev); + } +@@ -155,6 +159,8 @@ const struct iommu_ops *of_iommu_configu + fwspec = dev_iommu_fwspec_get(dev); + ops = fwspec->ops; + } ++ mutex_unlock(&iommu_probe_device_lock); ++ + /* + * If we have reason to believe the IOMMU driver missed the initial + * probe for dev, replay it to get things in order. +--- a/include/linux/iommu.h ++++ b/include/linux/iommu.h +@@ -703,6 +703,7 @@ static inline void dev_iommu_priv_set(st + dev->iommu->priv = priv; + } + ++extern struct mutex iommu_probe_device_lock; + int iommu_probe_device(struct device *dev); + + int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); diff --git a/queue-6.6/iommu-vt-d-add-mtl-to-quirk-list-to-skip-te-disabling.patch b/queue-6.6/iommu-vt-d-add-mtl-to-quirk-list-to-skip-te-disabling.patch new file mode 100644 index 00000000000..d12870a1a4c --- /dev/null +++ b/queue-6.6/iommu-vt-d-add-mtl-to-quirk-list-to-skip-te-disabling.patch @@ -0,0 +1,45 @@ +From 85b80fdffa867d75dfb9084a839e7949e29064e8 Mon Sep 17 00:00:00 2001 +From: "Abdul Halim, Mohd Syazwan" +Date: Wed, 22 Nov 2023 11:26:06 +0800 +Subject: iommu/vt-d: Add MTL to quirk list to skip TE disabling + +From: Abdul Halim, Mohd Syazwan + +commit 85b80fdffa867d75dfb9084a839e7949e29064e8 upstream. + +The VT-d spec requires (10.4.4 Global Command Register, TE field) that: + +Hardware implementations supporting DMA draining must drain any in-flight +DMA read/write requests queued within the Root-Complex before switching +address translation on or off and reflecting the status of the command +through the TES field in the Global Status register. + +Unfortunately, some integrated graphic devices fail to do so after some +kind of power state transition. As the result, the system might stuck in +iommu_disable_translation(), waiting for the completion of TE transition. 
+ +Add MTL to the quirk list for those devices and skips TE disabling if the +qurik hits. + +Fixes: b1012ca8dc4f ("iommu/vt-d: Skip TE disabling on quirky gfx dedicated iommu") +Cc: stable@vger.kernel.org +Signed-off-by: Abdul Halim, Mohd Syazwan +Signed-off-by: Lu Baolu +Link: https://lore.kernel.org/r/20231116022324.30120-1-baolu.lu@linux.intel.com +Signed-off-by: Joerg Roedel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iommu/intel/iommu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/iommu/intel/iommu.c ++++ b/drivers/iommu/intel/iommu.c +@@ -4928,7 +4928,7 @@ static void quirk_igfx_skip_te_disable(s + ver = (dev->device >> 8) & 0xff; + if (ver != 0x45 && ver != 0x46 && ver != 0x4c && + ver != 0x4e && ver != 0x8a && ver != 0x98 && +- ver != 0x9a && ver != 0xa7) ++ ver != 0x9a && ver != 0xa7 && ver != 0x7d) + return; + + if (risky_device(dev)) diff --git a/queue-6.6/iommu-vt-d-fix-incorrect-cache-invalidation-for-mm-notification.patch b/queue-6.6/iommu-vt-d-fix-incorrect-cache-invalidation-for-mm-notification.patch new file mode 100644 index 00000000000..858c2b472a9 --- /dev/null +++ b/queue-6.6/iommu-vt-d-fix-incorrect-cache-invalidation-for-mm-notification.patch @@ -0,0 +1,88 @@ +From e7ad6c2a4b1aa710db94060b716f53c812cef565 Mon Sep 17 00:00:00 2001 +From: Lu Baolu +Date: Wed, 22 Nov 2023 11:26:07 +0800 +Subject: iommu/vt-d: Fix incorrect cache invalidation for mm notification + +From: Lu Baolu + +commit e7ad6c2a4b1aa710db94060b716f53c812cef565 upstream. + +Commit 6bbd42e2df8f ("mmu_notifiers: call invalidate_range() when +invalidating TLBs") moved the secondary TLB invalidations into the TLB +invalidation functions to ensure that all secondary TLB invalidations +happen at the same time as the CPU invalidation and added a flush-all +type of secondary TLB invalidation for the batched mode, where a range +of [0, -1UL) is used to indicates that the range extends to the end of +the address space. 
+ +However, using an end address of -1UL caused an overflow in the Intel +IOMMU driver, where the end address was rounded up to the next page. +As a result, both the IOTLB and device ATC were not invalidated correctly. + +Add a flush all helper function and call it when the invalidation range +is from 0 to -1UL, ensuring that the entire caches are invalidated +correctly. + +Fixes: 6bbd42e2df8f ("mmu_notifiers: call invalidate_range() when invalidating TLBs") +Cc: stable@vger.kernel.org +Cc: Huang Ying +Cc: Alistair Popple +Tested-by: Luo Yuzhang # QAT +Tested-by: Tony Zhu # DSA +Reviewed-by: Jason Gunthorpe +Reviewed-by: Alistair Popple +Signed-off-by: Lu Baolu +Link: https://lore.kernel.org/r/20231117090933.75267-1-baolu.lu@linux.intel.com +Signed-off-by: Joerg Roedel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/iommu/intel/svm.c | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c +index 50a481c895b8..ac12f76c1212 100644 +--- a/drivers/iommu/intel/svm.c ++++ b/drivers/iommu/intel/svm.c +@@ -216,6 +216,27 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, + rcu_read_unlock(); + } + ++static void intel_flush_svm_all(struct intel_svm *svm) ++{ ++ struct device_domain_info *info; ++ struct intel_svm_dev *sdev; ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(sdev, &svm->devs, list) { ++ info = dev_iommu_priv_get(sdev->dev); ++ ++ qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0); ++ if (info->ats_enabled) { ++ qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, ++ svm->pasid, sdev->qdep, ++ 0, 64 - VTD_PAGE_SHIFT); ++ quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT, ++ svm->pasid, sdev->qdep); ++ } ++ } ++ rcu_read_unlock(); ++} ++ + /* Pages have been freed at this point */ + static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, + struct mm_struct *mm, +@@ -223,6 +244,11 @@ static void 
intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, + { + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + ++ if (start == 0 && end == -1UL) { ++ intel_flush_svm_all(svm); ++ return; ++ } ++ + intel_flush_svm_range(svm, start, + (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0); + } +-- +2.43.0 + diff --git a/queue-6.6/kvm-ppc-book3s-hv-fix-kvm_run-clobbering-fp-vec-user-registers.patch b/queue-6.6/kvm-ppc-book3s-hv-fix-kvm_run-clobbering-fp-vec-user-registers.patch new file mode 100644 index 00000000000..5c04c1f758b --- /dev/null +++ b/queue-6.6/kvm-ppc-book3s-hv-fix-kvm_run-clobbering-fp-vec-user-registers.patch @@ -0,0 +1,47 @@ +From dc158d23b33df9033bcc8e7117e8591dd2f9d125 Mon Sep 17 00:00:00 2001 +From: Nicholas Piggin +Date: Wed, 22 Nov 2023 12:58:11 +1000 +Subject: KVM: PPC: Book3S HV: Fix KVM_RUN clobbering FP/VEC user registers + +From: Nicholas Piggin + +commit dc158d23b33df9033bcc8e7117e8591dd2f9d125 upstream. + +Before running a guest, the host process (e.g., QEMU) FP/VEC registers +are saved if they were being used, similarly to when the kernel uses FP +registers. The guest values are then loaded into regs, and the host +process registers will be restored lazily when it uses FP/VEC. + +KVM HV has a bug here: the host process registers do get saved, but the +user MSR bits remain enabled, which indicates the registers are valid +for the process. After they are clobbered by running the guest, this +valid indication causes the host process to take on the FP/VEC register +values of the guest. 
+ +Fixes: 34e119c96b2b ("KVM: PPC: Book3S HV P9: Reduce mtmsrd instructions required to save host SPRs") +Cc: stable@vger.kernel.org # v5.17+ +Signed-off-by: Nicholas Piggin +Signed-off-by: Michael Ellerman +Link: https://msgid.link/20231122025811.2973-1-npiggin@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/kernel/process.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/powerpc/kernel/process.c ++++ b/arch/powerpc/kernel/process.c +@@ -1198,11 +1198,11 @@ void kvmppc_save_user_regs(void) + + usermsr = current->thread.regs->msr; + ++ /* Caller has enabled FP/VEC/VSX/TM in MSR */ + if (usermsr & MSR_FP) +- save_fpu(current); +- ++ __giveup_fpu(current); + if (usermsr & MSR_VEC) +- save_altivec(current); ++ __giveup_altivec(current); + + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (usermsr & MSR_TM) { diff --git a/queue-6.6/nouveau-find-the-smallest-page-allocation-to-cover-a-buffer-alloc.patch b/queue-6.6/nouveau-find-the-smallest-page-allocation-to-cover-a-buffer-alloc.patch new file mode 100644 index 00000000000..f75adc08ff2 --- /dev/null +++ b/queue-6.6/nouveau-find-the-smallest-page-allocation-to-cover-a-buffer-alloc.patch @@ -0,0 +1,47 @@ +From e9ba37d9f9a6872b069dd893bd86a7d77ba8c153 Mon Sep 17 00:00:00 2001 +From: Dave Airlie +Date: Fri, 11 Aug 2023 13:15:20 +1000 +Subject: nouveau: find the smallest page allocation to cover a buffer alloc. + +From: Dave Airlie + +commit e9ba37d9f9a6872b069dd893bd86a7d77ba8c153 upstream. + +With the new uapi we don't have the comp flags on the allocation, +so we shouldn't be using the first size that works, we should be +iterating until we get the correct one. + +This reduces allocations from 2MB to 64k in lots of places. + +Fixes dEQP-VK.memory.allocation.basic.size_8KiB.forward.count_4000 +on my ampere/gsp system. 
+ +Cc: stable@vger.kernel.org # v6.6 +Signed-off-by: Dave Airlie +Reviewed-by: Faith Ekstrand +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20230811031520.248341-1-airlied@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/nouveau/nouveau_bo.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c +index 0f3bd187ede6..280d1d9a559b 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_bo.c ++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c +@@ -318,8 +318,9 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, + (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) + continue; + +- if (pi < 0) +- pi = i; ++ /* pick the last one as it will be smallest. */ ++ pi = i; ++ + /* Stop once the buffer is larger than the current page size. */ + if (*size >= 1ULL << vmm->page[i].shift) + break; +-- +2.43.0 + diff --git a/queue-6.6/parisc-drop-the-hp-ux-enosym-and-eremoterelease-error-codes.patch b/queue-6.6/parisc-drop-the-hp-ux-enosym-and-eremoterelease-error-codes.patch new file mode 100644 index 00000000000..f20743ecf44 --- /dev/null +++ b/queue-6.6/parisc-drop-the-hp-ux-enosym-and-eremoterelease-error-codes.patch @@ -0,0 +1,87 @@ +From e5f3e299a2b1e9c3ece24a38adfc089aef307e8a Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Thu, 23 Nov 2023 20:28:27 +0100 +Subject: parisc: Drop the HP-UX ENOSYM and EREMOTERELEASE error codes + +From: Helge Deller + +commit e5f3e299a2b1e9c3ece24a38adfc089aef307e8a upstream. + +Those return codes are only defined for the parisc architecture and +are leftovers from when we wanted to be HP-UX compatible. + +They are not returned by any Linux kernel syscall but do trigger +problems with the glibc strerrorname_np() and strerror() functions as +reported in glibc issue #31080. + +There is no need to keep them, so simply remove them. 
+ +Signed-off-by: Helge Deller +Reported-by: Bruno Haible +Closes: https://sourceware.org/bugzilla/show_bug.cgi?id=31080 +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/include/uapi/asm/errno.h | 2 -- + lib/errname.c | 6 ------ + tools/arch/parisc/include/uapi/asm/errno.h | 2 -- + 3 files changed, 10 deletions(-) + +--- a/arch/parisc/include/uapi/asm/errno.h ++++ b/arch/parisc/include/uapi/asm/errno.h +@@ -75,7 +75,6 @@ + + /* We now return you to your regularly scheduled HPUX. */ + +-#define ENOSYM 215 /* symbol does not exist in executable */ + #define ENOTSOCK 216 /* Socket operation on non-socket */ + #define EDESTADDRREQ 217 /* Destination address required */ + #define EMSGSIZE 218 /* Message too long */ +@@ -101,7 +100,6 @@ + #define ETIMEDOUT 238 /* Connection timed out */ + #define ECONNREFUSED 239 /* Connection refused */ + #define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ +-#define EREMOTERELEASE 240 /* Remote peer released connection */ + #define EHOSTDOWN 241 /* Host is down */ + #define EHOSTUNREACH 242 /* No route to host */ + +--- a/lib/errname.c ++++ b/lib/errname.c +@@ -111,9 +111,6 @@ static const char *names_0[] = { + E(ENOSPC), + E(ENOSR), + E(ENOSTR), +-#ifdef ENOSYM +- E(ENOSYM), +-#endif + E(ENOSYS), + E(ENOTBLK), + E(ENOTCONN), +@@ -144,9 +141,6 @@ static const char *names_0[] = { + #endif + E(EREMOTE), + E(EREMOTEIO), +-#ifdef EREMOTERELEASE +- E(EREMOTERELEASE), +-#endif + E(ERESTART), + E(ERFKILL), + E(EROFS), +--- a/tools/arch/parisc/include/uapi/asm/errno.h ++++ b/tools/arch/parisc/include/uapi/asm/errno.h +@@ -75,7 +75,6 @@ + + /* We now return you to your regularly scheduled HPUX. 
*/ + +-#define ENOSYM 215 /* symbol does not exist in executable */ + #define ENOTSOCK 216 /* Socket operation on non-socket */ + #define EDESTADDRREQ 217 /* Destination address required */ + #define EMSGSIZE 218 /* Message too long */ +@@ -101,7 +100,6 @@ + #define ETIMEDOUT 238 /* Connection timed out */ + #define ECONNREFUSED 239 /* Connection refused */ + #define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ +-#define EREMOTERELEASE 240 /* Remote peer released connection */ + #define EHOSTDOWN 241 /* Host is down */ + #define EHOSTUNREACH 242 /* No route to host */ + diff --git a/queue-6.6/parisc-ensure-32-bit-alignment-on-parisc-unwind-section.patch b/queue-6.6/parisc-ensure-32-bit-alignment-on-parisc-unwind-section.patch new file mode 100644 index 00000000000..246f75ee139 --- /dev/null +++ b/queue-6.6/parisc-ensure-32-bit-alignment-on-parisc-unwind-section.patch @@ -0,0 +1,28 @@ +From c9fcb2b65c2849e8ff3be23fd8828312fb68dc19 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Sat, 25 Nov 2023 09:16:02 +0100 +Subject: parisc: Ensure 32-bit alignment on parisc unwind section + +From: Helge Deller + +commit c9fcb2b65c2849e8ff3be23fd8828312fb68dc19 upstream. + +Make sure the .PARISC.unwind section will be 32-bit aligned. + +Signed-off-by: Helge Deller +Cc: stable@vger.kernel.org # v6.0+ +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/kernel/vmlinux.lds.S | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/parisc/kernel/vmlinux.lds.S ++++ b/arch/parisc/kernel/vmlinux.lds.S +@@ -130,6 +130,7 @@ SECTIONS + RO_DATA(8) + + /* unwind info */ ++ . 
= ALIGN(4); + .PARISC.unwind : { + __start___unwind = .; + *(.PARISC.unwind) diff --git a/queue-6.6/parisc-mark-altinstructions-read-only-and-32-bit-aligned.patch b/queue-6.6/parisc-mark-altinstructions-read-only-and-32-bit-aligned.patch new file mode 100644 index 00000000000..42418a89759 --- /dev/null +++ b/queue-6.6/parisc-mark-altinstructions-read-only-and-32-bit-aligned.patch @@ -0,0 +1,48 @@ +From 33f806da2df68606f77d7b892cd1298ba3d463e8 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Mon, 20 Nov 2023 23:10:20 +0100 +Subject: parisc: Mark altinstructions read-only and 32-bit aligned + +From: Helge Deller + +commit 33f806da2df68606f77d7b892cd1298ba3d463e8 upstream. + +Signed-off-by: Helge Deller +Cc: stable@vger.kernel.org # v6.0+ +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/include/asm/alternative.h | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/arch/parisc/include/asm/alternative.h ++++ b/arch/parisc/include/asm/alternative.h +@@ -34,7 +34,8 @@ void apply_alternatives(struct alt_instr + + /* Alternative SMP implementation. */ + #define ALTERNATIVE(cond, replacement) "!0:" \ +- ".section .altinstructions, \"aw\" !" \ ++ ".section .altinstructions, \"a\" !" \ ++ ".align 4 !" \ + ".word (0b-4-.) !" \ + ".hword 1, " __stringify(cond) " !" \ + ".word " __stringify(replacement) " !" \ +@@ -44,7 +45,8 @@ void apply_alternatives(struct alt_instr + + /* to replace one single instructions by a new instruction */ + #define ALTERNATIVE(from, to, cond, replacement)\ +- .section .altinstructions, "aw" ! \ ++ .section .altinstructions, "a" ! \ ++ .align 4 ! \ + .word (from - .) ! \ + .hword (to - from)/4, cond ! \ + .word replacement ! \ +@@ -52,7 +54,8 @@ void apply_alternatives(struct alt_instr + + /* to replace multiple instructions by new code */ + #define ALTERNATIVE_CODE(from, num_instructions, cond, new_instr_ptr)\ +- .section .altinstructions, "aw" ! \ ++ .section .altinstructions, "a" ! \ ++ .align 4 ! \ + .word (from - .) ! 
\ + .hword -num_instructions, cond ! \ + .word (new_instr_ptr - .) ! \ diff --git a/queue-6.6/parisc-mark-ex_table-entries-32-bit-aligned-in-assembly.h.patch b/queue-6.6/parisc-mark-ex_table-entries-32-bit-aligned-in-assembly.h.patch new file mode 100644 index 00000000000..f96de1f01ed --- /dev/null +++ b/queue-6.6/parisc-mark-ex_table-entries-32-bit-aligned-in-assembly.h.patch @@ -0,0 +1,29 @@ +From e11d4cccd094a7cd4696c8c42e672c76c092dad5 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Mon, 20 Nov 2023 15:37:50 +0100 +Subject: parisc: Mark ex_table entries 32-bit aligned in assembly.h + +From: Helge Deller + +commit e11d4cccd094a7cd4696c8c42e672c76c092dad5 upstream. + +Add an align statement to tell the linker that all ex_table entries and as +such the whole ex_table section should be 32-bit aligned in vmlinux and modules. + +Signed-off-by: Helge Deller +Cc: stable@vger.kernel.org # v6.0+ +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/include/asm/assembly.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/parisc/include/asm/assembly.h ++++ b/arch/parisc/include/asm/assembly.h +@@ -574,6 +574,7 @@ + */ + #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr) \ + .section __ex_table,"aw" ! \ ++ .align 4 ! \ + .word (fault_addr - .), (except_addr - .) ! \ + .previous + diff --git a/queue-6.6/parisc-mark-ex_table-entries-32-bit-aligned-in-uaccess.h.patch b/queue-6.6/parisc-mark-ex_table-entries-32-bit-aligned-in-uaccess.h.patch new file mode 100644 index 00000000000..9d407caa676 --- /dev/null +++ b/queue-6.6/parisc-mark-ex_table-entries-32-bit-aligned-in-uaccess.h.patch @@ -0,0 +1,29 @@ +From a80aeb86542a50aa8521729ea4cc731ee7174f03 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Mon, 20 Nov 2023 15:39:03 +0100 +Subject: parisc: Mark ex_table entries 32-bit aligned in uaccess.h + +From: Helge Deller + +commit a80aeb86542a50aa8521729ea4cc731ee7174f03 upstream. 
+ +Add an align statement to tell the linker that all ex_table entries and as +such the whole ex_table section should be 32-bit aligned in vmlinux and modules. + +Signed-off-by: Helge Deller +Cc: stable@vger.kernel.org # v6.0+ +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/include/asm/uaccess.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/parisc/include/asm/uaccess.h ++++ b/arch/parisc/include/asm/uaccess.h +@@ -41,6 +41,7 @@ struct exception_table_entry { + + #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ + ".section __ex_table,\"aw\"\n" \ ++ ".align 4\n" \ + ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ + ".previous\n" + diff --git a/queue-6.6/parisc-mark-jump_table-naturally-aligned.patch b/queue-6.6/parisc-mark-jump_table-naturally-aligned.patch new file mode 100644 index 00000000000..5857cb939f2 --- /dev/null +++ b/queue-6.6/parisc-mark-jump_table-naturally-aligned.patch @@ -0,0 +1,51 @@ +From 07eecff8ae78df7f28800484d31337e1f9bfca3a Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Mon, 20 Nov 2023 23:14:39 +0100 +Subject: parisc: Mark jump_table naturally aligned + +From: Helge Deller + +commit 07eecff8ae78df7f28800484d31337e1f9bfca3a upstream. + +The jump_table stores two 32-bit words and one 32- (on 32-bit kernel) +or one 64-bit word (on 64-bit kernel). +Ensure that the last word is always 64-bit aligned on a 64-bit kernel +by aligning the whole structure on sizeof(long). 
+ +Signed-off-by: Helge Deller +Cc: stable@vger.kernel.org # v6.0+ +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/include/asm/jump_label.h | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/parisc/include/asm/jump_label.h ++++ b/arch/parisc/include/asm/jump_label.h +@@ -15,10 +15,12 @@ static __always_inline bool arch_static_ + asm_volatile_goto("1:\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" ++ ".align %1\n\t" + ".word 1b - ., %l[l_yes] - .\n\t" + __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" + ".popsection\n\t" +- : : "i" (&((char *)key)[branch]) : : l_yes); ++ : : "i" (&((char *)key)[branch]), "i" (sizeof(long)) ++ : : l_yes); + + return false; + l_yes: +@@ -30,10 +32,12 @@ static __always_inline bool arch_static_ + asm_volatile_goto("1:\n\t" + "b,n %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" ++ ".align %1\n\t" + ".word 1b - ., %l[l_yes] - .\n\t" + __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" + ".popsection\n\t" +- : : "i" (&((char *)key)[branch]) : : l_yes); ++ : : "i" (&((char *)key)[branch]), "i" (sizeof(long)) ++ : : l_yes); + + return false; + l_yes: diff --git a/queue-6.6/parisc-mark-lock_aligned-variables-16-byte-aligned-on-smp.patch b/queue-6.6/parisc-mark-lock_aligned-variables-16-byte-aligned-on-smp.patch new file mode 100644 index 00000000000..a0d8b1feb16 --- /dev/null +++ b/queue-6.6/parisc-mark-lock_aligned-variables-16-byte-aligned-on-smp.patch @@ -0,0 +1,32 @@ +From b28fc0d8739c03e7b6c44914a9d00d4c6dddc0ea Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Sat, 25 Nov 2023 09:11:56 +0100 +Subject: parisc: Mark lock_aligned variables 16-byte aligned on SMP + +From: Helge Deller + +commit b28fc0d8739c03e7b6c44914a9d00d4c6dddc0ea upstream. + +On parisc we need 16-byte alignment for variables which are used for +locking. Mark the __lock_aligned attribute accordingly so that the +.data..lock_aligned section will get that alignment in the generated +object files. 
+ +Signed-off-by: Helge Deller +Cc: stable@vger.kernel.org # v6.0+ +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/include/asm/ldcw.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/parisc/include/asm/ldcw.h ++++ b/arch/parisc/include/asm/ldcw.h +@@ -55,7 +55,7 @@ + }) + + #ifdef CONFIG_SMP +-# define __lock_aligned __section(".data..lock_aligned") ++# define __lock_aligned __section(".data..lock_aligned") __aligned(16) + #endif + + #endif /* __PARISC_LDCW_H */ diff --git a/queue-6.6/parisc-use-natural-cpu-alignment-for-bug_table.patch b/queue-6.6/parisc-use-natural-cpu-alignment-for-bug_table.patch new file mode 100644 index 00000000000..5de6c36c102 --- /dev/null +++ b/queue-6.6/parisc-use-natural-cpu-alignment-for-bug_table.patch @@ -0,0 +1,83 @@ +From fe76a1349f235969381832c83d703bc911021eb6 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Mon, 20 Nov 2023 23:30:49 +0100 +Subject: parisc: Use natural CPU alignment for bug_table + +From: Helge Deller + +commit fe76a1349f235969381832c83d703bc911021eb6 upstream. + +Make sure that the __bug_table section gets 32- or 64-bit aligned, +depending if a 32- or 64-bit kernel is being built. +Mark it non-writeable and use .blockz instead of the .org assembler +directive to pad the struct. 
+ +Signed-off-by: Helge Deller +Cc: stable@vger.kernel.org # v6.0+ +Signed-off-by: Greg Kroah-Hartman +--- + arch/parisc/include/asm/bug.h | 30 ++++++++++++++++++------------ + 1 file changed, 18 insertions(+), 12 deletions(-) + +--- a/arch/parisc/include/asm/bug.h ++++ b/arch/parisc/include/asm/bug.h +@@ -28,13 +28,15 @@ + do { \ + asm volatile("\n" \ + "1:\t" PARISC_BUG_BREAK_ASM "\n" \ +- "\t.pushsection __bug_table,\"aw\"\n" \ ++ "\t.pushsection __bug_table,\"a\"\n" \ ++ "\t.align %4\n" \ + "2:\t" ASM_WORD_INSN "1b, %c0\n" \ +- "\t.short %c1, %c2\n" \ +- "\t.org 2b+%c3\n" \ ++ "\t.short %1, %2\n" \ ++ "\t.blockz %3-2*%4-2*2\n" \ + "\t.popsection" \ + : : "i" (__FILE__), "i" (__LINE__), \ +- "i" (0), "i" (sizeof(struct bug_entry)) ); \ ++ "i" (0), "i" (sizeof(struct bug_entry)), \ ++ "i" (sizeof(long)) ); \ + unreachable(); \ + } while(0) + +@@ -51,27 +53,31 @@ + do { \ + asm volatile("\n" \ + "1:\t" PARISC_BUG_BREAK_ASM "\n" \ +- "\t.pushsection __bug_table,\"aw\"\n" \ ++ "\t.pushsection __bug_table,\"a\"\n" \ ++ "\t.align %4\n" \ + "2:\t" ASM_WORD_INSN "1b, %c0\n" \ +- "\t.short %c1, %c2\n" \ +- "\t.org 2b+%c3\n" \ ++ "\t.short %1, %2\n" \ ++ "\t.blockz %3-2*%4-2*2\n" \ + "\t.popsection" \ + : : "i" (__FILE__), "i" (__LINE__), \ + "i" (BUGFLAG_WARNING|(flags)), \ +- "i" (sizeof(struct bug_entry)) ); \ ++ "i" (sizeof(struct bug_entry)), \ ++ "i" (sizeof(long)) ); \ + } while(0) + #else + #define __WARN_FLAGS(flags) \ + do { \ + asm volatile("\n" \ + "1:\t" PARISC_BUG_BREAK_ASM "\n" \ +- "\t.pushsection __bug_table,\"aw\"\n" \ ++ "\t.pushsection __bug_table,\"a\"\n" \ ++ "\t.align %2\n" \ + "2:\t" ASM_WORD_INSN "1b\n" \ +- "\t.short %c0\n" \ +- "\t.org 2b+%c1\n" \ ++ "\t.short %0\n" \ ++ "\t.blockz %1-%2-2\n" \ + "\t.popsection" \ + : : "i" (BUGFLAG_WARNING|(flags)), \ +- "i" (sizeof(struct bug_entry)) ); \ ++ "i" (sizeof(struct bug_entry)), \ ++ "i" (sizeof(long)) ); \ + } while(0) + #endif + diff --git 
a/queue-6.6/powercap-dtpm-fix-unneeded-conversions-to-micro-watts.patch b/queue-6.6/powercap-dtpm-fix-unneeded-conversions-to-micro-watts.patch new file mode 100644 index 00000000000..de8a3bb07f0 --- /dev/null +++ b/queue-6.6/powercap-dtpm-fix-unneeded-conversions-to-micro-watts.patch @@ -0,0 +1,105 @@ +From b817f1488fca548fe50e2654d84a1956a16a1a8a Mon Sep 17 00:00:00 2001 +From: Lukasz Luba +Date: Mon, 27 Nov 2023 09:28:19 +0000 +Subject: powercap: DTPM: Fix unneeded conversions to micro-Watts + +From: Lukasz Luba + +commit b817f1488fca548fe50e2654d84a1956a16a1a8a upstream. + +The power values coming from the Energy Model are already in uW. + +The PowerCap and DTPM frameworks operate on uW, so all places should +just use the values from the EM. + +Fix the code by removing all of the conversion to uW still present in it. + +Fixes: ae6ccaa65038 (PM: EM: convert power field to micro-Watts precision and align drivers) +Cc: 5.19+ # v5.19+ +Signed-off-by: Lukasz Luba +[ rjw: Changelog edits ] +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/powercap/dtpm_cpu.c | 6 +----- + drivers/powercap/dtpm_devfreq.c | 11 +++-------- + 2 files changed, 4 insertions(+), 13 deletions(-) + +--- a/drivers/powercap/dtpm_cpu.c ++++ b/drivers/powercap/dtpm_cpu.c +@@ -24,7 +24,6 @@ + #include + #include + #include +-#include + + struct dtpm_cpu { + struct dtpm dtpm; +@@ -104,8 +103,7 @@ static u64 get_pd_power_uw(struct dtpm * + if (pd->table[i].frequency < freq) + continue; + +- return scale_pd_power_uw(pd_mask, pd->table[i].power * +- MICROWATT_PER_MILLIWATT); ++ return scale_pd_power_uw(pd_mask, pd->table[i].power); + } + + return 0; +@@ -122,11 +120,9 @@ static int update_pd_power_uw(struct dtp + nr_cpus = cpumask_weight(&cpus); + + dtpm->power_min = em->table[0].power; +- dtpm->power_min *= MICROWATT_PER_MILLIWATT; + dtpm->power_min *= nr_cpus; + + dtpm->power_max = em->table[em->nr_perf_states - 1].power; +- dtpm->power_max *= MICROWATT_PER_MILLIWATT; + dtpm->power_max *= nr_cpus; + + return 0; +--- a/drivers/powercap/dtpm_devfreq.c ++++ b/drivers/powercap/dtpm_devfreq.c +@@ -39,10 +39,8 @@ static int update_pd_power_uw(struct dtp + struct em_perf_domain *pd = em_pd_get(dev); + + dtpm->power_min = pd->table[0].power; +- dtpm->power_min *= MICROWATT_PER_MILLIWATT; + + dtpm->power_max = pd->table[pd->nr_perf_states - 1].power; +- dtpm->power_max *= MICROWATT_PER_MILLIWATT; + + return 0; + } +@@ -54,13 +52,10 @@ static u64 set_pd_power_limit(struct dtp + struct device *dev = devfreq->dev.parent; + struct em_perf_domain *pd = em_pd_get(dev); + unsigned long freq; +- u64 power; + int i; + + for (i = 0; i < pd->nr_perf_states; i++) { +- +- power = pd->table[i].power * MICROWATT_PER_MILLIWATT; +- if (power > power_limit) ++ if (pd->table[i].power > power_limit) + break; + } + +@@ -68,7 +63,7 @@ static u64 set_pd_power_limit(struct dtp + + dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq); + +- power_limit = pd->table[i - 1].power * MICROWATT_PER_MILLIWATT; ++ 
power_limit = pd->table[i - 1].power; + + return power_limit; + } +@@ -110,7 +105,7 @@ static u64 get_pd_power_uw(struct dtpm * + if (pd->table[i].frequency < freq) + continue; + +- power = pd->table[i].power * MICROWATT_PER_MILLIWATT; ++ power = pd->table[i].power; + power *= status.busy_time; + power >>= 10; + diff --git a/queue-6.6/powerpc-don-t-clobber-f0-vs0-during-fp-altivec-register-save.patch b/queue-6.6/powerpc-don-t-clobber-f0-vs0-during-fp-altivec-register-save.patch new file mode 100644 index 00000000000..5ccd549c169 --- /dev/null +++ b/queue-6.6/powerpc-don-t-clobber-f0-vs0-during-fp-altivec-register-save.patch @@ -0,0 +1,153 @@ +From 5e1d824f9a283cbf90f25241b66d1f69adb3835b Mon Sep 17 00:00:00 2001 +From: Timothy Pearson +Date: Sun, 19 Nov 2023 09:18:02 -0600 +Subject: powerpc: Don't clobber f0/vs0 during fp|altivec register save + +From: Timothy Pearson + +commit 5e1d824f9a283cbf90f25241b66d1f69adb3835b upstream. + +During floating point and vector save to thread data f0/vs0 are +clobbered by the FPSCR/VSCR store routine. This has been observed to +lead to userspace register corruption and application data corruption +with io-uring. + +Fix it by restoring f0/vs0 after FPSCR/VSCR store has completed for +all the FP, altivec, VMX register save paths. + +Tested under QEMU in kvm mode, running on a Talos II workstation with +dual POWER9 DD2.2 CPUs. + +Additional detail (mpe): + +Typically save_fpu() is called from __giveup_fpu() which saves the FP +regs and also *turns off FP* in the tasks MSR, meaning the kernel will +reload the FP regs from the thread struct before letting the task use FP +again. So in that case save_fpu() is free to clobber f0 because the FP +regs no longer hold live values for the task. + +There is another case though, which is the path via: + sys_clone() + ... + copy_process() + dup_task_struct() + arch_dup_task_struct() + flush_all_to_thread() + save_all() + +That path saves the FP regs but leaves them live. 
That's meant as an +optimisation for a process that's using FP/VSX and then calls fork(), +leaving the regs live means the parent process doesn't have to take a +fault after the fork to get its FP regs back. The optimisation was added +in commit 8792468da5e1 ("powerpc: Add the ability to save FPU without +giving it up"). + +That path does clobber f0, but f0 is volatile across function calls, +and typically programs reach copy_process() from userspace via a syscall +wrapper function. So in normal usage f0 being clobbered across a +syscall doesn't cause visible data corruption. + +But there is now a new path, because io-uring can call copy_process() +via create_io_thread() from the signal handling path. That's OK if the +signal is handled as part of syscall return, but it's not OK if the +signal is handled due to some other interrupt. + +That path is: + +interrupt_return_srr_user() + interrupt_exit_user_prepare() + interrupt_exit_user_prepare_main() + do_notify_resume() + get_signal() + task_work_run() + create_worker_cb() + create_io_worker() + copy_process() + dup_task_struct() + arch_dup_task_struct() + flush_all_to_thread() + save_all() + if (tsk->thread.regs->msr & MSR_FP) + save_fpu() + # f0 is clobbered and potentially live in userspace + +Note the above discussion applies equally to save_altivec(). 
+ +Fixes: 8792468da5e1 ("powerpc: Add the ability to save FPU without giving it up") +Cc: stable@vger.kernel.org # v4.6+ +Closes: https://lore.kernel.org/all/480932026.45576726.1699374859845.JavaMail.zimbra@raptorengineeringinc.com/ +Closes: https://lore.kernel.org/linuxppc-dev/480221078.47953493.1700206777956.JavaMail.zimbra@raptorengineeringinc.com/ +Tested-by: Timothy Pearson +Tested-by: Jens Axboe +Signed-off-by: Timothy Pearson +[mpe: Reword change log to describe exact path of corruption & other minor tweaks] +Signed-off-by: Michael Ellerman +Link: https://msgid.link/1921539696.48534988.1700407082933.JavaMail.zimbra@raptorengineeringinc.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/kernel/fpu.S | 13 +++++++++++++ + arch/powerpc/kernel/vector.S | 2 ++ + 2 files changed, 15 insertions(+) + +--- a/arch/powerpc/kernel/fpu.S ++++ b/arch/powerpc/kernel/fpu.S +@@ -23,6 +23,15 @@ + #include + + #ifdef CONFIG_VSX ++#define __REST_1FPVSR(n,c,base) \ ++BEGIN_FTR_SECTION \ ++ b 2f; \ ++END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ ++ REST_FPR(n,base); \ ++ b 3f; \ ++2: REST_VSR(n,c,base); \ ++3: ++ + #define __REST_32FPVSRS(n,c,base) \ + BEGIN_FTR_SECTION \ + b 2f; \ +@@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); + 2: SAVE_32VSRS(n,c,base); \ + 3: + #else ++#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) + #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) + #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) + #endif ++#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) + #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) + #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) + +@@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) + SAVE_32FPVSRS(0, R4, R3) + mffs fr0 + stfd fr0,FPSTATE_FPSCR(r3) ++ REST_1FPVSR(0, R4, R3) + blr + EXPORT_SYMBOL(store_fp_state) + +@@ -138,4 +150,5 @@ _GLOBAL(save_fpu) + 2: SAVE_32FPVSRS(0, R4, R6) + mffs fr0 + stfd fr0,FPSTATE_FPSCR(r6) ++ REST_1FPVSR(0, R4, R6) + 
blr +--- a/arch/powerpc/kernel/vector.S ++++ b/arch/powerpc/kernel/vector.S +@@ -33,6 +33,7 @@ _GLOBAL(store_vr_state) + mfvscr v0 + li r4, VRSTATE_VSCR + stvx v0, r4, r3 ++ lvx v0, 0, r3 + blr + EXPORT_SYMBOL(store_vr_state) + +@@ -109,6 +110,7 @@ _GLOBAL(save_altivec) + mfvscr v0 + li r4,VRSTATE_VSCR + stvx v0,r4,r7 ++ lvx v0,0,r7 + blr + + #ifdef CONFIG_VSX diff --git a/queue-6.6/r8169-fix-deadlock-on-rtl8125-in-jumbo-mtu-mode.patch b/queue-6.6/r8169-fix-deadlock-on-rtl8125-in-jumbo-mtu-mode.patch new file mode 100644 index 00000000000..8b04ae2d729 --- /dev/null +++ b/queue-6.6/r8169-fix-deadlock-on-rtl8125-in-jumbo-mtu-mode.patch @@ -0,0 +1,55 @@ +From 59d395ed606d8df14615712b0cdcdadb2d962175 Mon Sep 17 00:00:00 2001 +From: Heiner Kallweit +Date: Sun, 26 Nov 2023 19:36:46 +0100 +Subject: r8169: fix deadlock on RTL8125 in jumbo mtu mode + +From: Heiner Kallweit + +commit 59d395ed606d8df14615712b0cdcdadb2d962175 upstream. + +The original change results in a deadlock if jumbo mtu mode is used. +Reason is that the phydev lock is held when rtl_reset_work() is called +here, and rtl_jumbo_config() calls phy_start_aneg() which also tries +to acquire the phydev lock. Fix this by calling rtl_reset_work() +asynchronously. 
+ +Fixes: 621735f59064 ("r8169: fix rare issue with broken rx after link-down on RTL8125") +Reported-by: Ian Chen +Tested-by: Ian Chen +Cc: stable@vger.kernel.org +Signed-off-by: Heiner Kallweit +Link: https://lore.kernel.org/r/caf6a487-ef8c-4570-88f9-f47a659faf33@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169_main.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -579,6 +579,7 @@ struct rtl8169_tc_offsets { + enum rtl_flag { + RTL_FLAG_TASK_ENABLED = 0, + RTL_FLAG_TASK_RESET_PENDING, ++ RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, + RTL_FLAG_TASK_TX_TIMEOUT, + RTL_FLAG_MAX + }; +@@ -4582,6 +4583,8 @@ static void rtl_task(struct work_struct + reset: + rtl_reset_work(tp); + netif_wake_queue(tp->dev); ++ } else if (test_and_clear_bit(RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, tp->wk.flags)) { ++ rtl_reset_work(tp); + } + out_unlock: + rtnl_unlock(); +@@ -4615,7 +4618,7 @@ static void r8169_phylink_handler(struct + } else { + /* In few cases rx is broken after link-down otherwise */ + if (rtl_is_8125(tp)) +- rtl_reset_work(tp); ++ rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE); + pm_runtime_idle(d); + } + diff --git a/queue-6.6/series b/queue-6.6/series index fd395da4d8c..48b4f192205 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -37,3 +37,34 @@ drm-amd-display-force-toggle-rate-wa-for-first-link-training-for-a-retimer.patch dm-verity-initialize-fec-io-before-freeing-it.patch dm-verity-don-t-perform-fec-for-failed-readahead-io.patch nvme-check-for-valid-nvme_identify_ns-before-using-it.patch +r8169-fix-deadlock-on-rtl8125-in-jumbo-mtu-mode.patch +acpi-video-use-acpi_video_device-for-cooling-dev-driver-data.patch +io_uring-don-t-allow-discontig-pages-for-ioring_setup_no_mmap.patch +iommu-vt-d-fix-incorrect-cache-invalidation-for-mm-notification.patch 
+io_uring-free-io_buffer_list-entries-via-rcu.patch +nouveau-find-the-smallest-page-allocation-to-cover-a-buffer-alloc.patch +powercap-dtpm-fix-unneeded-conversions-to-micro-watts.patch +cpufreq-amd-pstate-fix-the-return-value-of-amd_pstate_fast_switch.patch +dma-buf-fix-check-in-dma_resv_add_fence.patch +io_uring-don-t-guard-ioring_off_pbuf_ring-with-setup_no_mmap.patch +iommu-avoid-more-races-around-device-probe.patch +bcache-revert-replacing-is_err_or_null-with-is_err.patch +ext2-fix-ki_pos-update-for-dio-buffered-io-fallback-case.patch +iommu-vt-d-add-mtl-to-quirk-list-to-skip-te-disabling.patch +kvm-ppc-book3s-hv-fix-kvm_run-clobbering-fp-vec-user-registers.patch +powerpc-don-t-clobber-f0-vs0-during-fp-altivec-register-save.patch +parisc-mark-ex_table-entries-32-bit-aligned-in-assembly.h.patch +parisc-mark-ex_table-entries-32-bit-aligned-in-uaccess.h.patch +parisc-use-natural-cpu-alignment-for-bug_table.patch +parisc-mark-lock_aligned-variables-16-byte-aligned-on-smp.patch +parisc-drop-the-hp-ux-enosym-and-eremoterelease-error-codes.patch +parisc-mark-jump_table-naturally-aligned.patch +parisc-ensure-32-bit-alignment-on-parisc-unwind-section.patch +parisc-mark-altinstructions-read-only-and-32-bit-aligned.patch +btrfs-add-dmesg-output-for-first-mount-and-last-unmount-of-a-filesystem.patch +btrfs-ref-verify-fix-memory-leaks-in-btrfs_ref_tree_mod.patch +btrfs-fix-off-by-one-when-checking-chunk-map-includes-logical-address.patch +btrfs-send-ensure-send_fd-is-writable.patch +btrfs-make-error-messages-more-clear-when-getting-a-chunk-map.patch +btrfs-free-the-allocated-memory-if-btrfs_alloc_page_array-fails.patch +btrfs-fix-64bit-compat-send-ioctl-arguments-not-initializing-version-member.patch