From: Greg Kroah-Hartman Date: Mon, 14 Oct 2024 12:32:54 +0000 (+0200) Subject: 6.6-stable patches X-Git-Tag: v5.10.227~32 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=336d6414b3e80467b09c62a775c38770148b8cae;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: ata-libata-avoid-superfluous-disk-spin-down-spin-up-during-hibernation.patch bluetooth-hci_conn-fix-uaf-in-hci_enhanced_setup_sync.patch btrfs-split-remaining-space-to-discard-in-chunks.patch device-dax-correct-pgoff-align-in-dax_set_mapping.patch drm-i915-hdcp-fix-connector-refcounting.patch drm-v3d-stop-the-active-perfmon-before-being-destroyed.patch drm-vc4-stop-the-active-perfmon-before-being-destroyed.patch fs-proc-kcore.c-allow-translation-of-physical-memory-addresses.patch kthread-unpark-only-parked-kthread.patch mptcp-fallback-when-mptcp-opts-are-dropped-after-1st-data.patch mptcp-handle-consistently-dss-corruption.patch mptcp-pm-do-not-remove-closing-subflows.patch net-dsa-lan9303-ensure-chip-reset-and-wait-for-ready-status.patch net-explicitly-clear-the-sk-pointer-when-pf-create-fails.patch net-fix-an-unsafe-loop-on-the-list.patch net-phy-remove-led-entry-from-leds-list-on-unregister.patch nouveau-dmem-fix-vulnerability-in-migrate_to_ram-upon-copy-error.patch powercap-intel_rapl_tpmi-fix-bogus-register-reading.patch scsi-ufs-use-pre-calculated-offsets-in-ufshcd_init_lrb.patch scsi-wd33c93-don-t-use-stale-scsi_pointer-value.patch secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map.patch selftests-mm-fix-incorrect-buffer-mirror-size-in-hmm2-double_map-test.patch selftests-rseq-fix-mm_cid-test-failure.patch --- diff --git a/queue-6.6/ata-libata-avoid-superfluous-disk-spin-down-spin-up-during-hibernation.patch b/queue-6.6/ata-libata-avoid-superfluous-disk-spin-down-spin-up-during-hibernation.patch new file mode 100644 index 00000000000..2d51210f746 --- /dev/null +++ b/queue-6.6/ata-libata-avoid-superfluous-disk-spin-down-spin-up-during-hibernation.patch @@ -0,0 +1,76 @@ +From a38719e3157118428e34fbd45b0d0707a5877784 Mon Sep 17 00:00:00 2001 +From: Niklas Cassel +Date: Tue, 8 Oct 2024 15:58:44 +0200 +Subject: ata: libata: avoid superfluous disk spin down + spin up during hibernation + +From: Niklas Cassel + +commit a38719e3157118428e34fbd45b0d0707a5877784 upstream. + +A user reported that commit aa3998dbeb3a ("ata: libata-scsi: Disable scsi +device manage_system_start_stop") introduced a spin down + immediate spin +up of the disk both when entering and when resuming from hibernation. +This behavior was not there before, and causes an increased latency both +when entering and when resuming from hibernation. + +Hibernation is done by three consecutive PM events, in the following order: +1) PM_EVENT_FREEZE +2) PM_EVENT_THAW +3) PM_EVENT_HIBERNATE + +Commit aa3998dbeb3a ("ata: libata-scsi: Disable scsi device +manage_system_start_stop") modified ata_eh_handle_port_suspend() to call +ata_dev_power_set_standby() (which spins down the disk), for both event +PM_EVENT_FREEZE and event PM_EVENT_HIBERNATE. + +Documentation/driver-api/pm/devices.rst, section "Entering Hibernation", +explicitly mentions that PM_EVENT_FREEZE does not have to be put the device +in a low-power state, and actually recommends not doing so. Thus, let's not +spin down the disk on PM_EVENT_FREEZE. (The disk will instead be spun down +during the subsequent PM_EVENT_HIBERNATE event.) + +This way, PM_EVENT_FREEZE will behave as it did before commit aa3998dbeb3a +("ata: libata-scsi: Disable scsi device manage_system_start_stop"), while +PM_EVENT_HIBERNATE will continue to spin down the disk. + +This will avoid the superfluous spin down + spin up when entering and +resuming from hibernation, while still making sure that the disk is spun +down before actually entering hibernation. + +Cc: stable@vger.kernel.org # v6.6+ +Fixes: aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop") +Reviewed-by: Damien Le Moal +Link: https://lore.kernel.org/r/20241008135843.1266244-2-cassel@kernel.org +Signed-off-by: Niklas Cassel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ata/libata-eh.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/drivers/ata/libata-eh.c ++++ b/drivers/ata/libata-eh.c +@@ -4049,10 +4049,20 @@ static void ata_eh_handle_port_suspend(s + + WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); + +- /* Set all devices attached to the port in standby mode */ +- ata_for_each_link(link, ap, HOST_FIRST) { +- ata_for_each_dev(dev, link, ENABLED) +- ata_dev_power_set_standby(dev); ++ /* ++ * We will reach this point for all of the PM events: ++ * PM_EVENT_SUSPEND (if runtime pm, PM_EVENT_AUTO will also be set) ++ * PM_EVENT_FREEZE, and PM_EVENT_HIBERNATE. ++ * ++ * We do not want to perform disk spin down for PM_EVENT_FREEZE. ++ * (Spin down will be performed by the subsequent PM_EVENT_HIBERNATE.) ++ */ ++ if (!(ap->pm_mesg.event & PM_EVENT_FREEZE)) { ++ /* Set all devices attached to the port in standby mode */ ++ ata_for_each_link(link, ap, HOST_FIRST) { ++ ata_for_each_dev(dev, link, ENABLED) ++ ata_dev_power_set_standby(dev); ++ } + } + + /* diff --git a/queue-6.6/bluetooth-hci_conn-fix-uaf-in-hci_enhanced_setup_sync.patch b/queue-6.6/bluetooth-hci_conn-fix-uaf-in-hci_enhanced_setup_sync.patch new file mode 100644 index 00000000000..e6274bde0df --- /dev/null +++ b/queue-6.6/bluetooth-hci_conn-fix-uaf-in-hci_enhanced_setup_sync.patch @@ -0,0 +1,97 @@ +From 18fd04ad856df07733f5bb07e7f7168e7443d393 Mon Sep 17 00:00:00 2001 +From: Luiz Augusto von Dentz +Date: Wed, 2 Oct 2024 11:17:26 -0400 +Subject: Bluetooth: hci_conn: Fix UAF in hci_enhanced_setup_sync + +From: Luiz Augusto von Dentz + +commit 18fd04ad856df07733f5bb07e7f7168e7443d393 upstream. + +This checks if the ACL connection remains valid as it could be destroyed +while hci_enhanced_setup_sync is pending on cmd_sync leading to the +following trace: + +BUG: KASAN: slab-use-after-free in hci_enhanced_setup_sync+0x91b/0xa60 +Read of size 1 at addr ffff888002328ffd by task kworker/u5:2/37 + +CPU: 0 UID: 0 PID: 37 Comm: kworker/u5:2 Not tainted 6.11.0-rc6-01300-g810be445d8d6 #7099 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014 +Workqueue: hci0 hci_cmd_sync_work +Call Trace: + + dump_stack_lvl+0x5d/0x80 + ? hci_enhanced_setup_sync+0x91b/0xa60 + print_report+0x152/0x4c0 + ? hci_enhanced_setup_sync+0x91b/0xa60 + ? __virt_addr_valid+0x1fa/0x420 + ? hci_enhanced_setup_sync+0x91b/0xa60 + kasan_report+0xda/0x1b0 + ? hci_enhanced_setup_sync+0x91b/0xa60 + hci_enhanced_setup_sync+0x91b/0xa60 + ? __pfx_hci_enhanced_setup_sync+0x10/0x10 + ? __pfx___mutex_lock+0x10/0x10 + hci_cmd_sync_work+0x1c2/0x330 + process_one_work+0x7d9/0x1360 + ? __pfx_lock_acquire+0x10/0x10 + ? __pfx_process_one_work+0x10/0x10 + ? assign_work+0x167/0x240 + worker_thread+0x5b7/0xf60 + ? __kthread_parkme+0xac/0x1c0 + ? __pfx_worker_thread+0x10/0x10 + ? __pfx_worker_thread+0x10/0x10 + kthread+0x293/0x360 + ? __pfx_kthread+0x10/0x10 + ret_from_fork+0x2f/0x70 + ? __pfx_kthread+0x10/0x10 + ret_from_fork_asm+0x1a/0x30 + + +Allocated by task 34: + kasan_save_stack+0x30/0x50 + kasan_save_track+0x14/0x30 + __kasan_kmalloc+0x8f/0xa0 + __hci_conn_add+0x187/0x17d0 + hci_connect_sco+0x2e1/0xb90 + sco_sock_connect+0x2a2/0xb80 + __sys_connect+0x227/0x2a0 + __x64_sys_connect+0x6d/0xb0 + do_syscall_64+0x71/0x140 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + +Freed by task 37: + kasan_save_stack+0x30/0x50 + kasan_save_track+0x14/0x30 + kasan_save_free_info+0x3b/0x60 + __kasan_slab_free+0x101/0x160 + kfree+0xd0/0x250 + device_release+0x9a/0x210 + kobject_put+0x151/0x280 + hci_conn_del+0x448/0xbf0 + hci_abort_conn_sync+0x46f/0x980 + hci_cmd_sync_work+0x1c2/0x330 + process_one_work+0x7d9/0x1360 + worker_thread+0x5b7/0xf60 + kthread+0x293/0x360 + ret_from_fork+0x2f/0x70 + ret_from_fork_asm+0x1a/0x30 + +Cc: stable@vger.kernel.org +Fixes: e07a06b4eb41 ("Bluetooth: Convert SCO configure_datapath to hci_sync") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/hci_conn.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/bluetooth/hci_conn.c ++++ b/net/bluetooth/hci_conn.c +@@ -290,6 +290,9 @@ static int hci_enhanced_setup_sync(struc + + kfree(conn_handle); + ++ if (!hci_conn_valid(hdev, conn)) ++ return -ECANCELED; ++ + bt_dev_dbg(hdev, "hcon %p", conn); + + configure_datapath_sync(hdev, &conn->codec); diff --git a/queue-6.6/btrfs-split-remaining-space-to-discard-in-chunks.patch b/queue-6.6/btrfs-split-remaining-space-to-discard-in-chunks.patch new file mode 100644 index 00000000000..79c0f2ccca1 --- /dev/null +++ b/queue-6.6/btrfs-split-remaining-space-to-discard-in-chunks.patch @@ -0,0 +1,78 @@ +From a99fcb0158978ed332009449b484e5f3ca2d7df4 Mon Sep 17 00:00:00 2001 +From: Luca Stefani +Date: Tue, 17 Sep 2024 22:33:04 +0200 +Subject: btrfs: split remaining space to discard in chunks + +From: Luca Stefani + +commit a99fcb0158978ed332009449b484e5f3ca2d7df4 upstream. + +Per Qu Wenruo in case we have a very large disk, e.g. 8TiB device, +mostly empty although we will do the split according to our super block +locations, the last super block ends at 256G, we can submit a huge +discard for the range [256G, 8T), causing a large delay. + +Split the space left to discard based on BTRFS_MAX_DISCARD_CHUNK_SIZE in +preparation of introduction of cancellation points to trim. The value +of the chunk size is arbitrary, it can be higher or derived from actual +device capabilities but we can't easily read that using +bio_discard_limit(). + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=219180 +Link: https://bugzilla.suse.com/show_bug.cgi?id=1229737 +CC: stable@vger.kernel.org # 5.15+ +Signed-off-by: Luca Stefani +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent-tree.c | 19 +++++++++++++++---- + fs/btrfs/volumes.h | 6 ++++++ + 2 files changed, 21 insertions(+), 4 deletions(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -1303,13 +1303,24 @@ static int btrfs_issue_discard(struct bl + bytes_left = end - start; + } + +- if (bytes_left) { ++ while (bytes_left) { ++ u64 bytes_to_discard = min(BTRFS_MAX_DISCARD_CHUNK_SIZE, bytes_left); ++ + ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT, +- bytes_left >> SECTOR_SHIFT, ++ bytes_to_discard >> SECTOR_SHIFT, + GFP_NOFS); +- if (!ret) +- *discarded_bytes += bytes_left; ++ ++ if (ret) { ++ if (ret != -EOPNOTSUPP) ++ break; ++ continue; ++ } ++ ++ start += bytes_to_discard; ++ bytes_left -= bytes_to_discard; ++ *discarded_bytes += bytes_to_discard; + } ++ + return ret; + } + +--- a/fs/btrfs/volumes.h ++++ b/fs/btrfs/volumes.h +@@ -15,6 +15,12 @@ + + #define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) + ++/* ++ * Arbitratry maximum size of one discard request to limit potentially long time ++ * spent in blkdev_issue_discard(). ++ */ ++#define BTRFS_MAX_DISCARD_CHUNK_SIZE (SZ_1G) ++ + extern struct mutex uuid_mutex; + + #define BTRFS_STRIPE_LEN SZ_64K diff --git a/queue-6.6/device-dax-correct-pgoff-align-in-dax_set_mapping.patch b/queue-6.6/device-dax-correct-pgoff-align-in-dax_set_mapping.patch new file mode 100644 index 00000000000..e22d09b5d5d --- /dev/null +++ b/queue-6.6/device-dax-correct-pgoff-align-in-dax_set_mapping.patch @@ -0,0 +1,112 @@ +From 7fcbd9785d4c17ea533c42f20a9083a83f301fa6 Mon Sep 17 00:00:00 2001 +From: "Kun(llfl)" +Date: Fri, 27 Sep 2024 15:45:09 +0800 +Subject: device-dax: correct pgoff align in dax_set_mapping() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Kun(llfl) + +commit 7fcbd9785d4c17ea533c42f20a9083a83f301fa6 upstream. + +pgoff should be aligned using ALIGN_DOWN() instead of ALIGN(). Otherwise, +vmf->address not aligned to fault_size will be aligned to the next +alignment, that can result in memory failure getting the wrong address. + +It's a subtle situation that only can be observed in +page_mapped_in_vma() after the page is page fault handled by +dev_dax_huge_fault. Generally, there is little chance to perform +page_mapped_in_vma in dev-dax's page unless in specific error injection +to the dax device to trigger an MCE - memory-failure. In that case, +page_mapped_in_vma() will be triggered to determine which task is +accessing the failure address and kill that task in the end. + + +We used self-developed dax device (which is 2M aligned mapping) , to +perform error injection to random address. It turned out that error +injected to non-2M-aligned address was causing endless MCE until panic. +Because page_mapped_in_vma() kept resulting wrong address and the task +accessing the failure address was never killed properly: + + +[ 3783.719419] Memory failure: 0x200c9742: recovery action for dax page: +Recovered +[ 3784.049006] mce: Uncorrected hardware memory error in user-access at +200c9742380 +[ 3784.049190] Memory failure: 0x200c9742: recovery action for dax page: +Recovered +[ 3784.448042] mce: Uncorrected hardware memory error in user-access at +200c9742380 +[ 3784.448186] Memory failure: 0x200c9742: recovery action for dax page: +Recovered +[ 3784.792026] mce: Uncorrected hardware memory error in user-access at +200c9742380 +[ 3784.792179] Memory failure: 0x200c9742: recovery action for dax page: +Recovered +[ 3785.162502] mce: Uncorrected hardware memory error in user-access at +200c9742380 +[ 3785.162633] Memory failure: 0x200c9742: recovery action for dax page: +Recovered +[ 3785.461116] mce: Uncorrected hardware memory error in user-access at +200c9742380 +[ 3785.461247] Memory failure: 0x200c9742: recovery action for dax page: +Recovered +[ 3785.764730] mce: Uncorrected hardware memory error in user-access at +200c9742380 +[ 3785.764859] Memory failure: 0x200c9742: recovery action for dax page: +Recovered +[ 3786.042128] mce: Uncorrected hardware memory error in user-access at +200c9742380 +[ 3786.042259] Memory failure: 0x200c9742: recovery action for dax page: +Recovered +[ 3786.464293] mce: Uncorrected hardware memory error in user-access at +200c9742380 +[ 3786.464423] Memory failure: 0x200c9742: recovery action for dax page: +Recovered +[ 3786.818090] mce: Uncorrected hardware memory error in user-access at +200c9742380 +[ 3786.818217] Memory failure: 0x200c9742: recovery action for dax page: +Recovered +[ 3787.085297] mce: Uncorrected hardware memory error in user-access at +200c9742380 +[ 3787.085424] Memory failure: 0x200c9742: recovery action for dax page: +Recovered + +It took us several weeks to pinpoint this problem,  but we eventually +used bpftrace to trace the page fault and mce address and successfully +identified the issue. + + +Joao added: + +; Likely we never reproduce in production because we always pin +: device-dax regions in the region align they provide (Qemu does +: similarly with prealloc in hugetlb/file backed memory). I think this +: bug requires that we touch *unpinned* device-dax regions unaligned to +: the device-dax selected alignment (page size i.e. 4K/2M/1G) + +Link: https://lkml.kernel.org/r/23c02a03e8d666fef11bbe13e85c69c8b4ca0624.1727421694.git.llfl@linux.alibaba.com +Fixes: b9b5777f09be ("device-dax: use ALIGN() for determining pgoff") +Signed-off-by: Kun(llfl) +Tested-by: JianXiong Zhao +Reviewed-by: Joao Martins +Cc: Dan Williams +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + drivers/dax/device.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/dax/device.c ++++ b/drivers/dax/device.c +@@ -86,7 +86,7 @@ static void dax_set_mapping(struct vm_fa + nr_pages = 1; + + pgoff = linear_page_index(vmf->vma, +- ALIGN(vmf->address, fault_size)); ++ ALIGN_DOWN(vmf->address, fault_size)); + + for (i = 0; i < nr_pages; i++) { + struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i); diff --git a/queue-6.6/drm-i915-hdcp-fix-connector-refcounting.patch b/queue-6.6/drm-i915-hdcp-fix-connector-refcounting.patch new file mode 100644 index 00000000000..6292444937b --- /dev/null +++ b/queue-6.6/drm-i915-hdcp-fix-connector-refcounting.patch @@ -0,0 +1,68 @@ +From 4cc2718f621a6a57a02581125bb6d914ce74d23b Mon Sep 17 00:00:00 2001 +From: Jani Nikula +Date: Tue, 24 Sep 2024 18:30:22 +0300 +Subject: drm/i915/hdcp: fix connector refcounting +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jani Nikula + +commit 4cc2718f621a6a57a02581125bb6d914ce74d23b upstream. + +We acquire a connector reference before scheduling an HDCP prop work, +and expect the work function to release the reference. + +However, if the work was already queued, it won't be queued multiple +times, and the reference is not dropped. + +Release the reference immediately if the work was already queued. + +Fixes: a6597faa2d59 ("drm/i915: Protect workers against disappearing connectors") +Cc: Sean Paul +Cc: Suraj Kandpal +Cc: Ville Syrjälä +Cc: stable@vger.kernel.org # v5.10+ +Reviewed-by: Suraj Kandpal +Link: https://patchwork.freedesktop.org/patch/msgid/20240924153022.2255299-1-jani.nikula@intel.com +Signed-off-by: Jani Nikula +(cherry picked from commit abc0742c79bdb3b164eacab24aea0916d2ec1cb5) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/display/intel_hdcp.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/i915/display/intel_hdcp.c ++++ b/drivers/gpu/drm/i915/display/intel_hdcp.c +@@ -1005,7 +1005,8 @@ static void intel_hdcp_update_value(stru + hdcp->value = value; + if (update_property) { + drm_connector_get(&connector->base); +- queue_work(i915->unordered_wq, &hdcp->prop_work); ++ if (!queue_work(i915->unordered_wq, &hdcp->prop_work)) ++ drm_connector_put(&connector->base); + } + } + +@@ -2480,7 +2481,8 @@ void intel_hdcp_update_pipe(struct intel + mutex_lock(&hdcp->mutex); + hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; + drm_connector_get(&connector->base); +- queue_work(i915->unordered_wq, &hdcp->prop_work); ++ if (!queue_work(i915->unordered_wq, &hdcp->prop_work)) ++ drm_connector_put(&connector->base); + mutex_unlock(&hdcp->mutex); + } + +@@ -2497,7 +2499,9 @@ void intel_hdcp_update_pipe(struct intel + */ + if (!desired_and_not_enabled && !content_protection_type_changed) { + drm_connector_get(&connector->base); +- queue_work(i915->unordered_wq, &hdcp->prop_work); ++ if (!queue_work(i915->unordered_wq, &hdcp->prop_work)) ++ drm_connector_put(&connector->base); ++ + } + } + diff --git a/queue-6.6/drm-v3d-stop-the-active-perfmon-before-being-destroyed.patch b/queue-6.6/drm-v3d-stop-the-active-perfmon-before-being-destroyed.patch new file mode 100644 index 00000000000..f4b863972bd --- /dev/null +++ b/queue-6.6/drm-v3d-stop-the-active-perfmon-before-being-destroyed.patch @@ -0,0 +1,113 @@ +From 7d1fd3638ee3a9f9bca4785fffb638ca19120718 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ma=C3=ADra=20Canal?= +Date: Fri, 4 Oct 2024 10:02:29 -0300 +Subject: drm/v3d: Stop the active perfmon before being destroyed +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Maíra Canal + +commit 7d1fd3638ee3a9f9bca4785fffb638ca19120718 upstream. + +When running `kmscube` with one or more performance monitors enabled +via `GALLIUM_HUD`, the following kernel panic can occur: + +[ 55.008324] Unable to handle kernel paging request at virtual address 00000000052004a4 +[ 55.008368] Mem abort info: +[ 55.008377] ESR = 0x0000000096000005 +[ 55.008387] EC = 0x25: DABT (current EL), IL = 32 bits +[ 55.008402] SET = 0, FnV = 0 +[ 55.008412] EA = 0, S1PTW = 0 +[ 55.008421] FSC = 0x05: level 1 translation fault +[ 55.008434] Data abort info: +[ 55.008442] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 +[ 55.008455] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 +[ 55.008467] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 +[ 55.008481] user pgtable: 4k pages, 39-bit VAs, pgdp=00000001046c6000 +[ 55.008497] [00000000052004a4] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 +[ 55.008525] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP +[ 55.008542] Modules linked in: rfcomm [...] vc4 v3d snd_soc_hdmi_codec drm_display_helper +gpu_sched drm_shmem_helper cec drm_dma_helper drm_kms_helper i2c_brcmstb +drm drm_panel_orientation_quirks snd_soc_core snd_compress snd_pcm_dmaengine snd_pcm snd_timer snd backlight +[ 55.008799] CPU: 2 PID: 166 Comm: v3d_bin Tainted: G C 6.6.47+rpt-rpi-v8 #1 Debian 1:6.6.47-1+rpt1 +[ 55.008824] Hardware name: Raspberry Pi 4 Model B Rev 1.5 (DT) +[ 55.008838] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 55.008855] pc : __mutex_lock.constprop.0+0x90/0x608 +[ 55.008879] lr : __mutex_lock.constprop.0+0x58/0x608 +[ 55.008895] sp : ffffffc080673cf0 +[ 55.008904] x29: ffffffc080673cf0 x28: 0000000000000000 x27: ffffff8106188a28 +[ 55.008926] x26: ffffff8101e78040 x25: ffffff8101baa6c0 x24: ffffffd9d989f148 +[ 55.008947] x23: ffffffda1c2a4008 x22: 0000000000000002 x21: ffffffc080673d38 +[ 55.008968] x20: ffffff8101238000 x19: ffffff8104f83188 x18: 0000000000000000 +[ 55.008988] x17: 0000000000000000 x16: ffffffda1bd04d18 x15: 00000055bb08bc90 +[ 55.009715] x14: 0000000000000000 x13: 0000000000000000 x12: ffffffda1bd4cbb0 +[ 55.010433] x11: 00000000fa83b2da x10: 0000000000001a40 x9 : ffffffda1bd04d04 +[ 55.011162] x8 : ffffff8102097b80 x7 : 0000000000000000 x6 : 00000000030a5857 +[ 55.011880] x5 : 00ffffffffffffff x4 : 0300000005200470 x3 : 0300000005200470 +[ 55.012598] x2 : ffffff8101238000 x1 : 0000000000000021 x0 : 0300000005200470 +[ 55.013292] Call trace: +[ 55.013959] __mutex_lock.constprop.0+0x90/0x608 +[ 55.014646] __mutex_lock_slowpath+0x1c/0x30 +[ 55.015317] mutex_lock+0x50/0x68 +[ 55.015961] v3d_perfmon_stop+0x40/0xe0 [v3d] +[ 55.016627] v3d_bin_job_run+0x10c/0x2d8 [v3d] +[ 55.017282] drm_sched_main+0x178/0x3f8 [gpu_sched] +[ 55.017921] kthread+0x11c/0x128 +[ 55.018554] ret_from_fork+0x10/0x20 +[ 55.019168] Code: f9400260 f1001c1f 54001ea9 927df000 (b9403401) +[ 55.019776] ---[ end trace 0000000000000000 ]--- +[ 55.020411] note: v3d_bin[166] exited with preempt_count 1 + +This issue arises because, upon closing the file descriptor (which happens +when we interrupt `kmscube`), the active performance monitor is not +stopped. Although all perfmons are destroyed in `v3d_perfmon_close_file()`, +the active performance monitor's pointer (`v3d->active_perfmon`) is still +retained. + +If `kmscube` is run again, the driver will attempt to stop the active +performance monitor using the stale pointer in `v3d->active_perfmon`. +However, this pointer is no longer valid because the previous process has +already terminated, and all performance monitors associated with it have +been destroyed and freed. + +To fix this, when the active performance monitor belongs to a given +process, explicitly stop it before destroying and freeing it. + +Cc: stable@vger.kernel.org # v5.15+ +Closes: https://github.com/raspberrypi/linux/issues/6389 +Fixes: 26a4dc29b74a ("drm/v3d: Expose performance counters to userspace") +Signed-off-by: Maíra Canal +Reviewed-by: Juan A. Suarez +Link: https://patchwork.freedesktop.org/patch/msgid/20241004130625.918580-2-mcanal@igalia.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/v3d/v3d_perfmon.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/v3d/v3d_perfmon.c ++++ b/drivers/gpu/drm/v3d/v3d_perfmon.c +@@ -103,6 +103,11 @@ void v3d_perfmon_open_file(struct v3d_fi + static int v3d_perfmon_idr_del(int id, void *elem, void *data) + { + struct v3d_perfmon *perfmon = elem; ++ struct v3d_dev *v3d = (struct v3d_dev *)data; ++ ++ /* If the active perfmon is being destroyed, stop it first */ ++ if (perfmon == v3d->active_perfmon) ++ v3d_perfmon_stop(v3d, perfmon, false); + + v3d_perfmon_put(perfmon); + +@@ -111,8 +116,10 @@ static int v3d_perfmon_idr_del(int id, v + + void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv) + { ++ struct v3d_dev *v3d = v3d_priv->v3d; ++ + mutex_lock(&v3d_priv->perfmon.lock); +- idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, NULL); ++ idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, v3d); + idr_destroy(&v3d_priv->perfmon.idr); + mutex_unlock(&v3d_priv->perfmon.lock); + mutex_destroy(&v3d_priv->perfmon.lock); diff --git a/queue-6.6/drm-vc4-stop-the-active-perfmon-before-being-destroyed.patch b/queue-6.6/drm-vc4-stop-the-active-perfmon-before-being-destroyed.patch new file mode 100644 index 00000000000..c0ae9bd08e0 --- /dev/null +++ b/queue-6.6/drm-vc4-stop-the-active-perfmon-before-being-destroyed.patch @@ -0,0 +1,62 @@ +From 0b2ad4f6f2bec74a5287d96cb2325a5e11706f22 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ma=C3=ADra=20Canal?= +Date: Fri, 4 Oct 2024 09:36:00 -0300 +Subject: drm/vc4: Stop the active perfmon before being destroyed +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Maíra Canal + +commit 0b2ad4f6f2bec74a5287d96cb2325a5e11706f22 upstream. + +Upon closing the file descriptor, the active performance monitor is not +stopped. Although all perfmons are destroyed in `vc4_perfmon_close_file()`, +the active performance monitor's pointer (`vc4->active_perfmon`) is still +retained. + +If we open a new file descriptor and submit a few jobs with performance +monitors, the driver will attempt to stop the active performance monitor +using the stale pointer in `vc4->active_perfmon`. However, this pointer +is no longer valid because the previous process has already terminated, +and all performance monitors associated with it have been destroyed and +freed. + +To fix this, when the active performance monitor belongs to a given +process, explicitly stop it before destroying and freeing it. + +Cc: stable@vger.kernel.org # v4.17+ +Cc: Boris Brezillon +Cc: Juan A. Suarez Romero +Fixes: 65101d8c9108 ("drm/vc4: Expose performance counters to userspace") +Signed-off-by: Maíra Canal +Reviewed-by: Juan A. Suarez +Link: https://patchwork.freedesktop.org/patch/msgid/20241004123817.890016-2-mcanal@igalia.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/vc4/vc4_perfmon.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/vc4/vc4_perfmon.c ++++ b/drivers/gpu/drm/vc4/vc4_perfmon.c +@@ -116,6 +116,11 @@ void vc4_perfmon_open_file(struct vc4_fi + static int vc4_perfmon_idr_del(int id, void *elem, void *data) + { + struct vc4_perfmon *perfmon = elem; ++ struct vc4_dev *vc4 = (struct vc4_dev *)data; ++ ++ /* If the active perfmon is being destroyed, stop it first */ ++ if (perfmon == vc4->active_perfmon) ++ vc4_perfmon_stop(vc4, perfmon, false); + + vc4_perfmon_put(perfmon); + +@@ -130,7 +135,7 @@ void vc4_perfmon_close_file(struct vc4_f + return; + + mutex_lock(&vc4file->perfmon.lock); +- idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL); ++ idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, vc4); + idr_destroy(&vc4file->perfmon.idr); + mutex_unlock(&vc4file->perfmon.lock); + mutex_destroy(&vc4file->perfmon.lock); diff --git a/queue-6.6/fs-proc-kcore.c-allow-translation-of-physical-memory-addresses.patch b/queue-6.6/fs-proc-kcore.c-allow-translation-of-physical-memory-addresses.patch new file mode 100644 index 00000000000..4dc24baf61c --- /dev/null +++ b/queue-6.6/fs-proc-kcore.c-allow-translation-of-physical-memory-addresses.patch @@ -0,0 +1,122 @@ +From 3d5854d75e3187147613130561b58f0b06166172 Mon Sep 17 00:00:00 2001 +From: Alexander Gordeev +Date: Mon, 30 Sep 2024 14:21:19 +0200 +Subject: fs/proc/kcore.c: allow translation of physical memory addresses + +From: Alexander Gordeev + +commit 3d5854d75e3187147613130561b58f0b06166172 upstream. + +When /proc/kcore is read an attempt to read the first two pages results in +HW-specific page swap on s390 and another (so called prefix) pages are +accessed instead. That leads to a wrong read. + +Allow architecture-specific translation of memory addresses using +kc_xlate_dev_mem_ptr() and kc_unxlate_dev_mem_ptr() callbacks similarily +to /dev/mem xlate_dev_mem_ptr() and unxlate_dev_mem_ptr() callbacks. That +way an architecture can deal with specific physical memory ranges. + +Re-use the existing /dev/mem callback implementation on s390, which +handles the described prefix pages swapping correctly. + +For other architectures the default callback is basically NOP. It is +expected the condition (vaddr == __va(__pa(vaddr))) always holds true for +KCORE_RAM memory type. + +Link: https://lkml.kernel.org/r/20240930122119.1651546-1-agordeev@linux.ibm.com +Signed-off-by: Alexander Gordeev +Suggested-by: Heiko Carstens +Cc: Vasily Gorbik +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/include/asm/io.h | 2 ++ + fs/proc/kcore.c | 36 ++++++++++++++++++++++++++++++++++-- + 2 files changed, 36 insertions(+), 2 deletions(-) + +--- a/arch/s390/include/asm/io.h ++++ b/arch/s390/include/asm/io.h +@@ -16,8 +16,10 @@ + #include + + #define xlate_dev_mem_ptr xlate_dev_mem_ptr ++#define kc_xlate_dev_mem_ptr xlate_dev_mem_ptr + void *xlate_dev_mem_ptr(phys_addr_t phys); + #define unxlate_dev_mem_ptr unxlate_dev_mem_ptr ++#define kc_unxlate_dev_mem_ptr unxlate_dev_mem_ptr + void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); + + #define IO_SPACE_LIMIT 0 +--- a/fs/proc/kcore.c ++++ b/fs/proc/kcore.c +@@ -50,6 +50,20 @@ static struct proc_dir_entry *proc_root_ + #define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET) + #endif + ++#ifndef kc_xlate_dev_mem_ptr ++#define kc_xlate_dev_mem_ptr kc_xlate_dev_mem_ptr ++static inline void *kc_xlate_dev_mem_ptr(phys_addr_t phys) ++{ ++ return __va(phys); ++} ++#endif ++#ifndef kc_unxlate_dev_mem_ptr ++#define kc_unxlate_dev_mem_ptr kc_unxlate_dev_mem_ptr ++static inline void kc_unxlate_dev_mem_ptr(phys_addr_t phys, void *virt) ++{ ++} ++#endif ++ + static LIST_HEAD(kclist_head); + static DECLARE_RWSEM(kclist_lock); + static int kcore_need_update = 1; +@@ -471,6 +485,8 @@ static ssize_t read_kcore_iter(struct ki + while (buflen) { + struct page *page; + unsigned long pfn; ++ phys_addr_t phys; ++ void *__start; + + /* + * If this is the first iteration or the address is not within +@@ -537,7 +553,8 @@ static ssize_t read_kcore_iter(struct ki + } + break; + case KCORE_RAM: +- pfn = __pa(start) >> PAGE_SHIFT; ++ phys = __pa(start); ++ pfn = phys >> PAGE_SHIFT; + page = pfn_to_online_page(pfn); + + /* +@@ -556,13 +573,28 @@ static ssize_t read_kcore_iter(struct ki + fallthrough; + case KCORE_VMEMMAP: + case KCORE_TEXT: ++ if (m->type == KCORE_RAM) { ++ __start = kc_xlate_dev_mem_ptr(phys); ++ if (!__start) { ++ ret = -ENOMEM; ++ if (iov_iter_zero(tsz, iter) != tsz) ++ ret = -EFAULT; ++ goto out; ++ } ++ } else { ++ __start = (void *)start; ++ } ++ + /* + * Sadly we must use a bounce buffer here to be able to + * make use of copy_from_kernel_nofault(), as these + * memory regions might not always be mapped on all + * architectures. + */ +- if (copy_from_kernel_nofault(buf, (void *)start, tsz)) { ++ ret = copy_from_kernel_nofault(buf, __start, tsz); ++ if (m->type == KCORE_RAM) ++ kc_unxlate_dev_mem_ptr(phys, __start); ++ if (ret) { + if (iov_iter_zero(tsz, iter) != tsz) { + ret = -EFAULT; + goto out; diff --git a/queue-6.6/kthread-unpark-only-parked-kthread.patch b/queue-6.6/kthread-unpark-only-parked-kthread.patch new file mode 100644 index 00000000000..4da762cc31c --- /dev/null +++ b/queue-6.6/kthread-unpark-only-parked-kthread.patch @@ -0,0 +1,65 @@ +From 214e01ad4ed7158cab66498810094fac5d09b218 Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Fri, 13 Sep 2024 23:46:34 +0200 +Subject: kthread: unpark only parked kthread + +From: Frederic Weisbecker + +commit 214e01ad4ed7158cab66498810094fac5d09b218 upstream. + +Calling into kthread unparking unconditionally is mostly harmless when +the kthread is already unparked. The wake up is then simply ignored +because the target is not in TASK_PARKED state. + +However if the kthread is per CPU, the wake up is preceded by a call +to kthread_bind() which expects the task to be inactive and in +TASK_PARKED state, which obviously isn't the case if it is unparked. + +As a result, calling kthread_stop() on an unparked per-cpu kthread +triggers such a warning: + + WARNING: CPU: 0 PID: 11 at kernel/kthread.c:525 __kthread_bind_mask kernel/kthread.c:525 + + kthread_stop+0x17a/0x630 kernel/kthread.c:707 + destroy_workqueue+0x136/0xc40 kernel/workqueue.c:5810 + wg_destruct+0x1e2/0x2e0 drivers/net/wireguard/device.c:257 + netdev_run_todo+0xe1a/0x1000 net/core/dev.c:10693 + default_device_exit_batch+0xa14/0xa90 net/core/dev.c:11769 + ops_exit_list net/core/net_namespace.c:178 [inline] + cleanup_net+0x89d/0xcc0 net/core/net_namespace.c:640 + process_one_work kernel/workqueue.c:3231 [inline] + process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312 + worker_thread+0x86d/0xd70 kernel/workqueue.c:3393 + kthread+0x2f0/0x390 kernel/kthread.c:389 + ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 + + +Fix this with skipping unecessary unparking while stopping a kthread. + +Link: https://lkml.kernel.org/r/20240913214634.12557-1-frederic@kernel.org +Fixes: 5c25b5ff89f0 ("workqueue: Tag bound workers with KTHREAD_IS_PER_CPU") +Signed-off-by: Frederic Weisbecker +Reported-by: syzbot+943d34fa3cf2191e3068@syzkaller.appspotmail.com +Tested-by: syzbot+943d34fa3cf2191e3068@syzkaller.appspotmail.com +Suggested-by: Thomas Gleixner +Cc: Hillf Danton +Cc: Tejun Heo +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + kernel/kthread.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/kthread.c ++++ b/kernel/kthread.c +@@ -622,6 +622,8 @@ void kthread_unpark(struct task_struct * + { + struct kthread *kthread = to_kthread(k); + ++ if (!test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)) ++ return; + /* + * Newly created kthread was parked when the CPU was offline. + * The binding was lost and we need to set it again. diff --git a/queue-6.6/mptcp-fallback-when-mptcp-opts-are-dropped-after-1st-data.patch b/queue-6.6/mptcp-fallback-when-mptcp-opts-are-dropped-after-1st-data.patch new file mode 100644 index 00000000000..df6c4b7e29f --- /dev/null +++ b/queue-6.6/mptcp-fallback-when-mptcp-opts-are-dropped-after-1st-data.patch @@ -0,0 +1,85 @@ +From 119d51e225febc8152476340a880f5415a01e99e Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 8 Oct 2024 13:04:54 +0200 +Subject: mptcp: fallback when MPTCP opts are dropped after 1st data + +From: Matthieu Baerts (NGI0) + +commit 119d51e225febc8152476340a880f5415a01e99e upstream. + +As reported by Christoph [1], before this patch, an MPTCP connection was +wrongly reset when a host received a first data packet with MPTCP +options after the 3wHS, but got the next ones without. + +According to the MPTCP v1 specs [2], a fallback should happen in this +case, because the host didn't receive a DATA_ACK from the other peer, +nor receive data for more than the initial window which implies a +DATA_ACK being received by the other peer. + +The patch here re-uses the same logic as the one used in other places: +by looking at allow_infinite_fallback, which is disabled at the creation +of an additional subflow. It's not looking at the first DATA_ACK (or +implying one received from the other side) as suggested by the RFC, but +it is in continuation with what was already done, which is safer, and it +fixes the reported issue. The next step, looking at this first DATA_ACK, +is tracked in [4]. + +This patch has been validated using the following Packetdrill script: + + 0 socket(..., SOCK_STREAM, IPPROTO_MPTCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + // 3WHS is OK + +0.0 < S 0:0(0) win 65535 + +0.0 > S. 0:0(0) ack 1 + +0.1 < . 1:1(0) ack 1 win 2048 + +0 accept(3, ..., ...) = 4 + + // Data from the client with valid MPTCP options (no DATA_ACK: normal) + +0.1 < P. 1:501(500) ack 1 win 2048 + // From here, the MPTCP options will be dropped by a middlebox + +0.0 > . 1:1(0) ack 501 + + +0.1 read(4, ..., 500) = 500 + +0 write(4, ..., 100) = 100 + + // The server replies with data, still thinking MPTCP is being used + +0.0 > P. 1:101(100) ack 501 + // But the client already did a fallback to TCP, because the two previous packets have been received without MPTCP options + +0.1 < . 501:501(0) ack 101 win 2048 + + +0.0 < P. 501:601(100) ack 101 win 2048 + // The server should fallback to TCP, not reset: it didn't get a DATA_ACK, nor data for more than the initial window + +0.0 > . 101:101(0) ack 601 + +Note that this script requires Packetdrill with MPTCP support, see [3]. + +Fixes: dea2b1ea9c70 ("mptcp: do not reset MP_CAPABLE subflow on mapping errors") +Cc: stable@vger.kernel.org +Reported-by: Christoph Paasch +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/518 [1] +Link: https://datatracker.ietf.org/doc/html/rfc8684#name-fallback [2] +Link: https://github.com/multipath-tcp/packetdrill [3] +Link: https://github.com/multipath-tcp/mptcp_net-next/issues/519 [4] +Reviewed-by: Paolo Abeni +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-3-c6fb8e93e551@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/subflow.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -1247,7 +1247,7 @@ static bool subflow_can_fallback(struct + else if (READ_ONCE(msk->csum_enabled)) + return !subflow->valid_csum_seen; + else +- return !subflow->fully_established; ++ return READ_ONCE(msk->allow_infinite_fallback); + } + + static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) diff --git a/queue-6.6/mptcp-handle-consistently-dss-corruption.patch b/queue-6.6/mptcp-handle-consistently-dss-corruption.patch new file mode 100644 index 00000000000..2844b097ffb --- /dev/null +++ b/queue-6.6/mptcp-handle-consistently-dss-corruption.patch @@ -0,0 +1,107 @@ +From e32d262c89e2b22cb0640223f953b548617ed8a6 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Tue, 8 Oct 2024 13:04:52 +0200 +Subject: mptcp: handle consistently DSS corruption + +From: Paolo Abeni + +commit e32d262c89e2b22cb0640223f953b548617ed8a6 upstream. + +Bugged peer implementation can send corrupted DSS options, consistently +hitting a few warning in the data path. Use DEBUG_NET assertions, to +avoid the splat on some builds and handle consistently the error, dumping +related MIBs and performing fallback and/or reset according to the +subflow type. + +Fixes: 6771bfd9ee24 ("mptcp: update mptcp ack sequence from work queue") +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-1-c6fb8e93e551@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/mib.c | 2 ++ + net/mptcp/mib.h | 2 ++ + net/mptcp/protocol.c | 24 +++++++++++++++++++++--- + net/mptcp/subflow.c | 4 +++- + 4 files changed, 28 insertions(+), 4 deletions(-) + +--- a/net/mptcp/mib.c ++++ b/net/mptcp/mib.c +@@ -26,6 +26,8 @@ static const struct snmp_mib mptcp_snmp_ + SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), + SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), + SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), ++ SNMP_MIB_ITEM("DSSCorruptionFallback", MPTCP_MIB_DSSCORRUPTIONFALLBACK), ++ SNMP_MIB_ITEM("DSSCorruptionReset", MPTCP_MIB_DSSCORRUPTIONRESET), + SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX), + SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), + SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH), +--- a/net/mptcp/mib.h ++++ b/net/mptcp/mib.h +@@ -19,6 +19,8 @@ enum linux_mptcp_mib_field { + MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */ + MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ + MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */ ++ MPTCP_MIB_DSSCORRUPTIONFALLBACK,/* DSS corruption detected, fallback */ ++ MPTCP_MIB_DSSCORRUPTIONRESET, /* DSS corruption detected, MPJ subflow reset */ + MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */ + MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */ + MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */ +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -620,6 +620,18 @@ static bool mptcp_check_data_fin(struct + return ret; + } + ++static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk) ++{ ++ if (READ_ONCE(msk->allow_infinite_fallback)) { ++ MPTCP_INC_STATS(sock_net(ssk), ++ MPTCP_MIB_DSSCORRUPTIONFALLBACK); ++ mptcp_do_fallback(ssk); ++ } else { ++ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET); ++ mptcp_subflow_reset(ssk); ++ } ++} ++ + static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, + struct sock *ssk, + unsigned int *bytes) +@@ -692,10 +704,16 @@ static bool __mptcp_move_skbs_from_subfl + moved += len; + seq += len; + +- if (WARN_ON_ONCE(map_remaining < len)) +- break; ++ if (unlikely(map_remaining < len)) { ++ DEBUG_NET_WARN_ON_ONCE(1); ++ mptcp_dss_corruption(msk, ssk); ++ } + } else { +- WARN_ON_ONCE(!fin); ++ if (unlikely(!fin)) { ++ DEBUG_NET_WARN_ON_ONCE(1); ++ mptcp_dss_corruption(msk, ssk); ++ } ++ + sk_eat_skb(ssk, skb); + done = true; + } +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -944,8 +944,10 @@ static bool skb_is_fully_mapped(struct s + unsigned int skb_consumed; + + skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq; +- if (WARN_ON_ONCE(skb_consumed >= skb->len)) ++ if (unlikely(skb_consumed >= skb->len)) { ++ DEBUG_NET_WARN_ON_ONCE(1); + return true; ++ } + + return skb->len - skb_consumed <= subflow->map_data_len - + mptcp_subflow_get_map_offset(subflow); diff --git a/queue-6.6/mptcp-pm-do-not-remove-closing-subflows.patch b/queue-6.6/mptcp-pm-do-not-remove-closing-subflows.patch new file mode 100644 index 00000000000..4a97524ff95 --- /dev/null +++ b/queue-6.6/mptcp-pm-do-not-remove-closing-subflows.patch @@ -0,0 +1,41 @@ +From db0a37b7ac27d8ca27d3dc676a16d081c16ec7b9 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Tue, 8 Oct 2024 13:04:55 +0200 +Subject: mptcp: pm: do not remove closing subflows + +From: Matthieu Baerts (NGI0) + +commit db0a37b7ac27d8ca27d3dc676a16d081c16ec7b9 upstream. + +In a previous fix, the in-kernel path-manager has been modified not to +retrigger the removal of a subflow if it was already closed, e.g. when +the initial subflow is removed, but kept in the subflows list. + +To be complete, this fix should also skip the subflows that are in any +closing state: mptcp_close_ssk() will initiate the closure, but the +switch to the TCP_CLOSE state depends on the other peer. + +Fixes: 58e1b66b4e4b ("mptcp: pm: do not remove already closed subflows") +Cc: stable@vger.kernel.org +Suggested-by: Paolo Abeni +Acked-by: Paolo Abeni +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-4-c6fb8e93e551@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm_netlink.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/mptcp/pm_netlink.c ++++ b/net/mptcp/pm_netlink.c +@@ -864,7 +864,8 @@ static void mptcp_pm_nl_rm_addr_or_subfl + int how = RCV_SHUTDOWN | SEND_SHUTDOWN; + u8 id = subflow_get_local_id(subflow); + +- if (inet_sk_state_load(ssk) == TCP_CLOSE) ++ if ((1 << inet_sk_state_load(ssk)) & ++ (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE)) + continue; + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) + continue; diff --git a/queue-6.6/net-dsa-lan9303-ensure-chip-reset-and-wait-for-ready-status.patch b/queue-6.6/net-dsa-lan9303-ensure-chip-reset-and-wait-for-ready-status.patch new file mode 100644 index 00000000000..6e4813f2d64 --- /dev/null +++ b/queue-6.6/net-dsa-lan9303-ensure-chip-reset-and-wait-for-ready-status.patch @@ -0,0 +1,83 @@ +From 5c14e51d2d7df49fe0d4e64a12c58d2542f452ff Mon Sep 17 00:00:00 2001 +From: Anatolij Gustschin +Date: Fri, 4 Oct 2024 13:36:54 +0200 +Subject: net: dsa: lan9303: ensure chip reset and wait for READY status + +From: Anatolij Gustschin + +commit 5c14e51d2d7df49fe0d4e64a12c58d2542f452ff upstream. + +Accessing device registers seems to be not reliable, the chip +revision is sometimes detected wrongly (0 instead of expected 1). + +Ensure that the chip reset is performed via reset GPIO and then +wait for 'Device Ready' status in HW_CFG register before doing +any register initializations. + +Cc: stable@vger.kernel.org +Fixes: a1292595e006 ("net: dsa: add new DSA switch driver for the SMSC-LAN9303") +Signed-off-by: Anatolij Gustschin +[alex: reworked using read_poll_timeout()] +Signed-off-by: Alexander Sverdlin +Reviewed-by: Vladimir Oltean +Link: https://patch.msgid.link/20241004113655.3436296-1-alexander.sverdlin@siemens.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/lan9303-core.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +--- a/drivers/net/dsa/lan9303-core.c ++++ b/drivers/net/dsa/lan9303-core.c +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -839,6 +840,8 @@ static void lan9303_handle_reset(struct + if (!chip->reset_gpio) + return; + ++ gpiod_set_value_cansleep(chip->reset_gpio, 1); ++ + if (chip->reset_duration != 0) + msleep(chip->reset_duration); + +@@ -864,8 +867,34 @@ static int lan9303_disable_processing(st + static int lan9303_check_device(struct lan9303 *chip) + { + int ret; ++ int err; + u32 reg; + ++ /* In I2C-managed configurations this polling loop will clash with ++ * switch's reading of EEPROM right after reset and this behaviour is ++ * not configurable. While lan9303_read() already has quite long retry ++ * timeout, seems not all cases are being detected as arbitration error. ++ * ++ * According to datasheet, EEPROM loader has 30ms timeout (in case of ++ * missing EEPROM). ++ * ++ * Loading of the largest supported EEPROM is expected to take at least ++ * 5.9s. ++ */ ++ err = read_poll_timeout(lan9303_read, ret, ++ !ret && reg & LAN9303_HW_CFG_READY, ++ 20000, 6000000, false, ++ chip->regmap, LAN9303_HW_CFG, ®); ++ if (ret) { ++ dev_err(chip->dev, "failed to read HW_CFG reg: %pe\n", ++ ERR_PTR(ret)); ++ return ret; ++ } ++ if (err) { ++ dev_err(chip->dev, "HW_CFG not ready: 0x%08x\n", reg); ++ return err; ++ } ++ + ret = lan9303_read(chip->regmap, LAN9303_CHIP_REV, ®); + if (ret) { + dev_err(chip->dev, "failed to read chip revision register: %d\n", diff --git a/queue-6.6/net-explicitly-clear-the-sk-pointer-when-pf-create-fails.patch b/queue-6.6/net-explicitly-clear-the-sk-pointer-when-pf-create-fails.patch new file mode 100644 index 00000000000..45cadc2eacb --- /dev/null +++ b/queue-6.6/net-explicitly-clear-the-sk-pointer-when-pf-create-fails.patch @@ -0,0 +1,56 @@ +From 631083143315d1b192bd7d915b967b37819e88ea Mon Sep 17 00:00:00 2001 +From: Ignat Korchagin +Date: Thu, 3 Oct 2024 18:01:51 +0100 +Subject: net: explicitly clear the sk pointer, when pf->create fails + +From: Ignat Korchagin + +commit 631083143315d1b192bd7d915b967b37819e88ea upstream. + +We have recently noticed the exact same KASAN splat as in commit +6cd4a78d962b ("net: do not leave a dangling sk pointer, when socket +creation fails"). The problem is that commit did not fully address the +problem, as some pf->create implementations do not use sk_common_release +in their error paths. + +For example, we can use the same reproducer as in the above commit, but +changing ping to arping. arping uses AF_PACKET socket and if packet_create +fails, it will just sk_free the allocated sk object. + +While we could chase all the pf->create implementations and make sure they +NULL the freed sk object on error from the socket, we can't guarantee +future protocols will not make the same mistake. + +So it is easier to just explicitly NULL the sk pointer upon return from +pf->create in __sock_create. We do know that pf->create always releases the +allocated sk object on error, so if the pointer is not NULL, it is +definitely dangling. + +Fixes: 6cd4a78d962b ("net: do not leave a dangling sk pointer, when socket creation fails") +Signed-off-by: Ignat Korchagin +Cc: stable@vger.kernel.org +Reviewed-by: Eric Dumazet +Link: https://patch.msgid.link/20241003170151.69445-1-ignat@cloudflare.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/socket.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/socket.c ++++ b/net/socket.c +@@ -1569,8 +1569,13 @@ int __sock_create(struct net *net, int f + rcu_read_unlock(); + + err = pf->create(net, sock, protocol, kern); +- if (err < 0) ++ if (err < 0) { ++ /* ->create should release the allocated sock->sk object on error ++ * but it may leave the dangling pointer ++ */ ++ sock->sk = NULL; + goto out_module_put; ++ } + + /* + * Now to bump the refcnt of the [loadable] module that owns this diff --git a/queue-6.6/net-fix-an-unsafe-loop-on-the-list.patch b/queue-6.6/net-fix-an-unsafe-loop-on-the-list.patch new file mode 100644 index 00000000000..3b7b26fbc88 --- /dev/null +++ b/queue-6.6/net-fix-an-unsafe-loop-on-the-list.patch @@ -0,0 +1,60 @@ +From 1dae9f1187189bc09ff6d25ca97ead711f7e26f9 Mon Sep 17 00:00:00 2001 +From: Anastasia Kovaleva +Date: Thu, 3 Oct 2024 13:44:31 +0300 +Subject: net: Fix an unsafe loop on the list + +From: Anastasia Kovaleva + +commit 1dae9f1187189bc09ff6d25ca97ead711f7e26f9 upstream. + +The kernel may crash when deleting a genetlink family if there are still +listeners for that family: + +Oops: Kernel access of bad area, sig: 11 [#1] + ... + NIP [c000000000c080bc] netlink_update_socket_mc+0x3c/0xc0 + LR [c000000000c0f764] __netlink_clear_multicast_users+0x74/0xc0 + Call Trace: +__netlink_clear_multicast_users+0x74/0xc0 +genl_unregister_family+0xd4/0x2d0 + +Change the unsafe loop on the list to a safe one, because inside the +loop there is an element removal from this list. + +Fixes: b8273570f802 ("genetlink: fix netns vs. netlink table locking (2)") +Cc: stable@vger.kernel.org +Signed-off-by: Anastasia Kovaleva +Reviewed-by: Dmitry Bogdanov +Reviewed-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20241003104431.12391-1-a.kovaleva@yadro.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sock.h | 2 ++ + net/netlink/af_netlink.c | 3 ++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -902,6 +902,8 @@ static inline void sk_add_bind2_node(str + hlist_for_each_entry(__sk, list, sk_bind_node) + #define sk_for_each_bound_bhash2(__sk, list) \ + hlist_for_each_entry(__sk, list, sk_bind2_node) ++#define sk_for_each_bound_safe(__sk, tmp, list) \ ++ hlist_for_each_entry_safe(__sk, tmp, list, sk_bind_node) + + /** + * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -2143,8 +2143,9 @@ void __netlink_clear_multicast_users(str + { + struct sock *sk; + struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; ++ struct hlist_node *tmp; + +- sk_for_each_bound(sk, &tbl->mc_list) ++ sk_for_each_bound_safe(sk, tmp, &tbl->mc_list) + netlink_update_socket_mc(nlk_sk(sk), group, 0); + } + diff --git a/queue-6.6/net-phy-remove-led-entry-from-leds-list-on-unregister.patch b/queue-6.6/net-phy-remove-led-entry-from-leds-list-on-unregister.patch new file mode 100644 index 00000000000..1a52dd2fefa --- /dev/null +++ b/queue-6.6/net-phy-remove-led-entry-from-leds-list-on-unregister.patch @@ -0,0 +1,58 @@ +From f50b5d74c68e551667e265123659b187a30fe3a5 Mon Sep 17 00:00:00 2001 +From: Christian Marangi +Date: Fri, 4 Oct 2024 20:27:58 +0200 +Subject: net: phy: Remove LED entry from LEDs list on unregister + +From: Christian Marangi + +commit f50b5d74c68e551667e265123659b187a30fe3a5 upstream. + +Commit c938ab4da0eb ("net: phy: Manual remove LEDs to ensure correct +ordering") correctly fixed a problem with using devm_ but missed +removing the LED entry from the LEDs list. + +This cause kernel panic on specific scenario where the port for the PHY +is torn down and up and the kmod for the PHY is removed. + +On setting the port down the first time, the assosiacted LEDs are +correctly unregistered. The associated kmod for the PHY is now removed. +The kmod is now added again and the port is now put up, the associated LED +are registered again. +On putting the port down again for the second time after these step, the +LED list now have 4 elements. With the first 2 already unregistered +previously and the 2 new one registered again. + +This cause a kernel panic as the first 2 element should have been +removed. + +Fix this by correctly removing the element when LED is unregistered. + +Reported-by: Daniel Golle +Tested-by: Daniel Golle +Cc: stable@vger.kernel.org +Fixes: c938ab4da0eb ("net: phy: Manual remove LEDs to ensure correct ordering") +Signed-off-by: Christian Marangi +Reviewed-by: Andrew Lunn +Link: https://patch.msgid.link/20241004182759.14032-1-ansuelsmth@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy_device.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -3082,10 +3082,11 @@ static __maybe_unused int phy_led_hw_is_ + + static void phy_leds_unregister(struct phy_device *phydev) + { +- struct phy_led *phyled; ++ struct phy_led *phyled, *tmp; + +- list_for_each_entry(phyled, &phydev->leds, list) { ++ list_for_each_entry_safe(phyled, tmp, &phydev->leds, list) { + led_classdev_unregister(&phyled->led_cdev); ++ list_del(&phyled->list); + } + } + diff --git a/queue-6.6/nouveau-dmem-fix-vulnerability-in-migrate_to_ram-upon-copy-error.patch b/queue-6.6/nouveau-dmem-fix-vulnerability-in-migrate_to_ram-upon-copy-error.patch new file mode 100644 index 00000000000..2bf20f1e928 --- /dev/null +++ b/queue-6.6/nouveau-dmem-fix-vulnerability-in-migrate_to_ram-upon-copy-error.patch @@ -0,0 +1,48 @@ +From 835745a377a4519decd1a36d6b926e369b3033e2 Mon Sep 17 00:00:00 2001 +From: Yonatan Maman +Date: Tue, 8 Oct 2024 14:59:43 +0300 +Subject: nouveau/dmem: Fix vulnerability in migrate_to_ram upon copy error + +From: Yonatan Maman + +commit 835745a377a4519decd1a36d6b926e369b3033e2 upstream. + +The `nouveau_dmem_copy_one` function ensures that the copy push command is +sent to the device firmware but does not track whether it was executed +successfully. + +In the case of a copy error (e.g., firmware or hardware failure), the +copy push command will be sent via the firmware channel, and +`nouveau_dmem_copy_one` will likely report success, leading to the +`migrate_to_ram` function returning a dirty HIGH_USER page to the user. + +This can result in a security vulnerability, as a HIGH_USER page that may +contain sensitive or corrupted data could be returned to the user. + +To prevent this vulnerability, we allocate a zero page. Thus, in case of +an error, a non-dirty (zero) page will be returned to the user. + +Fixes: 5be73b690875 ("drm/nouveau/dmem: device memory helpers for SVM") +Signed-off-by: Yonatan Maman +Co-developed-by: Gal Shalom +Signed-off-by: Gal Shalom +Reviewed-by: Ben Skeggs +Cc: stable@vger.kernel.org +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20241008115943.990286-3-ymaman@nvidia.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c ++++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c +@@ -193,7 +193,7 @@ static vm_fault_t nouveau_dmem_migrate_t + if (!spage || !(src & MIGRATE_PFN_MIGRATE)) + goto done; + +- dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address); ++ dpage = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vmf->vma, vmf->address); + if (!dpage) + goto done; + diff --git a/queue-6.6/powercap-intel_rapl_tpmi-fix-bogus-register-reading.patch b/queue-6.6/powercap-intel_rapl_tpmi-fix-bogus-register-reading.patch new file mode 100644 index 00000000000..4623b95a568 --- /dev/null +++ b/queue-6.6/powercap-intel_rapl_tpmi-fix-bogus-register-reading.patch @@ -0,0 +1,34 @@ +From 91e8f835a7eda4ba2c0c4002a3108a0e3b22d34e Mon Sep 17 00:00:00 2001 +From: Zhang Rui +Date: Mon, 30 Sep 2024 16:17:56 +0800 +Subject: powercap: intel_rapl_tpmi: Fix bogus register reading + +From: Zhang Rui + +commit 91e8f835a7eda4ba2c0c4002a3108a0e3b22d34e upstream. + +The TPMI_RAPL_REG_DOMAIN_INFO value needs to be multiplied by 8 to get +the register offset. + +Cc: All applicable +Fixes: 903eb9fb85e3 ("powercap: intel_rapl_tpmi: Fix System Domain probing") +Signed-off-by: Zhang Rui +Link: https://patch.msgid.link/20240930081801.28502-2-rui.zhang@intel.com +[ rjw: Changelog edits ] +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/powercap/intel_rapl_tpmi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/powercap/intel_rapl_tpmi.c ++++ b/drivers/powercap/intel_rapl_tpmi.c +@@ -192,7 +192,7 @@ static int parse_one_domain(struct tpmi_ + pr_warn(FW_BUG "System domain must support Domain Info register\n"); + return -ENODEV; + } +- tpmi_domain_info = readq(trp->base + offset + TPMI_RAPL_REG_DOMAIN_INFO); ++ tpmi_domain_info = readq(trp->base + offset + TPMI_RAPL_REG_DOMAIN_INFO * 8); + if (!(tpmi_domain_info & TPMI_RAPL_DOMAIN_ROOT)) + return 0; + domain_type = RAPL_DOMAIN_PLATFORM; diff --git a/queue-6.6/scsi-ufs-use-pre-calculated-offsets-in-ufshcd_init_lrb.patch b/queue-6.6/scsi-ufs-use-pre-calculated-offsets-in-ufshcd_init_lrb.patch new file mode 100644 index 00000000000..3b3a9220895 --- /dev/null +++ b/queue-6.6/scsi-ufs-use-pre-calculated-offsets-in-ufshcd_init_lrb.patch @@ -0,0 +1,41 @@ +From d5130c5a093257aa4542aaded8034ef116a7624a Mon Sep 17 00:00:00 2001 +From: Avri Altman +Date: Tue, 10 Sep 2024 07:45:43 +0300 +Subject: scsi: ufs: Use pre-calculated offsets in ufshcd_init_lrb() + +From: Avri Altman + +commit d5130c5a093257aa4542aaded8034ef116a7624a upstream. + +Replace manual offset calculations for response_upiu and prd_table in +ufshcd_init_lrb() with pre-calculated offsets already stored in the +utp_transfer_req_desc structure. The pre-calculated offsets are set +differently in ufshcd_host_memory_configure() based on the +UFSHCD_QUIRK_PRDT_BYTE_GRAN quirk, ensuring correct alignment and +access. + +Fixes: 26f968d7de82 ("scsi: ufs: Introduce UFSHCD_QUIRK_PRDT_BYTE_GRAN quirk") +Cc: stable@vger.kernel.org +Signed-off-by: Avri Altman +Link: https://lore.kernel.org/r/20240910044543.3812642-1-avri.altman@wdc.com +Acked-by: Bart Van Assche +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ufs/core/ufshcd.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/ufs/core/ufshcd.c ++++ b/drivers/ufs/core/ufshcd.c +@@ -2804,9 +2804,8 @@ static void ufshcd_init_lrb(struct ufs_h + struct utp_transfer_req_desc *utrdlp = hba->utrdl_base_addr; + dma_addr_t cmd_desc_element_addr = hba->ucdl_dma_addr + + i * ufshcd_get_ucd_size(hba); +- u16 response_offset = offsetof(struct utp_transfer_cmd_desc, +- response_upiu); +- u16 prdt_offset = offsetof(struct utp_transfer_cmd_desc, prd_table); ++ u16 response_offset = le16_to_cpu(utrdlp[i].response_upiu_offset); ++ u16 prdt_offset = le16_to_cpu(utrdlp[i].prd_table_offset); + + lrb->utr_descriptor_ptr = utrdlp + i; + lrb->utrd_dma_addr = hba->utrdl_dma_addr + diff --git a/queue-6.6/scsi-wd33c93-don-t-use-stale-scsi_pointer-value.patch b/queue-6.6/scsi-wd33c93-don-t-use-stale-scsi_pointer-value.patch new file mode 100644 index 00000000000..389dfed39f3 --- /dev/null +++ b/queue-6.6/scsi-wd33c93-don-t-use-stale-scsi_pointer-value.patch @@ -0,0 +1,43 @@ +From 9023ed8d91eb1fcc93e64dc4962f7412b1c4cbec Mon Sep 17 00:00:00 2001 +From: Daniel Palmer +Date: Thu, 3 Oct 2024 13:29:47 +1000 +Subject: scsi: wd33c93: Don't use stale scsi_pointer value + +From: Daniel Palmer + +commit 9023ed8d91eb1fcc93e64dc4962f7412b1c4cbec upstream. + +A regression was introduced with commit dbb2da557a6a ("scsi: wd33c93: +Move the SCSI pointer to private command data") which results in an oops +in wd33c93_intr(). That commit added the scsi_pointer variable and +initialized it from hostdata->connected. However, during selection, +hostdata->connected is not yet valid. Fix this by getting the current +scsi_pointer from hostdata->selecting. + +Cc: Daniel Palmer +Cc: Michael Schmitz +Cc: stable@kernel.org +Fixes: dbb2da557a6a ("scsi: wd33c93: Move the SCSI pointer to private command data") +Signed-off-by: Daniel Palmer +Co-developed-by: Finn Thain +Signed-off-by: Finn Thain +Link: https://lore.kernel.org/r/09e11a0a54e6aa2a88bd214526d305aaf018f523.1727926187.git.fthain@linux-m68k.org +Reviewed-by: Michael Schmitz +Reviewed-by: Bart Van Assche +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/wd33c93.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/wd33c93.c ++++ b/drivers/scsi/wd33c93.c +@@ -831,7 +831,7 @@ wd33c93_intr(struct Scsi_Host *instance) + /* construct an IDENTIFY message with correct disconnect bit */ + + hostdata->outgoing_msg[0] = IDENTIFY(0, cmd->device->lun); +- if (scsi_pointer->phase) ++ if (WD33C93_scsi_pointer(cmd)->phase) + hostdata->outgoing_msg[0] |= 0x40; + + if (hostdata->sync_stat[cmd->device->id] == SS_FIRST) { diff --git a/queue-6.6/secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map.patch b/queue-6.6/secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map.patch new file mode 100644 index 00000000000..bfb0c8a35a1 --- /dev/null +++ b/queue-6.6/secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map.patch @@ -0,0 +1,75 @@ +From 532b53cebe58f34ce1c0f34d866f5c0e335c53c6 Mon Sep 17 00:00:00 2001 +From: Patrick Roy +Date: Tue, 1 Oct 2024 09:00:41 +0100 +Subject: secretmem: disable memfd_secret() if arch cannot set direct map + +From: Patrick Roy + +commit 532b53cebe58f34ce1c0f34d866f5c0e335c53c6 upstream. + +Return -ENOSYS from memfd_secret() syscall if !can_set_direct_map(). This +is the case for example on some arm64 configurations, where marking 4k +PTEs in the direct map not present can only be done if the direct map is +set up at 4k granularity in the first place (as ARM's break-before-make +semantics do not easily allow breaking apart large/gigantic pages). + +More precisely, on arm64 systems with !can_set_direct_map(), +set_direct_map_invalid_noflush() is a no-op, however it returns success +(0) instead of an error. This means that memfd_secret will seemingly +"work" (e.g. syscall succeeds, you can mmap the fd and fault in pages), +but it does not actually achieve its goal of removing its memory from the +direct map. + +Note that with this patch, memfd_secret() will start erroring on systems +where can_set_direct_map() returns false (arm64 with +CONFIG_RODATA_FULL_DEFAULT_ENABLED=n, CONFIG_DEBUG_PAGEALLOC=n and +CONFIG_KFENCE=n), but that still seems better than the current silent +failure. Since CONFIG_RODATA_FULL_DEFAULT_ENABLED defaults to 'y', most +arm64 systems actually have a working memfd_secret() and aren't be +affected. + +From going through the iterations of the original memfd_secret patch +series, it seems that disabling the syscall in these scenarios was the +intended behavior [1] (preferred over having +set_direct_map_invalid_noflush return an error as that would result in +SIGBUSes at page-fault time), however the check for it got dropped between +v16 [2] and v17 [3], when secretmem moved away from CMA allocations. + +[1]: https://lore.kernel.org/lkml/20201124164930.GK8537@kernel.org/ +[2]: https://lore.kernel.org/lkml/20210121122723.3446-11-rppt@kernel.org/#t +[3]: https://lore.kernel.org/lkml/20201125092208.12544-10-rppt@kernel.org/ + +Link: https://lkml.kernel.org/r/20241001080056.784735-1-roypat@amazon.co.uk +Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas") +Signed-off-by: Patrick Roy +Reviewed-by: Mike Rapoport (Microsoft) +Cc: Alexander Graf +Cc: David Hildenbrand +Cc: James Gowans +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/secretmem.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/mm/secretmem.c ++++ b/mm/secretmem.c +@@ -238,7 +238,7 @@ SYSCALL_DEFINE1(memfd_secret, unsigned i + /* make sure local flags do not confict with global fcntl.h */ + BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC); + +- if (!secretmem_enable) ++ if (!secretmem_enable || !can_set_direct_map()) + return -ENOSYS; + + if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC)) +@@ -280,7 +280,7 @@ static struct file_system_type secretmem + + static int __init secretmem_init(void) + { +- if (!secretmem_enable) ++ if (!secretmem_enable || !can_set_direct_map()) + return 0; + + secretmem_mnt = kern_mount(&secretmem_fs); diff --git a/queue-6.6/selftests-mm-fix-incorrect-buffer-mirror-size-in-hmm2-double_map-test.patch b/queue-6.6/selftests-mm-fix-incorrect-buffer-mirror-size-in-hmm2-double_map-test.patch new file mode 100644 index 00000000000..de16027f319 --- /dev/null +++ b/queue-6.6/selftests-mm-fix-incorrect-buffer-mirror-size-in-hmm2-double_map-test.patch @@ -0,0 +1,64 @@ +From 76503e1fa1a53ef041a120825d5ce81c7fe7bdd7 Mon Sep 17 00:00:00 2001 +From: Donet Tom +Date: Fri, 27 Sep 2024 00:07:52 -0500 +Subject: selftests/mm: fix incorrect buffer->mirror size in hmm2 double_map test +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Donet Tom + +commit 76503e1fa1a53ef041a120825d5ce81c7fe7bdd7 upstream. + +The hmm2 double_map test was failing due to an incorrect buffer->mirror +size. The buffer->mirror size was 6, while buffer->ptr size was 6 * +PAGE_SIZE. The test failed because the kernel's copy_to_user function was +attempting to copy a 6 * PAGE_SIZE buffer to buffer->mirror. Since the +size of buffer->mirror was incorrect, copy_to_user failed. + +This patch corrects the buffer->mirror size to 6 * PAGE_SIZE. + +Test Result without this patch +============================== + # RUN hmm2.hmm2_device_private.double_map ... + # hmm-tests.c:1680:double_map:Expected ret (-14) == 0 (0) + # double_map: Test terminated by assertion + # FAIL hmm2.hmm2_device_private.double_map + not ok 53 hmm2.hmm2_device_private.double_map + +Test Result with this patch +=========================== + # RUN hmm2.hmm2_device_private.double_map ... + # OK hmm2.hmm2_device_private.double_map + ok 53 hmm2.hmm2_device_private.double_map + +Link: https://lkml.kernel.org/r/20240927050752.51066-1-donettom@linux.ibm.com +Fixes: fee9f6d1b8df ("mm/hmm/test: add selftests for HMM") +Signed-off-by: Donet Tom +Reviewed-by: Muhammad Usama Anjum +Cc: Jérôme Glisse +Cc: Kees Cook +Cc: Mark Brown +Cc: Przemek Kitszel +Cc: Ritesh Harjani (IBM) +Cc: Shuah Khan +Cc: Ralph Campbell +Cc: Jason Gunthorpe +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/mm/hmm-tests.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/mm/hmm-tests.c ++++ b/tools/testing/selftests/mm/hmm-tests.c +@@ -1657,7 +1657,7 @@ TEST_F(hmm2, double_map) + + buffer->fd = -1; + buffer->size = size; +- buffer->mirror = malloc(npages); ++ buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + /* Reserve a range of addresses. */ diff --git a/queue-6.6/selftests-rseq-fix-mm_cid-test-failure.patch b/queue-6.6/selftests-rseq-fix-mm_cid-test-failure.patch new file mode 100644 index 00000000000..5389231b4db --- /dev/null +++ b/queue-6.6/selftests-rseq-fix-mm_cid-test-failure.patch @@ -0,0 +1,243 @@ +From a0cc649353bb726d4aa0db60dce467432197b746 Mon Sep 17 00:00:00 2001 +From: Mathieu Desnoyers +Date: Tue, 8 Oct 2024 21:28:01 -0400 +Subject: selftests/rseq: Fix mm_cid test failure + +From: Mathieu Desnoyers + +commit a0cc649353bb726d4aa0db60dce467432197b746 upstream. + +Adapt the rseq.c/rseq.h code to follow GNU C library changes introduced by: + +glibc commit 2e456ccf0c34 ("Linux: Make __rseq_size useful for feature detection (bug 31965)") + +Without this fix, rseq selftests for mm_cid fail: + +./run_param_test.sh +Default parameters +Running test spinlock +Running compare-twice test spinlock +Running mm_cid test spinlock +Error: cpu id getter unavailable + +Fixes: 18c2355838e7 ("selftests/rseq: Implement rseq mm_cid field support") +Signed-off-by: Mathieu Desnoyers +Cc: Peter Zijlstra +CC: Boqun Feng +CC: "Paul E. McKenney" +Cc: Shuah Khan +CC: Carlos O'Donell +CC: Florian Weimer +CC: linux-kselftest@vger.kernel.org +CC: stable@vger.kernel.org +Signed-off-by: Shuah Khan +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/rseq/rseq.c | 110 ++++++++++++++++++++++++------------ + tools/testing/selftests/rseq/rseq.h | 10 --- + 2 files changed, 77 insertions(+), 43 deletions(-) + +--- a/tools/testing/selftests/rseq/rseq.c ++++ b/tools/testing/selftests/rseq/rseq.c +@@ -60,12 +60,6 @@ unsigned int rseq_size = -1U; + /* Flags used during rseq registration. */ + unsigned int rseq_flags; + +-/* +- * rseq feature size supported by the kernel. 0 if the registration was +- * unsuccessful. +- */ +-unsigned int rseq_feature_size = -1U; +- + static int rseq_ownership; + static int rseq_reg_success; /* At least one rseq registration has succeded. */ + +@@ -111,6 +105,43 @@ int rseq_available(void) + } + } + ++/* The rseq areas need to be at least 32 bytes. */ ++static ++unsigned int get_rseq_min_alloc_size(void) ++{ ++ unsigned int alloc_size = rseq_size; ++ ++ if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) ++ alloc_size = ORIG_RSEQ_ALLOC_SIZE; ++ return alloc_size; ++} ++ ++/* ++ * Return the feature size supported by the kernel. ++ * ++ * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): ++ * ++ * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) ++ * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). ++ * ++ * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. ++ */ ++static ++unsigned int get_rseq_kernel_feature_size(void) ++{ ++ unsigned long auxv_rseq_feature_size, auxv_rseq_align; ++ ++ auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); ++ assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); ++ ++ auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); ++ assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); ++ if (auxv_rseq_feature_size) ++ return auxv_rseq_feature_size; ++ else ++ return ORIG_RSEQ_FEATURE_SIZE; ++} ++ + int rseq_register_current_thread(void) + { + int rc; +@@ -119,7 +150,7 @@ int rseq_register_current_thread(void) + /* Treat libc's ownership as a successful registration. */ + return 0; + } +- rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); ++ rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); + if (rc) { + if (RSEQ_READ_ONCE(rseq_reg_success)) { + /* Incoherent success/failure within process. */ +@@ -140,28 +171,12 @@ int rseq_unregister_current_thread(void) + /* Treat libc's ownership as a successful unregistration. */ + return 0; + } +- rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); ++ rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + if (rc) + return -1; + return 0; + } + +-static +-unsigned int get_rseq_feature_size(void) +-{ +- unsigned long auxv_rseq_feature_size, auxv_rseq_align; +- +- auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); +- assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); +- +- auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); +- assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); +- if (auxv_rseq_feature_size) +- return auxv_rseq_feature_size; +- else +- return ORIG_RSEQ_FEATURE_SIZE; +-} +- + static __attribute__((constructor)) + void rseq_init(void) + { +@@ -178,28 +193,54 @@ void rseq_init(void) + } + if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && + *libc_rseq_size_p != 0) { ++ unsigned int libc_rseq_size; ++ + /* rseq registration owned by glibc */ + rseq_offset = *libc_rseq_offset_p; +- rseq_size = *libc_rseq_size_p; ++ libc_rseq_size = *libc_rseq_size_p; + rseq_flags = *libc_rseq_flags_p; +- rseq_feature_size = get_rseq_feature_size(); +- if (rseq_feature_size > rseq_size) +- rseq_feature_size = rseq_size; ++ ++ /* ++ * Previous versions of glibc expose the value ++ * 32 even though the kernel only supported 20 ++ * bytes initially. Therefore treat 32 as a ++ * special-case. glibc 2.40 exposes a 20 bytes ++ * __rseq_size without using getauxval(3) to ++ * query the supported size, while still allocating a 32 ++ * bytes area. Also treat 20 as a special-case. ++ * ++ * Special-cases are handled by using the following ++ * value as active feature set size: ++ * ++ * rseq_size = min(32, get_rseq_kernel_feature_size()) ++ */ ++ switch (libc_rseq_size) { ++ case ORIG_RSEQ_FEATURE_SIZE: ++ fallthrough; ++ case ORIG_RSEQ_ALLOC_SIZE: ++ { ++ unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size(); ++ ++ if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE) ++ rseq_size = rseq_kernel_feature_size; ++ else ++ rseq_size = ORIG_RSEQ_ALLOC_SIZE; ++ break; ++ } ++ default: ++ /* Otherwise just use the __rseq_size from libc as rseq_size. */ ++ rseq_size = libc_rseq_size; ++ break; ++ } + return; + } + rseq_ownership = 1; + if (!rseq_available()) { + rseq_size = 0; +- rseq_feature_size = 0; + return; + } + rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); + rseq_flags = 0; +- rseq_feature_size = get_rseq_feature_size(); +- if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) +- rseq_size = ORIG_RSEQ_ALLOC_SIZE; +- else +- rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; + } + + static __attribute__((destructor)) +@@ -209,7 +250,6 @@ void rseq_exit(void) + return; + rseq_offset = 0; + rseq_size = -1U; +- rseq_feature_size = -1U; + rseq_ownership = 0; + } + +--- a/tools/testing/selftests/rseq/rseq.h ++++ b/tools/testing/selftests/rseq/rseq.h +@@ -68,12 +68,6 @@ extern unsigned int rseq_size; + /* Flags used during rseq registration. */ + extern unsigned int rseq_flags; + +-/* +- * rseq feature size supported by the kernel. 0 if the registration was +- * unsuccessful. +- */ +-extern unsigned int rseq_feature_size; +- + enum rseq_mo { + RSEQ_MO_RELAXED = 0, + RSEQ_MO_CONSUME = 1, /* Unused */ +@@ -193,7 +187,7 @@ static inline uint32_t rseq_current_cpu( + + static inline bool rseq_node_id_available(void) + { +- return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, node_id); ++ return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, node_id); + } + + /* +@@ -207,7 +201,7 @@ static inline uint32_t rseq_current_node + + static inline bool rseq_mm_cid_available(void) + { +- return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, mm_cid); ++ return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, mm_cid); + } + + static inline uint32_t rseq_current_mm_cid(void) diff --git a/queue-6.6/series b/queue-6.6/series index 3a49d73b30b..93ec498c61f 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -185,3 +185,26 @@ usb-xhci-fix-problem-with-xhci-resume-from-suspend.patch usb-storage-ignore-bogus-device-raised-by-jieli-br21-usb-sound-chip.patch usb-gadget-core-force-synchronous-registration.patch hid-intel-ish-hid-fix-uninitialized-variable-rv-in-ish_fw_xfer_direct_dma.patch +drm-v3d-stop-the-active-perfmon-before-being-destroyed.patch +drm-vc4-stop-the-active-perfmon-before-being-destroyed.patch +drm-i915-hdcp-fix-connector-refcounting.patch +bluetooth-hci_conn-fix-uaf-in-hci_enhanced_setup_sync.patch +scsi-wd33c93-don-t-use-stale-scsi_pointer-value.patch +scsi-ufs-use-pre-calculated-offsets-in-ufshcd_init_lrb.patch +mptcp-fallback-when-mptcp-opts-are-dropped-after-1st-data.patch +ata-libata-avoid-superfluous-disk-spin-down-spin-up-during-hibernation.patch +net-explicitly-clear-the-sk-pointer-when-pf-create-fails.patch +net-fix-an-unsafe-loop-on-the-list.patch +net-dsa-lan9303-ensure-chip-reset-and-wait-for-ready-status.patch +net-phy-remove-led-entry-from-leds-list-on-unregister.patch +mptcp-handle-consistently-dss-corruption.patch +mptcp-pm-do-not-remove-closing-subflows.patch +device-dax-correct-pgoff-align-in-dax_set_mapping.patch +nouveau-dmem-fix-vulnerability-in-migrate_to_ram-upon-copy-error.patch +powercap-intel_rapl_tpmi-fix-bogus-register-reading.patch +selftests-mm-fix-incorrect-buffer-mirror-size-in-hmm2-double_map-test.patch +selftests-rseq-fix-mm_cid-test-failure.patch +btrfs-split-remaining-space-to-discard-in-chunks.patch +kthread-unpark-only-parked-kthread.patch +fs-proc-kcore.c-allow-translation-of-physical-memory-addresses.patch +secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map.patch