]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 14 Oct 2024 12:32:54 +0000 (14:32 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 14 Oct 2024 12:32:54 +0000 (14:32 +0200)
added patches:
ata-libata-avoid-superfluous-disk-spin-down-spin-up-during-hibernation.patch
bluetooth-hci_conn-fix-uaf-in-hci_enhanced_setup_sync.patch
btrfs-split-remaining-space-to-discard-in-chunks.patch
device-dax-correct-pgoff-align-in-dax_set_mapping.patch
drm-i915-hdcp-fix-connector-refcounting.patch
drm-v3d-stop-the-active-perfmon-before-being-destroyed.patch
drm-vc4-stop-the-active-perfmon-before-being-destroyed.patch
fs-proc-kcore.c-allow-translation-of-physical-memory-addresses.patch
kthread-unpark-only-parked-kthread.patch
mptcp-fallback-when-mptcp-opts-are-dropped-after-1st-data.patch
mptcp-handle-consistently-dss-corruption.patch
mptcp-pm-do-not-remove-closing-subflows.patch
net-dsa-lan9303-ensure-chip-reset-and-wait-for-ready-status.patch
net-explicitly-clear-the-sk-pointer-when-pf-create-fails.patch
net-fix-an-unsafe-loop-on-the-list.patch
net-phy-remove-led-entry-from-leds-list-on-unregister.patch
nouveau-dmem-fix-vulnerability-in-migrate_to_ram-upon-copy-error.patch
powercap-intel_rapl_tpmi-fix-bogus-register-reading.patch
scsi-ufs-use-pre-calculated-offsets-in-ufshcd_init_lrb.patch
scsi-wd33c93-don-t-use-stale-scsi_pointer-value.patch
secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map.patch
selftests-mm-fix-incorrect-buffer-mirror-size-in-hmm2-double_map-test.patch
selftests-rseq-fix-mm_cid-test-failure.patch

24 files changed:
queue-6.6/ata-libata-avoid-superfluous-disk-spin-down-spin-up-during-hibernation.patch [new file with mode: 0644]
queue-6.6/bluetooth-hci_conn-fix-uaf-in-hci_enhanced_setup_sync.patch [new file with mode: 0644]
queue-6.6/btrfs-split-remaining-space-to-discard-in-chunks.patch [new file with mode: 0644]
queue-6.6/device-dax-correct-pgoff-align-in-dax_set_mapping.patch [new file with mode: 0644]
queue-6.6/drm-i915-hdcp-fix-connector-refcounting.patch [new file with mode: 0644]
queue-6.6/drm-v3d-stop-the-active-perfmon-before-being-destroyed.patch [new file with mode: 0644]
queue-6.6/drm-vc4-stop-the-active-perfmon-before-being-destroyed.patch [new file with mode: 0644]
queue-6.6/fs-proc-kcore.c-allow-translation-of-physical-memory-addresses.patch [new file with mode: 0644]
queue-6.6/kthread-unpark-only-parked-kthread.patch [new file with mode: 0644]
queue-6.6/mptcp-fallback-when-mptcp-opts-are-dropped-after-1st-data.patch [new file with mode: 0644]
queue-6.6/mptcp-handle-consistently-dss-corruption.patch [new file with mode: 0644]
queue-6.6/mptcp-pm-do-not-remove-closing-subflows.patch [new file with mode: 0644]
queue-6.6/net-dsa-lan9303-ensure-chip-reset-and-wait-for-ready-status.patch [new file with mode: 0644]
queue-6.6/net-explicitly-clear-the-sk-pointer-when-pf-create-fails.patch [new file with mode: 0644]
queue-6.6/net-fix-an-unsafe-loop-on-the-list.patch [new file with mode: 0644]
queue-6.6/net-phy-remove-led-entry-from-leds-list-on-unregister.patch [new file with mode: 0644]
queue-6.6/nouveau-dmem-fix-vulnerability-in-migrate_to_ram-upon-copy-error.patch [new file with mode: 0644]
queue-6.6/powercap-intel_rapl_tpmi-fix-bogus-register-reading.patch [new file with mode: 0644]
queue-6.6/scsi-ufs-use-pre-calculated-offsets-in-ufshcd_init_lrb.patch [new file with mode: 0644]
queue-6.6/scsi-wd33c93-don-t-use-stale-scsi_pointer-value.patch [new file with mode: 0644]
queue-6.6/secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map.patch [new file with mode: 0644]
queue-6.6/selftests-mm-fix-incorrect-buffer-mirror-size-in-hmm2-double_map-test.patch [new file with mode: 0644]
queue-6.6/selftests-rseq-fix-mm_cid-test-failure.patch [new file with mode: 0644]
queue-6.6/series

diff --git a/queue-6.6/ata-libata-avoid-superfluous-disk-spin-down-spin-up-during-hibernation.patch b/queue-6.6/ata-libata-avoid-superfluous-disk-spin-down-spin-up-during-hibernation.patch
new file mode 100644 (file)
index 0000000..2d51210
--- /dev/null
@@ -0,0 +1,76 @@
+From a38719e3157118428e34fbd45b0d0707a5877784 Mon Sep 17 00:00:00 2001
+From: Niklas Cassel <cassel@kernel.org>
+Date: Tue, 8 Oct 2024 15:58:44 +0200
+Subject: ata: libata: avoid superfluous disk spin down + spin up during hibernation
+
+From: Niklas Cassel <cassel@kernel.org>
+
+commit a38719e3157118428e34fbd45b0d0707a5877784 upstream.
+
+A user reported that commit aa3998dbeb3a ("ata: libata-scsi: Disable scsi
+device manage_system_start_stop") introduced a spin down + immediate spin
+up of the disk both when entering and when resuming from hibernation.
+This behavior was not there before, and causes an increased latency both
+when entering and when resuming from hibernation.
+
+Hibernation is done by three consecutive PM events, in the following order:
+1) PM_EVENT_FREEZE
+2) PM_EVENT_THAW
+3) PM_EVENT_HIBERNATE
+
+Commit aa3998dbeb3a ("ata: libata-scsi: Disable scsi device
+manage_system_start_stop") modified ata_eh_handle_port_suspend() to call
+ata_dev_power_set_standby() (which spins down the disk), for both event
+PM_EVENT_FREEZE and event PM_EVENT_HIBERNATE.
+
+Documentation/driver-api/pm/devices.rst, section "Entering Hibernation",
+explicitly mentions that PM_EVENT_FREEZE does not have to be put the device
+in a low-power state, and actually recommends not doing so. Thus, let's not
+spin down the disk on PM_EVENT_FREEZE. (The disk will instead be spun down
+during the subsequent PM_EVENT_HIBERNATE event.)
+
+This way, PM_EVENT_FREEZE will behave as it did before commit aa3998dbeb3a
+("ata: libata-scsi: Disable scsi device manage_system_start_stop"), while
+PM_EVENT_HIBERNATE will continue to spin down the disk.
+
+This will avoid the superfluous spin down + spin up when entering and
+resuming from hibernation, while still making sure that the disk is spun
+down before actually entering hibernation.
+
+Cc: stable@vger.kernel.org # v6.6+
+Fixes: aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop")
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Link: https://lore.kernel.org/r/20241008135843.1266244-2-cassel@kernel.org
+Signed-off-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-eh.c |   18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/drivers/ata/libata-eh.c
++++ b/drivers/ata/libata-eh.c
+@@ -4049,10 +4049,20 @@ static void ata_eh_handle_port_suspend(s
+       WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);
+-      /* Set all devices attached to the port in standby mode */
+-      ata_for_each_link(link, ap, HOST_FIRST) {
+-              ata_for_each_dev(dev, link, ENABLED)
+-                      ata_dev_power_set_standby(dev);
++      /*
++       * We will reach this point for all of the PM events:
++       * PM_EVENT_SUSPEND (if runtime pm, PM_EVENT_AUTO will also be set)
++       * PM_EVENT_FREEZE, and PM_EVENT_HIBERNATE.
++       *
++       * We do not want to perform disk spin down for PM_EVENT_FREEZE.
++       * (Spin down will be performed by the subsequent PM_EVENT_HIBERNATE.)
++       */
++      if (!(ap->pm_mesg.event & PM_EVENT_FREEZE)) {
++              /* Set all devices attached to the port in standby mode */
++              ata_for_each_link(link, ap, HOST_FIRST) {
++                      ata_for_each_dev(dev, link, ENABLED)
++                              ata_dev_power_set_standby(dev);
++              }
+       }
+       /*
diff --git a/queue-6.6/bluetooth-hci_conn-fix-uaf-in-hci_enhanced_setup_sync.patch b/queue-6.6/bluetooth-hci_conn-fix-uaf-in-hci_enhanced_setup_sync.patch
new file mode 100644 (file)
index 0000000..e6274bd
--- /dev/null
@@ -0,0 +1,97 @@
+From 18fd04ad856df07733f5bb07e7f7168e7443d393 Mon Sep 17 00:00:00 2001
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Date: Wed, 2 Oct 2024 11:17:26 -0400
+Subject: Bluetooth: hci_conn: Fix UAF in hci_enhanced_setup_sync
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+commit 18fd04ad856df07733f5bb07e7f7168e7443d393 upstream.
+
+This checks if the ACL connection remains valid as it could be destroyed
+while hci_enhanced_setup_sync is pending on cmd_sync leading to the
+following trace:
+
+BUG: KASAN: slab-use-after-free in hci_enhanced_setup_sync+0x91b/0xa60
+Read of size 1 at addr ffff888002328ffd by task kworker/u5:2/37
+
+CPU: 0 UID: 0 PID: 37 Comm: kworker/u5:2 Not tainted 6.11.0-rc6-01300-g810be445d8d6 #7099
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014
+Workqueue: hci0 hci_cmd_sync_work
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x5d/0x80
+ ? hci_enhanced_setup_sync+0x91b/0xa60
+ print_report+0x152/0x4c0
+ ? hci_enhanced_setup_sync+0x91b/0xa60
+ ? __virt_addr_valid+0x1fa/0x420
+ ? hci_enhanced_setup_sync+0x91b/0xa60
+ kasan_report+0xda/0x1b0
+ ? hci_enhanced_setup_sync+0x91b/0xa60
+ hci_enhanced_setup_sync+0x91b/0xa60
+ ? __pfx_hci_enhanced_setup_sync+0x10/0x10
+ ? __pfx___mutex_lock+0x10/0x10
+ hci_cmd_sync_work+0x1c2/0x330
+ process_one_work+0x7d9/0x1360
+ ? __pfx_lock_acquire+0x10/0x10
+ ? __pfx_process_one_work+0x10/0x10
+ ? assign_work+0x167/0x240
+ worker_thread+0x5b7/0xf60
+ ? __kthread_parkme+0xac/0x1c0
+ ? __pfx_worker_thread+0x10/0x10
+ ? __pfx_worker_thread+0x10/0x10
+ kthread+0x293/0x360
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork+0x2f/0x70
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+
+Allocated by task 34:
+ kasan_save_stack+0x30/0x50
+ kasan_save_track+0x14/0x30
+ __kasan_kmalloc+0x8f/0xa0
+ __hci_conn_add+0x187/0x17d0
+ hci_connect_sco+0x2e1/0xb90
+ sco_sock_connect+0x2a2/0xb80
+ __sys_connect+0x227/0x2a0
+ __x64_sys_connect+0x6d/0xb0
+ do_syscall_64+0x71/0x140
+ entry_SYSCALL_64_after_hwframe+0x76/0x7e
+
+Freed by task 37:
+ kasan_save_stack+0x30/0x50
+ kasan_save_track+0x14/0x30
+ kasan_save_free_info+0x3b/0x60
+ __kasan_slab_free+0x101/0x160
+ kfree+0xd0/0x250
+ device_release+0x9a/0x210
+ kobject_put+0x151/0x280
+ hci_conn_del+0x448/0xbf0
+ hci_abort_conn_sync+0x46f/0x980
+ hci_cmd_sync_work+0x1c2/0x330
+ process_one_work+0x7d9/0x1360
+ worker_thread+0x5b7/0xf60
+ kthread+0x293/0x360
+ ret_from_fork+0x2f/0x70
+ ret_from_fork_asm+0x1a/0x30
+
+Cc: stable@vger.kernel.org
+Fixes: e07a06b4eb41 ("Bluetooth: Convert SCO configure_datapath to hci_sync")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bluetooth/hci_conn.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/bluetooth/hci_conn.c
++++ b/net/bluetooth/hci_conn.c
+@@ -290,6 +290,9 @@ static int hci_enhanced_setup_sync(struc
+       kfree(conn_handle);
++      if (!hci_conn_valid(hdev, conn))
++              return -ECANCELED;
++
+       bt_dev_dbg(hdev, "hcon %p", conn);
+       configure_datapath_sync(hdev, &conn->codec);
diff --git a/queue-6.6/btrfs-split-remaining-space-to-discard-in-chunks.patch b/queue-6.6/btrfs-split-remaining-space-to-discard-in-chunks.patch
new file mode 100644 (file)
index 0000000..79c0f2c
--- /dev/null
@@ -0,0 +1,78 @@
+From a99fcb0158978ed332009449b484e5f3ca2d7df4 Mon Sep 17 00:00:00 2001
+From: Luca Stefani <luca.stefani.ge1@gmail.com>
+Date: Tue, 17 Sep 2024 22:33:04 +0200
+Subject: btrfs: split remaining space to discard in chunks
+
+From: Luca Stefani <luca.stefani.ge1@gmail.com>
+
+commit a99fcb0158978ed332009449b484e5f3ca2d7df4 upstream.
+
+Per Qu Wenruo in case we have a very large disk, e.g. 8TiB device,
+mostly empty although we will do the split according to our super block
+locations, the last super block ends at 256G, we can submit a huge
+discard for the range [256G, 8T), causing a large delay.
+
+Split the space left to discard based on BTRFS_MAX_DISCARD_CHUNK_SIZE in
+preparation of introduction of cancellation points to trim. The value
+of the chunk size is arbitrary, it can be higher or derived from actual
+device capabilities but we can't easily read that using
+bio_discard_limit().
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=219180
+Link: https://bugzilla.suse.com/show_bug.cgi?id=1229737
+CC: stable@vger.kernel.org # 5.15+
+Signed-off-by: Luca Stefani <luca.stefani.ge1@gmail.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c |   19 +++++++++++++++----
+ fs/btrfs/volumes.h     |    6 ++++++
+ 2 files changed, 21 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -1303,13 +1303,24 @@ static int btrfs_issue_discard(struct bl
+               bytes_left = end - start;
+       }
+-      if (bytes_left) {
++      while (bytes_left) {
++              u64 bytes_to_discard = min(BTRFS_MAX_DISCARD_CHUNK_SIZE, bytes_left);
++
+               ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
+-                                         bytes_left >> SECTOR_SHIFT,
++                                         bytes_to_discard >> SECTOR_SHIFT,
+                                          GFP_NOFS);
+-              if (!ret)
+-                      *discarded_bytes += bytes_left;
++
++              if (ret) {
++                      if (ret != -EOPNOTSUPP)
++                              break;
++                      continue;
++              }
++
++              start += bytes_to_discard;
++              bytes_left -= bytes_to_discard;
++              *discarded_bytes += bytes_to_discard;
+       }
++
+       return ret;
+ }
+--- a/fs/btrfs/volumes.h
++++ b/fs/btrfs/volumes.h
+@@ -15,6 +15,12 @@
+ #define BTRFS_MAX_DATA_CHUNK_SIZE     (10ULL * SZ_1G)
++/*
++ * Arbitratry maximum size of one discard request to limit potentially long time
++ * spent in blkdev_issue_discard().
++ */
++#define BTRFS_MAX_DISCARD_CHUNK_SIZE  (SZ_1G)
++
+ extern struct mutex uuid_mutex;
+ #define BTRFS_STRIPE_LEN              SZ_64K
diff --git a/queue-6.6/device-dax-correct-pgoff-align-in-dax_set_mapping.patch b/queue-6.6/device-dax-correct-pgoff-align-in-dax_set_mapping.patch
new file mode 100644 (file)
index 0000000..e22d09b
--- /dev/null
@@ -0,0 +1,112 @@
+From 7fcbd9785d4c17ea533c42f20a9083a83f301fa6 Mon Sep 17 00:00:00 2001
+From: "Kun(llfl)" <llfl@linux.alibaba.com>
+Date: Fri, 27 Sep 2024 15:45:09 +0800
+Subject: device-dax: correct pgoff align in dax_set_mapping()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kun(llfl) <llfl@linux.alibaba.com>
+
+commit 7fcbd9785d4c17ea533c42f20a9083a83f301fa6 upstream.
+
+pgoff should be aligned using ALIGN_DOWN() instead of ALIGN().  Otherwise,
+vmf->address not aligned to fault_size will be aligned to the next
+alignment, that can result in memory failure getting the wrong address.
+
+It's a subtle situation that only can be observed in
+page_mapped_in_vma() after the page is page fault handled by
+dev_dax_huge_fault.  Generally, there is little chance to perform
+page_mapped_in_vma in dev-dax's page unless in specific error injection
+to the dax device to trigger an MCE - memory-failure.  In that case,
+page_mapped_in_vma() will be triggered to determine which task is
+accessing the failure address and kill that task in the end.
+
+
+We used self-developed dax device (which is 2M aligned mapping) , to
+perform error injection to random address.  It turned out that error
+injected to non-2M-aligned address was causing endless MCE until panic.
+Because page_mapped_in_vma() kept resulting wrong address and the task
+accessing the failure address was never killed properly:
+
+
+[ 3783.719419] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3784.049006] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3784.049190] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3784.448042] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3784.448186] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3784.792026] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3784.792179] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3785.162502] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3785.162633] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3785.461116] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3785.461247] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3785.764730] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3785.764859] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3786.042128] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3786.042259] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3786.464293] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3786.464423] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3786.818090] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3786.818217] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3787.085297] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3787.085424] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+
+It took us several weeks to pinpoint this problem,  but we eventually
+used bpftrace to trace the page fault and mce address and successfully
+identified the issue.
+
+
+Joao added:
+
+; Likely we never reproduce in production because we always pin
+: device-dax regions in the region align they provide (Qemu does
+: similarly with prealloc in hugetlb/file backed memory).  I think this
+: bug requires that we touch *unpinned* device-dax regions unaligned to
+: the device-dax selected alignment (page size i.e.  4K/2M/1G)
+
+Link: https://lkml.kernel.org/r/23c02a03e8d666fef11bbe13e85c69c8b4ca0624.1727421694.git.llfl@linux.alibaba.com
+Fixes: b9b5777f09be ("device-dax: use ALIGN() for determining pgoff")
+Signed-off-by: Kun(llfl) <llfl@linux.alibaba.com>
+Tested-by: JianXiong Zhao <zhaojianxiong.zjx@alibaba-inc.com>
+Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dax/device.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/dax/device.c
++++ b/drivers/dax/device.c
+@@ -86,7 +86,7 @@ static void dax_set_mapping(struct vm_fa
+               nr_pages = 1;
+       pgoff = linear_page_index(vmf->vma,
+-                      ALIGN(vmf->address, fault_size));
++                      ALIGN_DOWN(vmf->address, fault_size));
+       for (i = 0; i < nr_pages; i++) {
+               struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
diff --git a/queue-6.6/drm-i915-hdcp-fix-connector-refcounting.patch b/queue-6.6/drm-i915-hdcp-fix-connector-refcounting.patch
new file mode 100644 (file)
index 0000000..6292444
--- /dev/null
@@ -0,0 +1,68 @@
+From 4cc2718f621a6a57a02581125bb6d914ce74d23b Mon Sep 17 00:00:00 2001
+From: Jani Nikula <jani.nikula@intel.com>
+Date: Tue, 24 Sep 2024 18:30:22 +0300
+Subject: drm/i915/hdcp: fix connector refcounting
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jani Nikula <jani.nikula@intel.com>
+
+commit 4cc2718f621a6a57a02581125bb6d914ce74d23b upstream.
+
+We acquire a connector reference before scheduling an HDCP prop work,
+and expect the work function to release the reference.
+
+However, if the work was already queued, it won't be queued multiple
+times, and the reference is not dropped.
+
+Release the reference immediately if the work was already queued.
+
+Fixes: a6597faa2d59 ("drm/i915: Protect workers against disappearing connectors")
+Cc: Sean Paul <seanpaul@chromium.org>
+Cc: Suraj Kandpal <suraj.kandpal@intel.com>
+Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Cc: stable@vger.kernel.org # v5.10+
+Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240924153022.2255299-1-jani.nikula@intel.com
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+(cherry picked from commit abc0742c79bdb3b164eacab24aea0916d2ec1cb5)
+Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/display/intel_hdcp.c |   10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
++++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
+@@ -1005,7 +1005,8 @@ static void intel_hdcp_update_value(stru
+       hdcp->value = value;
+       if (update_property) {
+               drm_connector_get(&connector->base);
+-              queue_work(i915->unordered_wq, &hdcp->prop_work);
++              if (!queue_work(i915->unordered_wq, &hdcp->prop_work))
++                      drm_connector_put(&connector->base);
+       }
+ }
+@@ -2480,7 +2481,8 @@ void intel_hdcp_update_pipe(struct intel
+               mutex_lock(&hdcp->mutex);
+               hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED;
+               drm_connector_get(&connector->base);
+-              queue_work(i915->unordered_wq, &hdcp->prop_work);
++              if (!queue_work(i915->unordered_wq, &hdcp->prop_work))
++                      drm_connector_put(&connector->base);
+               mutex_unlock(&hdcp->mutex);
+       }
+@@ -2497,7 +2499,9 @@ void intel_hdcp_update_pipe(struct intel
+                */
+               if (!desired_and_not_enabled && !content_protection_type_changed) {
+                       drm_connector_get(&connector->base);
+-                      queue_work(i915->unordered_wq, &hdcp->prop_work);
++                      if (!queue_work(i915->unordered_wq, &hdcp->prop_work))
++                              drm_connector_put(&connector->base);
++
+               }
+       }
diff --git a/queue-6.6/drm-v3d-stop-the-active-perfmon-before-being-destroyed.patch b/queue-6.6/drm-v3d-stop-the-active-perfmon-before-being-destroyed.patch
new file mode 100644 (file)
index 0000000..f4b8639
--- /dev/null
@@ -0,0 +1,113 @@
+From 7d1fd3638ee3a9f9bca4785fffb638ca19120718 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
+Date: Fri, 4 Oct 2024 10:02:29 -0300
+Subject: drm/v3d: Stop the active perfmon before being destroyed
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maíra Canal <mcanal@igalia.com>
+
+commit 7d1fd3638ee3a9f9bca4785fffb638ca19120718 upstream.
+
+When running `kmscube` with one or more performance monitors enabled
+via `GALLIUM_HUD`, the following kernel panic can occur:
+
+[   55.008324] Unable to handle kernel paging request at virtual address 00000000052004a4
+[   55.008368] Mem abort info:
+[   55.008377]   ESR = 0x0000000096000005
+[   55.008387]   EC = 0x25: DABT (current EL), IL = 32 bits
+[   55.008402]   SET = 0, FnV = 0
+[   55.008412]   EA = 0, S1PTW = 0
+[   55.008421]   FSC = 0x05: level 1 translation fault
+[   55.008434] Data abort info:
+[   55.008442]   ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
+[   55.008455]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+[   55.008467]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+[   55.008481] user pgtable: 4k pages, 39-bit VAs, pgdp=00000001046c6000
+[   55.008497] [00000000052004a4] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000
+[   55.008525] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP
+[   55.008542] Modules linked in: rfcomm [...] vc4 v3d snd_soc_hdmi_codec drm_display_helper
+gpu_sched drm_shmem_helper cec drm_dma_helper drm_kms_helper i2c_brcmstb
+drm drm_panel_orientation_quirks snd_soc_core snd_compress snd_pcm_dmaengine snd_pcm snd_timer snd backlight
+[   55.008799] CPU: 2 PID: 166 Comm: v3d_bin Tainted: G         C         6.6.47+rpt-rpi-v8 #1  Debian 1:6.6.47-1+rpt1
+[   55.008824] Hardware name: Raspberry Pi 4 Model B Rev 1.5 (DT)
+[   55.008838] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+[   55.008855] pc : __mutex_lock.constprop.0+0x90/0x608
+[   55.008879] lr : __mutex_lock.constprop.0+0x58/0x608
+[   55.008895] sp : ffffffc080673cf0
+[   55.008904] x29: ffffffc080673cf0 x28: 0000000000000000 x27: ffffff8106188a28
+[   55.008926] x26: ffffff8101e78040 x25: ffffff8101baa6c0 x24: ffffffd9d989f148
+[   55.008947] x23: ffffffda1c2a4008 x22: 0000000000000002 x21: ffffffc080673d38
+[   55.008968] x20: ffffff8101238000 x19: ffffff8104f83188 x18: 0000000000000000
+[   55.008988] x17: 0000000000000000 x16: ffffffda1bd04d18 x15: 00000055bb08bc90
+[   55.009715] x14: 0000000000000000 x13: 0000000000000000 x12: ffffffda1bd4cbb0
+[   55.010433] x11: 00000000fa83b2da x10: 0000000000001a40 x9 : ffffffda1bd04d04
+[   55.011162] x8 : ffffff8102097b80 x7 : 0000000000000000 x6 : 00000000030a5857
+[   55.011880] x5 : 00ffffffffffffff x4 : 0300000005200470 x3 : 0300000005200470
+[   55.012598] x2 : ffffff8101238000 x1 : 0000000000000021 x0 : 0300000005200470
+[   55.013292] Call trace:
+[   55.013959]  __mutex_lock.constprop.0+0x90/0x608
+[   55.014646]  __mutex_lock_slowpath+0x1c/0x30
+[   55.015317]  mutex_lock+0x50/0x68
+[   55.015961]  v3d_perfmon_stop+0x40/0xe0 [v3d]
+[   55.016627]  v3d_bin_job_run+0x10c/0x2d8 [v3d]
+[   55.017282]  drm_sched_main+0x178/0x3f8 [gpu_sched]
+[   55.017921]  kthread+0x11c/0x128
+[   55.018554]  ret_from_fork+0x10/0x20
+[   55.019168] Code: f9400260 f1001c1f 54001ea9 927df000 (b9403401)
+[   55.019776] ---[ end trace 0000000000000000 ]---
+[   55.020411] note: v3d_bin[166] exited with preempt_count 1
+
+This issue arises because, upon closing the file descriptor (which happens
+when we interrupt `kmscube`), the active performance monitor is not
+stopped. Although all perfmons are destroyed in `v3d_perfmon_close_file()`,
+the active performance monitor's pointer (`v3d->active_perfmon`) is still
+retained.
+
+If `kmscube` is run again, the driver will attempt to stop the active
+performance monitor using the stale pointer in `v3d->active_perfmon`.
+However, this pointer is no longer valid because the previous process has
+already terminated, and all performance monitors associated with it have
+been destroyed and freed.
+
+To fix this, when the active performance monitor belongs to a given
+process, explicitly stop it before destroying and freeing it.
+
+Cc: stable@vger.kernel.org # v5.15+
+Closes: https://github.com/raspberrypi/linux/issues/6389
+Fixes: 26a4dc29b74a ("drm/v3d: Expose performance counters to userspace")
+Signed-off-by: Maíra Canal <mcanal@igalia.com>
+Reviewed-by: Juan A. Suarez <jasuarez@igalia.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241004130625.918580-2-mcanal@igalia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/v3d/v3d_perfmon.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/v3d/v3d_perfmon.c
++++ b/drivers/gpu/drm/v3d/v3d_perfmon.c
+@@ -103,6 +103,11 @@ void v3d_perfmon_open_file(struct v3d_fi
+ static int v3d_perfmon_idr_del(int id, void *elem, void *data)
+ {
+       struct v3d_perfmon *perfmon = elem;
++      struct v3d_dev *v3d = (struct v3d_dev *)data;
++
++      /* If the active perfmon is being destroyed, stop it first */
++      if (perfmon == v3d->active_perfmon)
++              v3d_perfmon_stop(v3d, perfmon, false);
+       v3d_perfmon_put(perfmon);
+@@ -111,8 +116,10 @@ static int v3d_perfmon_idr_del(int id, v
+ void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv)
+ {
++      struct v3d_dev *v3d = v3d_priv->v3d;
++
+       mutex_lock(&v3d_priv->perfmon.lock);
+-      idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, NULL);
++      idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, v3d);
+       idr_destroy(&v3d_priv->perfmon.idr);
+       mutex_unlock(&v3d_priv->perfmon.lock);
+       mutex_destroy(&v3d_priv->perfmon.lock);
diff --git a/queue-6.6/drm-vc4-stop-the-active-perfmon-before-being-destroyed.patch b/queue-6.6/drm-vc4-stop-the-active-perfmon-before-being-destroyed.patch
new file mode 100644 (file)
index 0000000..c0ae9bd
--- /dev/null
@@ -0,0 +1,62 @@
+From 0b2ad4f6f2bec74a5287d96cb2325a5e11706f22 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
+Date: Fri, 4 Oct 2024 09:36:00 -0300
+Subject: drm/vc4: Stop the active perfmon before being destroyed
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maíra Canal <mcanal@igalia.com>
+
+commit 0b2ad4f6f2bec74a5287d96cb2325a5e11706f22 upstream.
+
+Upon closing the file descriptor, the active performance monitor is not
+stopped. Although all perfmons are destroyed in `vc4_perfmon_close_file()`,
+the active performance monitor's pointer (`vc4->active_perfmon`) is still
+retained.
+
+If we open a new file descriptor and submit a few jobs with performance
+monitors, the driver will attempt to stop the active performance monitor
+using the stale pointer in `vc4->active_perfmon`. However, this pointer
+is no longer valid because the previous process has already terminated,
+and all performance monitors associated with it have been destroyed and
+freed.
+
+To fix this, when the active performance monitor belongs to a given
+process, explicitly stop it before destroying and freeing it.
+
+Cc: stable@vger.kernel.org # v4.17+
+Cc: Boris Brezillon <bbrezillon@kernel.org>
+Cc: Juan A. Suarez Romero <jasuarez@igalia.com>
+Fixes: 65101d8c9108 ("drm/vc4: Expose performance counters to userspace")
+Signed-off-by: Maíra Canal <mcanal@igalia.com>
+Reviewed-by: Juan A. Suarez <jasuarez@igalia.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241004123817.890016-2-mcanal@igalia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/vc4/vc4_perfmon.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/vc4/vc4_perfmon.c
++++ b/drivers/gpu/drm/vc4/vc4_perfmon.c
+@@ -116,6 +116,11 @@ void vc4_perfmon_open_file(struct vc4_fi
+ static int vc4_perfmon_idr_del(int id, void *elem, void *data)
+ {
+       struct vc4_perfmon *perfmon = elem;
++      struct vc4_dev *vc4 = (struct vc4_dev *)data;
++
++      /* If the active perfmon is being destroyed, stop it first */
++      if (perfmon == vc4->active_perfmon)
++              vc4_perfmon_stop(vc4, perfmon, false);
+       vc4_perfmon_put(perfmon);
+@@ -130,7 +135,7 @@ void vc4_perfmon_close_file(struct vc4_f
+               return;
+       mutex_lock(&vc4file->perfmon.lock);
+-      idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL);
++      idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, vc4);
+       idr_destroy(&vc4file->perfmon.idr);
+       mutex_unlock(&vc4file->perfmon.lock);
+       mutex_destroy(&vc4file->perfmon.lock);
diff --git a/queue-6.6/fs-proc-kcore.c-allow-translation-of-physical-memory-addresses.patch b/queue-6.6/fs-proc-kcore.c-allow-translation-of-physical-memory-addresses.patch
new file mode 100644 (file)
index 0000000..4dc24ba
--- /dev/null
@@ -0,0 +1,122 @@
+From 3d5854d75e3187147613130561b58f0b06166172 Mon Sep 17 00:00:00 2001
+From: Alexander Gordeev <agordeev@linux.ibm.com>
+Date: Mon, 30 Sep 2024 14:21:19 +0200
+Subject: fs/proc/kcore.c: allow translation of physical memory addresses
+
+From: Alexander Gordeev <agordeev@linux.ibm.com>
+
+commit 3d5854d75e3187147613130561b58f0b06166172 upstream.
+
+When /proc/kcore is read an attempt to read the first two pages results in
+HW-specific page swap on s390 and another (so called prefix) pages are
+accessed instead.  That leads to a wrong read.
+
+Allow architecture-specific translation of memory addresses using
+kc_xlate_dev_mem_ptr() and kc_unxlate_dev_mem_ptr() callbacks similarily
+to /dev/mem xlate_dev_mem_ptr() and unxlate_dev_mem_ptr() callbacks.  That
+way an architecture can deal with specific physical memory ranges.
+
+Re-use the existing /dev/mem callback implementation on s390, which
+handles the described prefix pages swapping correctly.
+
+For other architectures the default callback is basically NOP.  It is
+expected the condition (vaddr == __va(__pa(vaddr))) always holds true for
+KCORE_RAM memory type.
+
+Link: https://lkml.kernel.org/r/20240930122119.1651546-1-agordeev@linux.ibm.com
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Suggested-by: Heiko Carstens <hca@linux.ibm.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/include/asm/io.h |    2 ++
+ fs/proc/kcore.c            |   36 ++++++++++++++++++++++++++++++++++--
+ 2 files changed, 36 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/include/asm/io.h
++++ b/arch/s390/include/asm/io.h
+@@ -16,8 +16,10 @@
+ #include <asm/pci_io.h>
+ #define xlate_dev_mem_ptr xlate_dev_mem_ptr
++#define kc_xlate_dev_mem_ptr xlate_dev_mem_ptr
+ void *xlate_dev_mem_ptr(phys_addr_t phys);
+ #define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
++#define kc_unxlate_dev_mem_ptr unxlate_dev_mem_ptr
+ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
+ #define IO_SPACE_LIMIT 0
+--- a/fs/proc/kcore.c
++++ b/fs/proc/kcore.c
+@@ -50,6 +50,20 @@ static struct proc_dir_entry *proc_root_
+ #define       kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
+ #endif
++#ifndef kc_xlate_dev_mem_ptr
++#define kc_xlate_dev_mem_ptr kc_xlate_dev_mem_ptr
++static inline void *kc_xlate_dev_mem_ptr(phys_addr_t phys)
++{
++      return __va(phys);
++}
++#endif
++#ifndef kc_unxlate_dev_mem_ptr
++#define kc_unxlate_dev_mem_ptr kc_unxlate_dev_mem_ptr
++static inline void kc_unxlate_dev_mem_ptr(phys_addr_t phys, void *virt)
++{
++}
++#endif
++
+ static LIST_HEAD(kclist_head);
+ static DECLARE_RWSEM(kclist_lock);
+ static int kcore_need_update = 1;
+@@ -471,6 +485,8 @@ static ssize_t read_kcore_iter(struct ki
+       while (buflen) {
+               struct page *page;
+               unsigned long pfn;
++              phys_addr_t phys;
++              void *__start;
+               /*
+                * If this is the first iteration or the address is not within
+@@ -537,7 +553,8 @@ static ssize_t read_kcore_iter(struct ki
+                       }
+                       break;
+               case KCORE_RAM:
+-                      pfn = __pa(start) >> PAGE_SHIFT;
++                      phys = __pa(start);
++                      pfn =  phys >> PAGE_SHIFT;
+                       page = pfn_to_online_page(pfn);
+                       /*
+@@ -556,13 +573,28 @@ static ssize_t read_kcore_iter(struct ki
+                       fallthrough;
+               case KCORE_VMEMMAP:
+               case KCORE_TEXT:
++                      if (m->type == KCORE_RAM) {
++                              __start = kc_xlate_dev_mem_ptr(phys);
++                              if (!__start) {
++                                      ret = -ENOMEM;
++                                      if (iov_iter_zero(tsz, iter) != tsz)
++                                              ret = -EFAULT;
++                                      goto out;
++                              }
++                      } else {
++                              __start = (void *)start;
++                      }
++
+                       /*
+                        * Sadly we must use a bounce buffer here to be able to
+                        * make use of copy_from_kernel_nofault(), as these
+                        * memory regions might not always be mapped on all
+                        * architectures.
+                        */
+-                      if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
++                      ret = copy_from_kernel_nofault(buf, __start, tsz);
++                      if (m->type == KCORE_RAM)
++                              kc_unxlate_dev_mem_ptr(phys, __start);
++                      if (ret) {
+                               if (iov_iter_zero(tsz, iter) != tsz) {
+                                       ret = -EFAULT;
+                                       goto out;
diff --git a/queue-6.6/kthread-unpark-only-parked-kthread.patch b/queue-6.6/kthread-unpark-only-parked-kthread.patch
new file mode 100644 (file)
index 0000000..4da762c
--- /dev/null
@@ -0,0 +1,65 @@
+From 214e01ad4ed7158cab66498810094fac5d09b218 Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <frederic@kernel.org>
+Date: Fri, 13 Sep 2024 23:46:34 +0200
+Subject: kthread: unpark only parked kthread
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+commit 214e01ad4ed7158cab66498810094fac5d09b218 upstream.
+
+Calling into kthread unparking unconditionally is mostly harmless when
+the kthread is already unparked. The wake up is then simply ignored
+because the target is not in TASK_PARKED state.
+
+However if the kthread is per CPU, the wake up is preceded by a call
+to kthread_bind() which expects the task to be inactive and in
+TASK_PARKED state, which obviously isn't the case if it is unparked.
+
+As a result, calling kthread_stop() on an unparked per-cpu kthread
+triggers such a warning:
+
+       WARNING: CPU: 0 PID: 11 at kernel/kthread.c:525 __kthread_bind_mask kernel/kthread.c:525
+        <TASK>
+        kthread_stop+0x17a/0x630 kernel/kthread.c:707
+        destroy_workqueue+0x136/0xc40 kernel/workqueue.c:5810
+        wg_destruct+0x1e2/0x2e0 drivers/net/wireguard/device.c:257
+        netdev_run_todo+0xe1a/0x1000 net/core/dev.c:10693
+        default_device_exit_batch+0xa14/0xa90 net/core/dev.c:11769
+        ops_exit_list net/core/net_namespace.c:178 [inline]
+        cleanup_net+0x89d/0xcc0 net/core/net_namespace.c:640
+        process_one_work kernel/workqueue.c:3231 [inline]
+        process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312
+        worker_thread+0x86d/0xd70 kernel/workqueue.c:3393
+        kthread+0x2f0/0x390 kernel/kthread.c:389
+        ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
+        ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
+        </TASK>
+
+Fix this with skipping unecessary unparking while stopping a kthread.
+
+Link: https://lkml.kernel.org/r/20240913214634.12557-1-frederic@kernel.org
+Fixes: 5c25b5ff89f0 ("workqueue: Tag bound workers with KTHREAD_IS_PER_CPU")
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Reported-by: syzbot+943d34fa3cf2191e3068@syzkaller.appspotmail.com
+Tested-by: syzbot+943d34fa3cf2191e3068@syzkaller.appspotmail.com
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Hillf Danton <hdanton@sina.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kthread.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -622,6 +622,8 @@ void kthread_unpark(struct task_struct *
+ {
+       struct kthread *kthread = to_kthread(k);
++      if (!test_bit(KTHREAD_SHOULD_PARK, &kthread->flags))
++              return;
+       /*
+        * Newly created kthread was parked when the CPU was offline.
+        * The binding was lost and we need to set it again.
diff --git a/queue-6.6/mptcp-fallback-when-mptcp-opts-are-dropped-after-1st-data.patch b/queue-6.6/mptcp-fallback-when-mptcp-opts-are-dropped-after-1st-data.patch
new file mode 100644 (file)
index 0000000..df6c4b7
--- /dev/null
@@ -0,0 +1,85 @@
+From 119d51e225febc8152476340a880f5415a01e99e Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 8 Oct 2024 13:04:54 +0200
+Subject: mptcp: fallback when MPTCP opts are dropped after 1st data
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 119d51e225febc8152476340a880f5415a01e99e upstream.
+
+As reported by Christoph [1], before this patch, an MPTCP connection was
+wrongly reset when a host received a first data packet with MPTCP
+options after the 3wHS, but got the next ones without.
+
+According to the MPTCP v1 specs [2], a fallback should happen in this
+case, because the host didn't receive a DATA_ACK from the other peer,
+nor receive data for more than the initial window which implies a
+DATA_ACK being received by the other peer.
+
+The patch here re-uses the same logic as the one used in other places:
+by looking at allow_infinite_fallback, which is disabled at the creation
+of an additional subflow. It's not looking at the first DATA_ACK (or
+implying one received from the other side) as suggested by the RFC, but
+it is in continuation with what was already done, which is safer, and it
+fixes the reported issue. The next step, looking at this first DATA_ACK,
+is tracked in [4].
+
+This patch has been validated using the following Packetdrill script:
+
+   0 socket(..., SOCK_STREAM, IPPROTO_MPTCP) = 3
+  +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+  +0 bind(3, ..., ...) = 0
+  +0 listen(3, 1) = 0
+
+  // 3WHS is OK
+  +0.0 < S  0:0(0)       win 65535  <mss 1460, sackOK, nop, nop, nop, wscale 6, mpcapable v1 flags[flag_h] nokey>
+  +0.0 > S. 0:0(0) ack 1            <mss 1460, nop, nop, sackOK, nop, wscale 8, mpcapable v1 flags[flag_h] key[skey]>
+  +0.1 <  . 1:1(0) ack 1 win 2048                                              <mpcapable v1 flags[flag_h] key[ckey=2, skey]>
+  +0 accept(3, ..., ...) = 4
+
+  // Data from the client with valid MPTCP options (no DATA_ACK: normal)
+  +0.1 < P. 1:501(500) ack 1 win 2048 <mpcapable v1 flags[flag_h] key[skey, ckey] mpcdatalen 500, nop, nop>
+  // From here, the MPTCP options will be dropped by a middlebox
+  +0.0 >  . 1:1(0)     ack 501        <dss dack8=501 dll=0 nocs>
+
+  +0.1 read(4, ..., 500) = 500
+  +0   write(4, ..., 100) = 100
+
+  // The server replies with data, still thinking MPTCP is being used
+  +0.0 > P. 1:101(100)   ack 501          <dss dack8=501 dsn8=1 ssn=1 dll=100 nocs, nop, nop>
+  // But the client already did a fallback to TCP, because the two previous packets have been received without MPTCP options
+  +0.1 <  . 501:501(0)   ack 101 win 2048
+
+  +0.0 < P. 501:601(100) ack 101 win 2048
+  // The server should fallback to TCP, not reset: it didn't get a DATA_ACK, nor data for more than the initial window
+  +0.0 >  . 101:101(0)   ack 601
+
+Note that this script requires Packetdrill with MPTCP support, see [3].
+
+Fixes: dea2b1ea9c70 ("mptcp: do not reset MP_CAPABLE subflow on mapping errors")
+Cc: stable@vger.kernel.org
+Reported-by: Christoph Paasch <cpaasch@apple.com>
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/518 [1]
+Link: https://datatracker.ietf.org/doc/html/rfc8684#name-fallback [2]
+Link: https://github.com/multipath-tcp/packetdrill [3]
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/519 [4]
+Reviewed-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-3-c6fb8e93e551@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/subflow.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1247,7 +1247,7 @@ static bool subflow_can_fallback(struct
+       else if (READ_ONCE(msk->csum_enabled))
+               return !subflow->valid_csum_seen;
+       else
+-              return !subflow->fully_established;
++              return READ_ONCE(msk->allow_infinite_fallback);
+ }
+ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
diff --git a/queue-6.6/mptcp-handle-consistently-dss-corruption.patch b/queue-6.6/mptcp-handle-consistently-dss-corruption.patch
new file mode 100644 (file)
index 0000000..2844b09
--- /dev/null
@@ -0,0 +1,107 @@
+From e32d262c89e2b22cb0640223f953b548617ed8a6 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Tue, 8 Oct 2024 13:04:52 +0200
+Subject: mptcp: handle consistently DSS corruption
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit e32d262c89e2b22cb0640223f953b548617ed8a6 upstream.
+
+Bugged peer implementation can send corrupted DSS options, consistently
+hitting a few warning in the data path. Use DEBUG_NET assertions, to
+avoid the splat on some builds and handle consistently the error, dumping
+related MIBs and performing fallback and/or reset according to the
+subflow type.
+
+Fixes: 6771bfd9ee24 ("mptcp: update mptcp ack sequence from work queue")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-1-c6fb8e93e551@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/mib.c      |    2 ++
+ net/mptcp/mib.h      |    2 ++
+ net/mptcp/protocol.c |   24 +++++++++++++++++++++---
+ net/mptcp/subflow.c  |    4 +++-
+ 4 files changed, 28 insertions(+), 4 deletions(-)
+
+--- a/net/mptcp/mib.c
++++ b/net/mptcp/mib.c
+@@ -26,6 +26,8 @@ static const struct snmp_mib mptcp_snmp_
+       SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
+       SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
+       SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
++      SNMP_MIB_ITEM("DSSCorruptionFallback", MPTCP_MIB_DSSCORRUPTIONFALLBACK),
++      SNMP_MIB_ITEM("DSSCorruptionReset", MPTCP_MIB_DSSCORRUPTIONRESET),
+       SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
+       SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
+       SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
+--- a/net/mptcp/mib.h
++++ b/net/mptcp/mib.h
+@@ -19,6 +19,8 @@ enum linux_mptcp_mib_field {
+       MPTCP_MIB_JOINACKRX,            /* Received an ACK + MP_JOIN */
+       MPTCP_MIB_JOINACKMAC,           /* HMAC was wrong on ACK + MP_JOIN */
+       MPTCP_MIB_DSSNOMATCH,           /* Received a new mapping that did not match the previous one */
++      MPTCP_MIB_DSSCORRUPTIONFALLBACK,/* DSS corruption detected, fallback */
++      MPTCP_MIB_DSSCORRUPTIONRESET,   /* DSS corruption detected, MPJ subflow reset */
+       MPTCP_MIB_INFINITEMAPTX,        /* Sent an infinite mapping */
+       MPTCP_MIB_INFINITEMAPRX,        /* Received an infinite mapping */
+       MPTCP_MIB_DSSTCPMISMATCH,       /* DSS-mapping did not map with TCP's sequence numbers */
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -620,6 +620,18 @@ static bool mptcp_check_data_fin(struct
+       return ret;
+ }
++static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
++{
++      if (READ_ONCE(msk->allow_infinite_fallback)) {
++              MPTCP_INC_STATS(sock_net(ssk),
++                              MPTCP_MIB_DSSCORRUPTIONFALLBACK);
++              mptcp_do_fallback(ssk);
++      } else {
++              MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET);
++              mptcp_subflow_reset(ssk);
++      }
++}
++
+ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
+                                          struct sock *ssk,
+                                          unsigned int *bytes)
+@@ -692,10 +704,16 @@ static bool __mptcp_move_skbs_from_subfl
+                               moved += len;
+                       seq += len;
+-                      if (WARN_ON_ONCE(map_remaining < len))
+-                              break;
++                      if (unlikely(map_remaining < len)) {
++                              DEBUG_NET_WARN_ON_ONCE(1);
++                              mptcp_dss_corruption(msk, ssk);
++                      }
+               } else {
+-                      WARN_ON_ONCE(!fin);
++                      if (unlikely(!fin)) {
++                              DEBUG_NET_WARN_ON_ONCE(1);
++                              mptcp_dss_corruption(msk, ssk);
++                      }
++
+                       sk_eat_skb(ssk, skb);
+                       done = true;
+               }
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -944,8 +944,10 @@ static bool skb_is_fully_mapped(struct s
+       unsigned int skb_consumed;
+       skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
+-      if (WARN_ON_ONCE(skb_consumed >= skb->len))
++      if (unlikely(skb_consumed >= skb->len)) {
++              DEBUG_NET_WARN_ON_ONCE(1);
+               return true;
++      }
+       return skb->len - skb_consumed <= subflow->map_data_len -
+                                         mptcp_subflow_get_map_offset(subflow);
diff --git a/queue-6.6/mptcp-pm-do-not-remove-closing-subflows.patch b/queue-6.6/mptcp-pm-do-not-remove-closing-subflows.patch
new file mode 100644 (file)
index 0000000..4a97524
--- /dev/null
@@ -0,0 +1,41 @@
+From db0a37b7ac27d8ca27d3dc676a16d081c16ec7b9 Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 8 Oct 2024 13:04:55 +0200
+Subject: mptcp: pm: do not remove closing subflows
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit db0a37b7ac27d8ca27d3dc676a16d081c16ec7b9 upstream.
+
+In a previous fix, the in-kernel path-manager has been modified not to
+retrigger the removal of a subflow if it was already closed, e.g. when
+the initial subflow is removed, but kept in the subflows list.
+
+To be complete, this fix should also skip the subflows that are in any
+closing state: mptcp_close_ssk() will initiate the closure, but the
+switch to the TCP_CLOSE state depends on the other peer.
+
+Fixes: 58e1b66b4e4b ("mptcp: pm: do not remove already closed subflows")
+Cc: stable@vger.kernel.org
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-4-c6fb8e93e551@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/pm_netlink.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -864,7 +864,8 @@ static void mptcp_pm_nl_rm_addr_or_subfl
+                       int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
+                       u8 id = subflow_get_local_id(subflow);
+-                      if (inet_sk_state_load(ssk) == TCP_CLOSE)
++                      if ((1 << inet_sk_state_load(ssk)) &
++                          (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE))
+                               continue;
+                       if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
+                               continue;
diff --git a/queue-6.6/net-dsa-lan9303-ensure-chip-reset-and-wait-for-ready-status.patch b/queue-6.6/net-dsa-lan9303-ensure-chip-reset-and-wait-for-ready-status.patch
new file mode 100644 (file)
index 0000000..6e4813f
--- /dev/null
@@ -0,0 +1,83 @@
+From 5c14e51d2d7df49fe0d4e64a12c58d2542f452ff Mon Sep 17 00:00:00 2001
+From: Anatolij Gustschin <agust@denx.de>
+Date: Fri, 4 Oct 2024 13:36:54 +0200
+Subject: net: dsa: lan9303: ensure chip reset and wait for READY status
+
+From: Anatolij Gustschin <agust@denx.de>
+
+commit 5c14e51d2d7df49fe0d4e64a12c58d2542f452ff upstream.
+
+Accessing device registers seems to be not reliable, the chip
+revision is sometimes detected wrongly (0 instead of expected 1).
+
+Ensure that the chip reset is performed via reset GPIO and then
+wait for 'Device Ready' status in HW_CFG register before doing
+any register initializations.
+
+Cc: stable@vger.kernel.org
+Fixes: a1292595e006 ("net: dsa: add new DSA switch driver for the SMSC-LAN9303")
+Signed-off-by: Anatolij Gustschin <agust@denx.de>
+[alex: reworked using read_poll_timeout()]
+Signed-off-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Link: https://patch.msgid.link/20241004113655.3436296-1-alexander.sverdlin@siemens.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/lan9303-core.c |   29 +++++++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+--- a/drivers/net/dsa/lan9303-core.c
++++ b/drivers/net/dsa/lan9303-core.c
+@@ -6,6 +6,7 @@
+ #include <linux/module.h>
+ #include <linux/gpio/consumer.h>
+ #include <linux/regmap.h>
++#include <linux/iopoll.h>
+ #include <linux/mutex.h>
+ #include <linux/mii.h>
+ #include <linux/of.h>
+@@ -839,6 +840,8 @@ static void lan9303_handle_reset(struct
+       if (!chip->reset_gpio)
+               return;
++      gpiod_set_value_cansleep(chip->reset_gpio, 1);
++
+       if (chip->reset_duration != 0)
+               msleep(chip->reset_duration);
+@@ -864,8 +867,34 @@ static int lan9303_disable_processing(st
+ static int lan9303_check_device(struct lan9303 *chip)
+ {
+       int ret;
++      int err;
+       u32 reg;
++      /* In I2C-managed configurations this polling loop will clash with
++       * switch's reading of EEPROM right after reset and this behaviour is
++       * not configurable. While lan9303_read() already has quite long retry
++       * timeout, seems not all cases are being detected as arbitration error.
++       *
++       * According to datasheet, EEPROM loader has 30ms timeout (in case of
++       * missing EEPROM).
++       *
++       * Loading of the largest supported EEPROM is expected to take at least
++       * 5.9s.
++       */
++      err = read_poll_timeout(lan9303_read, ret,
++                              !ret && reg & LAN9303_HW_CFG_READY,
++                              20000, 6000000, false,
++                              chip->regmap, LAN9303_HW_CFG, &reg);
++      if (ret) {
++              dev_err(chip->dev, "failed to read HW_CFG reg: %pe\n",
++                      ERR_PTR(ret));
++              return ret;
++      }
++      if (err) {
++              dev_err(chip->dev, "HW_CFG not ready: 0x%08x\n", reg);
++              return err;
++      }
++
+       ret = lan9303_read(chip->regmap, LAN9303_CHIP_REV, &reg);
+       if (ret) {
+               dev_err(chip->dev, "failed to read chip revision register: %d\n",
diff --git a/queue-6.6/net-explicitly-clear-the-sk-pointer-when-pf-create-fails.patch b/queue-6.6/net-explicitly-clear-the-sk-pointer-when-pf-create-fails.patch
new file mode 100644 (file)
index 0000000..45cadc2
--- /dev/null
@@ -0,0 +1,56 @@
+From 631083143315d1b192bd7d915b967b37819e88ea Mon Sep 17 00:00:00 2001
+From: Ignat Korchagin <ignat@cloudflare.com>
+Date: Thu, 3 Oct 2024 18:01:51 +0100
+Subject: net: explicitly clear the sk pointer, when pf->create fails
+
+From: Ignat Korchagin <ignat@cloudflare.com>
+
+commit 631083143315d1b192bd7d915b967b37819e88ea upstream.
+
+We have recently noticed the exact same KASAN splat as in commit
+6cd4a78d962b ("net: do not leave a dangling sk pointer, when socket
+creation fails"). The problem is that commit did not fully address the
+problem, as some pf->create implementations do not use sk_common_release
+in their error paths.
+
+For example, we can use the same reproducer as in the above commit, but
+changing ping to arping. arping uses AF_PACKET socket and if packet_create
+fails, it will just sk_free the allocated sk object.
+
+While we could chase all the pf->create implementations and make sure they
+NULL the freed sk object on error from the socket, we can't guarantee
+future protocols will not make the same mistake.
+
+So it is easier to just explicitly NULL the sk pointer upon return from
+pf->create in __sock_create. We do know that pf->create always releases the
+allocated sk object on error, so if the pointer is not NULL, it is
+definitely dangling.
+
+Fixes: 6cd4a78d962b ("net: do not leave a dangling sk pointer, when socket creation fails")
+Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241003170151.69445-1-ignat@cloudflare.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/socket.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -1569,8 +1569,13 @@ int __sock_create(struct net *net, int f
+       rcu_read_unlock();
+       err = pf->create(net, sock, protocol, kern);
+-      if (err < 0)
++      if (err < 0) {
++              /* ->create should release the allocated sock->sk object on error
++               * but it may leave the dangling pointer
++               */
++              sock->sk = NULL;
+               goto out_module_put;
++      }
+       /*
+        * Now to bump the refcnt of the [loadable] module that owns this
diff --git a/queue-6.6/net-fix-an-unsafe-loop-on-the-list.patch b/queue-6.6/net-fix-an-unsafe-loop-on-the-list.patch
new file mode 100644 (file)
index 0000000..3b7b26f
--- /dev/null
@@ -0,0 +1,60 @@
+From 1dae9f1187189bc09ff6d25ca97ead711f7e26f9 Mon Sep 17 00:00:00 2001
+From: Anastasia Kovaleva <a.kovaleva@yadro.com>
+Date: Thu, 3 Oct 2024 13:44:31 +0300
+Subject: net: Fix an unsafe loop on the list
+
+From: Anastasia Kovaleva <a.kovaleva@yadro.com>
+
+commit 1dae9f1187189bc09ff6d25ca97ead711f7e26f9 upstream.
+
+The kernel may crash when deleting a genetlink family if there are still
+listeners for that family:
+
+Oops: Kernel access of bad area, sig: 11 [#1]
+  ...
+  NIP [c000000000c080bc] netlink_update_socket_mc+0x3c/0xc0
+  LR [c000000000c0f764] __netlink_clear_multicast_users+0x74/0xc0
+  Call Trace:
+__netlink_clear_multicast_users+0x74/0xc0
+genl_unregister_family+0xd4/0x2d0
+
+Change the unsafe loop on the list to a safe one, because inside the
+loop there is an element removal from this list.
+
+Fixes: b8273570f802 ("genetlink: fix netns vs. netlink table locking (2)")
+Cc: stable@vger.kernel.org
+Signed-off-by: Anastasia Kovaleva <a.kovaleva@yadro.com>
+Reviewed-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20241003104431.12391-1-a.kovaleva@yadro.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sock.h       |    2 ++
+ net/netlink/af_netlink.c |    3 ++-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -902,6 +902,8 @@ static inline void sk_add_bind2_node(str
+       hlist_for_each_entry(__sk, list, sk_bind_node)
+ #define sk_for_each_bound_bhash2(__sk, list) \
+       hlist_for_each_entry(__sk, list, sk_bind2_node)
++#define sk_for_each_bound_safe(__sk, tmp, list) \
++      hlist_for_each_entry_safe(__sk, tmp, list, sk_bind_node)
+ /**
+  * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -2143,8 +2143,9 @@ void __netlink_clear_multicast_users(str
+ {
+       struct sock *sk;
+       struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
++      struct hlist_node *tmp;
+-      sk_for_each_bound(sk, &tbl->mc_list)
++      sk_for_each_bound_safe(sk, tmp, &tbl->mc_list)
+               netlink_update_socket_mc(nlk_sk(sk), group, 0);
+ }
diff --git a/queue-6.6/net-phy-remove-led-entry-from-leds-list-on-unregister.patch b/queue-6.6/net-phy-remove-led-entry-from-leds-list-on-unregister.patch
new file mode 100644 (file)
index 0000000..1a52dd2
--- /dev/null
@@ -0,0 +1,58 @@
+From f50b5d74c68e551667e265123659b187a30fe3a5 Mon Sep 17 00:00:00 2001
+From: Christian Marangi <ansuelsmth@gmail.com>
+Date: Fri, 4 Oct 2024 20:27:58 +0200
+Subject: net: phy: Remove LED entry from LEDs list on unregister
+
+From: Christian Marangi <ansuelsmth@gmail.com>
+
+commit f50b5d74c68e551667e265123659b187a30fe3a5 upstream.
+
+Commit c938ab4da0eb ("net: phy: Manual remove LEDs to ensure correct
+ordering") correctly fixed a problem with using devm_ but missed
+removing the LED entry from the LEDs list.
+
+This cause kernel panic on specific scenario where the port for the PHY
+is torn down and up and the kmod for the PHY is removed.
+
+On setting the port down the first time, the assosiacted LEDs are
+correctly unregistered. The associated kmod for the PHY is now removed.
+The kmod is now added again and the port is now put up, the associated LED
+are registered again.
+On putting the port down again for the second time after these step, the
+LED list now have 4 elements. With the first 2 already unregistered
+previously and the 2 new one registered again.
+
+This cause a kernel panic as the first 2 element should have been
+removed.
+
+Fix this by correctly removing the element when LED is unregistered.
+
+Reported-by: Daniel Golle <daniel@makrotopia.org>
+Tested-by: Daniel Golle <daniel@makrotopia.org>
+Cc: stable@vger.kernel.org
+Fixes: c938ab4da0eb ("net: phy: Manual remove LEDs to ensure correct ordering")
+Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://patch.msgid.link/20241004182759.14032-1-ansuelsmth@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy_device.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -3082,10 +3082,11 @@ static __maybe_unused int phy_led_hw_is_
+ static void phy_leds_unregister(struct phy_device *phydev)
+ {
+-      struct phy_led *phyled;
++      struct phy_led *phyled, *tmp;
+-      list_for_each_entry(phyled, &phydev->leds, list) {
++      list_for_each_entry_safe(phyled, tmp, &phydev->leds, list) {
+               led_classdev_unregister(&phyled->led_cdev);
++              list_del(&phyled->list);
+       }
+ }
diff --git a/queue-6.6/nouveau-dmem-fix-vulnerability-in-migrate_to_ram-upon-copy-error.patch b/queue-6.6/nouveau-dmem-fix-vulnerability-in-migrate_to_ram-upon-copy-error.patch
new file mode 100644 (file)
index 0000000..2bf20f1
--- /dev/null
@@ -0,0 +1,48 @@
+From 835745a377a4519decd1a36d6b926e369b3033e2 Mon Sep 17 00:00:00 2001
+From: Yonatan Maman <Ymaman@Nvidia.com>
+Date: Tue, 8 Oct 2024 14:59:43 +0300
+Subject: nouveau/dmem: Fix vulnerability in migrate_to_ram upon copy error
+
+From: Yonatan Maman <Ymaman@Nvidia.com>
+
+commit 835745a377a4519decd1a36d6b926e369b3033e2 upstream.
+
+The `nouveau_dmem_copy_one` function ensures that the copy push command is
+sent to the device firmware but does not track whether it was executed
+successfully.
+
+In the case of a copy error (e.g., firmware or hardware failure), the
+copy push command will be sent via the firmware channel, and
+`nouveau_dmem_copy_one` will likely report success, leading to the
+`migrate_to_ram` function returning a dirty HIGH_USER page to the user.
+
+This can result in a security vulnerability, as a HIGH_USER page that may
+contain sensitive or corrupted data could be returned to the user.
+
+To prevent this vulnerability, we allocate a zero page. Thus, in case of
+an error, a non-dirty (zero) page will be returned to the user.
+
+Fixes: 5be73b690875 ("drm/nouveau/dmem: device memory helpers for SVM")
+Signed-off-by: Yonatan Maman <Ymaman@Nvidia.com>
+Co-developed-by: Gal Shalom <GalShalom@Nvidia.com>
+Signed-off-by: Gal Shalom <GalShalom@Nvidia.com>
+Reviewed-by: Ben Skeggs <bskeggs@nvidia.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Danilo Krummrich <dakr@kernel.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241008115943.990286-3-ymaman@nvidia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/nouveau/nouveau_dmem.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
++++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
+@@ -193,7 +193,7 @@ static vm_fault_t nouveau_dmem_migrate_t
+       if (!spage || !(src & MIGRATE_PFN_MIGRATE))
+               goto done;
+-      dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
++      dpage = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vmf->vma, vmf->address);
+       if (!dpage)
+               goto done;
diff --git a/queue-6.6/powercap-intel_rapl_tpmi-fix-bogus-register-reading.patch b/queue-6.6/powercap-intel_rapl_tpmi-fix-bogus-register-reading.patch
new file mode 100644 (file)
index 0000000..4623b95
--- /dev/null
@@ -0,0 +1,34 @@
+From 91e8f835a7eda4ba2c0c4002a3108a0e3b22d34e Mon Sep 17 00:00:00 2001
+From: Zhang Rui <rui.zhang@intel.com>
+Date: Mon, 30 Sep 2024 16:17:56 +0800
+Subject: powercap: intel_rapl_tpmi: Fix bogus register reading
+
+From: Zhang Rui <rui.zhang@intel.com>
+
+commit 91e8f835a7eda4ba2c0c4002a3108a0e3b22d34e upstream.
+
+The TPMI_RAPL_REG_DOMAIN_INFO value needs to be multiplied by 8 to get
+the register offset.
+
+Cc: All applicable <stable@vger.kernel.org>
+Fixes: 903eb9fb85e3 ("powercap: intel_rapl_tpmi: Fix System Domain probing")
+Signed-off-by: Zhang Rui <rui.zhang@intel.com>
+Link: https://patch.msgid.link/20240930081801.28502-2-rui.zhang@intel.com
+[ rjw: Changelog edits ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/powercap/intel_rapl_tpmi.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/powercap/intel_rapl_tpmi.c
++++ b/drivers/powercap/intel_rapl_tpmi.c
+@@ -192,7 +192,7 @@ static int parse_one_domain(struct tpmi_
+                       pr_warn(FW_BUG "System domain must support Domain Info register\n");
+                       return -ENODEV;
+               }
+-              tpmi_domain_info = readq(trp->base + offset + TPMI_RAPL_REG_DOMAIN_INFO);
++              tpmi_domain_info = readq(trp->base + offset + TPMI_RAPL_REG_DOMAIN_INFO * 8);
+               if (!(tpmi_domain_info & TPMI_RAPL_DOMAIN_ROOT))
+                       return 0;
+               domain_type = RAPL_DOMAIN_PLATFORM;
diff --git a/queue-6.6/scsi-ufs-use-pre-calculated-offsets-in-ufshcd_init_lrb.patch b/queue-6.6/scsi-ufs-use-pre-calculated-offsets-in-ufshcd_init_lrb.patch
new file mode 100644 (file)
index 0000000..3b3a922
--- /dev/null
@@ -0,0 +1,41 @@
+From d5130c5a093257aa4542aaded8034ef116a7624a Mon Sep 17 00:00:00 2001
+From: Avri Altman <avri.altman@wdc.com>
+Date: Tue, 10 Sep 2024 07:45:43 +0300
+Subject: scsi: ufs: Use pre-calculated offsets in ufshcd_init_lrb()
+
+From: Avri Altman <avri.altman@wdc.com>
+
+commit d5130c5a093257aa4542aaded8034ef116a7624a upstream.
+
+Replace manual offset calculations for response_upiu and prd_table in
+ufshcd_init_lrb() with pre-calculated offsets already stored in the
+utp_transfer_req_desc structure. The pre-calculated offsets are set
+differently in ufshcd_host_memory_configure() based on the
+UFSHCD_QUIRK_PRDT_BYTE_GRAN quirk, ensuring correct alignment and
+access.
+
+Fixes: 26f968d7de82 ("scsi: ufs: Introduce UFSHCD_QUIRK_PRDT_BYTE_GRAN quirk")
+Cc: stable@vger.kernel.org
+Signed-off-by: Avri Altman <avri.altman@wdc.com>
+Link: https://lore.kernel.org/r/20240910044543.3812642-1-avri.altman@wdc.com
+Acked-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ufs/core/ufshcd.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/ufs/core/ufshcd.c
++++ b/drivers/ufs/core/ufshcd.c
+@@ -2804,9 +2804,8 @@ static void ufshcd_init_lrb(struct ufs_h
+       struct utp_transfer_req_desc *utrdlp = hba->utrdl_base_addr;
+       dma_addr_t cmd_desc_element_addr = hba->ucdl_dma_addr +
+               i * ufshcd_get_ucd_size(hba);
+-      u16 response_offset = offsetof(struct utp_transfer_cmd_desc,
+-                                     response_upiu);
+-      u16 prdt_offset = offsetof(struct utp_transfer_cmd_desc, prd_table);
++      u16 response_offset = le16_to_cpu(utrdlp[i].response_upiu_offset);
++      u16 prdt_offset = le16_to_cpu(utrdlp[i].prd_table_offset);
+       lrb->utr_descriptor_ptr = utrdlp + i;
+       lrb->utrd_dma_addr = hba->utrdl_dma_addr +
diff --git a/queue-6.6/scsi-wd33c93-don-t-use-stale-scsi_pointer-value.patch b/queue-6.6/scsi-wd33c93-don-t-use-stale-scsi_pointer-value.patch
new file mode 100644 (file)
index 0000000..389dfed
--- /dev/null
@@ -0,0 +1,43 @@
+From 9023ed8d91eb1fcc93e64dc4962f7412b1c4cbec Mon Sep 17 00:00:00 2001
+From: Daniel Palmer <daniel@0x0f.com>
+Date: Thu, 3 Oct 2024 13:29:47 +1000
+Subject: scsi: wd33c93: Don't use stale scsi_pointer value
+
+From: Daniel Palmer <daniel@0x0f.com>
+
+commit 9023ed8d91eb1fcc93e64dc4962f7412b1c4cbec upstream.
+
+A regression was introduced with commit dbb2da557a6a ("scsi: wd33c93:
+Move the SCSI pointer to private command data") which results in an oops
+in wd33c93_intr(). That commit added the scsi_pointer variable and
+initialized it from hostdata->connected. However, during selection,
+hostdata->connected is not yet valid. Fix this by getting the current
+scsi_pointer from hostdata->selecting.
+
+Cc: Daniel Palmer <daniel@0x0f.com>
+Cc: Michael Schmitz <schmitzmic@gmail.com>
+Cc: stable@kernel.org
+Fixes: dbb2da557a6a ("scsi: wd33c93: Move the SCSI pointer to private command data")
+Signed-off-by: Daniel Palmer <daniel@0x0f.com>
+Co-developed-by: Finn Thain <fthain@linux-m68k.org>
+Signed-off-by: Finn Thain <fthain@linux-m68k.org>
+Link: https://lore.kernel.org/r/09e11a0a54e6aa2a88bd214526d305aaf018f523.1727926187.git.fthain@linux-m68k.org
+Reviewed-by: Michael Schmitz <schmitzmic@gmail.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/wd33c93.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/scsi/wd33c93.c
++++ b/drivers/scsi/wd33c93.c
+@@ -831,7 +831,7 @@ wd33c93_intr(struct Scsi_Host *instance)
+               /* construct an IDENTIFY message with correct disconnect bit */
+               hostdata->outgoing_msg[0] = IDENTIFY(0, cmd->device->lun);
+-              if (scsi_pointer->phase)
++              if (WD33C93_scsi_pointer(cmd)->phase)
+                       hostdata->outgoing_msg[0] |= 0x40;
+               if (hostdata->sync_stat[cmd->device->id] == SS_FIRST) {
diff --git a/queue-6.6/secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map.patch b/queue-6.6/secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map.patch
new file mode 100644 (file)
index 0000000..bfb0c8a
--- /dev/null
@@ -0,0 +1,75 @@
+From 532b53cebe58f34ce1c0f34d866f5c0e335c53c6 Mon Sep 17 00:00:00 2001
+From: Patrick Roy <roypat@amazon.co.uk>
+Date: Tue, 1 Oct 2024 09:00:41 +0100
+Subject: secretmem: disable memfd_secret() if arch cannot set direct map
+
+From: Patrick Roy <roypat@amazon.co.uk>
+
+commit 532b53cebe58f34ce1c0f34d866f5c0e335c53c6 upstream.
+
+Return -ENOSYS from memfd_secret() syscall if !can_set_direct_map().  This
+is the case for example on some arm64 configurations, where marking 4k
+PTEs in the direct map not present can only be done if the direct map is
+set up at 4k granularity in the first place (as ARM's break-before-make
+semantics do not easily allow breaking apart large/gigantic pages).
+
+More precisely, on arm64 systems with !can_set_direct_map(),
+set_direct_map_invalid_noflush() is a no-op, however it returns success
+(0) instead of an error.  This means that memfd_secret will seemingly
+"work" (e.g.  syscall succeeds, you can mmap the fd and fault in pages),
+but it does not actually achieve its goal of removing its memory from the
+direct map.
+
+Note that with this patch, memfd_secret() will start erroring on systems
+where can_set_direct_map() returns false (arm64 with
+CONFIG_RODATA_FULL_DEFAULT_ENABLED=n, CONFIG_DEBUG_PAGEALLOC=n and
+CONFIG_KFENCE=n), but that still seems better than the current silent
+failure.  Since CONFIG_RODATA_FULL_DEFAULT_ENABLED defaults to 'y', most
+arm64 systems actually have a working memfd_secret() and aren't be
+affected.
+
+From going through the iterations of the original memfd_secret patch
+series, it seems that disabling the syscall in these scenarios was the
+intended behavior [1] (preferred over having
+set_direct_map_invalid_noflush return an error as that would result in
+SIGBUSes at page-fault time), however the check for it got dropped between
+v16 [2] and v17 [3], when secretmem moved away from CMA allocations.
+
+[1]: https://lore.kernel.org/lkml/20201124164930.GK8537@kernel.org/
+[2]: https://lore.kernel.org/lkml/20210121122723.3446-11-rppt@kernel.org/#t
+[3]: https://lore.kernel.org/lkml/20201125092208.12544-10-rppt@kernel.org/
+
+Link: https://lkml.kernel.org/r/20241001080056.784735-1-roypat@amazon.co.uk
+Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas")
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Alexander Graf <graf@amazon.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: James Gowans <jgowans@amazon.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/secretmem.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/secretmem.c
++++ b/mm/secretmem.c
+@@ -238,7 +238,7 @@ SYSCALL_DEFINE1(memfd_secret, unsigned i
+       /* make sure local flags do not confict with global fcntl.h */
+       BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
+-      if (!secretmem_enable)
++      if (!secretmem_enable || !can_set_direct_map())
+               return -ENOSYS;
+       if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
+@@ -280,7 +280,7 @@ static struct file_system_type secretmem
+ static int __init secretmem_init(void)
+ {
+-      if (!secretmem_enable)
++      if (!secretmem_enable || !can_set_direct_map())
+               return 0;
+       secretmem_mnt = kern_mount(&secretmem_fs);
diff --git a/queue-6.6/selftests-mm-fix-incorrect-buffer-mirror-size-in-hmm2-double_map-test.patch b/queue-6.6/selftests-mm-fix-incorrect-buffer-mirror-size-in-hmm2-double_map-test.patch
new file mode 100644 (file)
index 0000000..de16027
--- /dev/null
@@ -0,0 +1,64 @@
+From 76503e1fa1a53ef041a120825d5ce81c7fe7bdd7 Mon Sep 17 00:00:00 2001
+From: Donet Tom <donettom@linux.ibm.com>
+Date: Fri, 27 Sep 2024 00:07:52 -0500
+Subject: selftests/mm: fix incorrect buffer->mirror size in hmm2 double_map test
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Donet Tom <donettom@linux.ibm.com>
+
+commit 76503e1fa1a53ef041a120825d5ce81c7fe7bdd7 upstream.
+
+The hmm2 double_map test was failing due to an incorrect buffer->mirror
+size.  The buffer->mirror size was 6, while buffer->ptr size was 6 *
+PAGE_SIZE.  The test failed because the kernel's copy_to_user function was
+attempting to copy a 6 * PAGE_SIZE buffer to buffer->mirror.  Since the
+size of buffer->mirror was incorrect, copy_to_user failed.
+
+This patch corrects the buffer->mirror size to 6 * PAGE_SIZE.
+
+Test Result without this patch
+==============================
+ #  RUN           hmm2.hmm2_device_private.double_map ...
+ # hmm-tests.c:1680:double_map:Expected ret (-14) == 0 (0)
+ # double_map: Test terminated by assertion
+ #          FAIL  hmm2.hmm2_device_private.double_map
+ not ok 53 hmm2.hmm2_device_private.double_map
+
+Test Result with this patch
+===========================
+ #  RUN           hmm2.hmm2_device_private.double_map ...
+ #            OK  hmm2.hmm2_device_private.double_map
+ ok 53 hmm2.hmm2_device_private.double_map
+
+Link: https://lkml.kernel.org/r/20240927050752.51066-1-donettom@linux.ibm.com
+Fixes: fee9f6d1b8df ("mm/hmm/test: add selftests for HMM")
+Signed-off-by: Donet Tom <donettom@linux.ibm.com>
+Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Cc: Jérôme Glisse <jglisse@redhat.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Mark Brown <broonie@kernel.org>
+Cc: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Cc: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Jason Gunthorpe <jgg@mellanox.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/mm/hmm-tests.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/mm/hmm-tests.c
++++ b/tools/testing/selftests/mm/hmm-tests.c
+@@ -1657,7 +1657,7 @@ TEST_F(hmm2, double_map)
+       buffer->fd = -1;
+       buffer->size = size;
+-      buffer->mirror = malloc(npages);
++      buffer->mirror = malloc(size);
+       ASSERT_NE(buffer->mirror, NULL);
+       /* Reserve a range of addresses. */
diff --git a/queue-6.6/selftests-rseq-fix-mm_cid-test-failure.patch b/queue-6.6/selftests-rseq-fix-mm_cid-test-failure.patch
new file mode 100644 (file)
index 0000000..5389231
--- /dev/null
@@ -0,0 +1,243 @@
+From a0cc649353bb726d4aa0db60dce467432197b746 Mon Sep 17 00:00:00 2001
+From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Date: Tue, 8 Oct 2024 21:28:01 -0400
+Subject: selftests/rseq: Fix mm_cid test failure
+
+From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+
+commit a0cc649353bb726d4aa0db60dce467432197b746 upstream.
+
+Adapt the rseq.c/rseq.h code to follow GNU C library changes introduced by:
+
+glibc commit 2e456ccf0c34 ("Linux: Make __rseq_size useful for feature detection (bug 31965)")
+
+Without this fix, rseq selftests for mm_cid fail:
+
+./run_param_test.sh
+Default parameters
+Running test spinlock
+Running compare-twice test spinlock
+Running mm_cid test spinlock
+Error: cpu id getter unavailable
+
+Fixes: 18c2355838e7 ("selftests/rseq: Implement rseq mm_cid field support")
+Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+CC: Boqun Feng <boqun.feng@gmail.com>
+CC: "Paul E. McKenney" <paulmck@kernel.org>
+Cc: Shuah Khan <skhan@linuxfoundation.org>
+CC: Carlos O'Donell <carlos@redhat.com>
+CC: Florian Weimer <fweimer@redhat.com>
+CC: linux-kselftest@vger.kernel.org
+CC: stable@vger.kernel.org
+Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/rseq/rseq.c |  110 ++++++++++++++++++++++++------------
+ tools/testing/selftests/rseq/rseq.h |   10 ---
+ 2 files changed, 77 insertions(+), 43 deletions(-)
+
+--- a/tools/testing/selftests/rseq/rseq.c
++++ b/tools/testing/selftests/rseq/rseq.c
+@@ -60,12 +60,6 @@ unsigned int rseq_size = -1U;
+ /* Flags used during rseq registration.  */
+ unsigned int rseq_flags;
+-/*
+- * rseq feature size supported by the kernel. 0 if the registration was
+- * unsuccessful.
+- */
+-unsigned int rseq_feature_size = -1U;
+-
+ static int rseq_ownership;
+ static int rseq_reg_success;  /* At least one rseq registration has succeded. */
+@@ -111,6 +105,43 @@ int rseq_available(void)
+       }
+ }
++/* The rseq areas need to be at least 32 bytes. */
++static
++unsigned int get_rseq_min_alloc_size(void)
++{
++      unsigned int alloc_size = rseq_size;
++
++      if (alloc_size < ORIG_RSEQ_ALLOC_SIZE)
++              alloc_size = ORIG_RSEQ_ALLOC_SIZE;
++      return alloc_size;
++}
++
++/*
++ * Return the feature size supported by the kernel.
++ *
++ * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE):
++ *
++ * 0:   Return ORIG_RSEQ_FEATURE_SIZE (20)
++ * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE).
++ *
++ * It should never return a value below ORIG_RSEQ_FEATURE_SIZE.
++ */
++static
++unsigned int get_rseq_kernel_feature_size(void)
++{
++      unsigned long auxv_rseq_feature_size, auxv_rseq_align;
++
++      auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
++      assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
++
++      auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
++      assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
++      if (auxv_rseq_feature_size)
++              return auxv_rseq_feature_size;
++      else
++              return ORIG_RSEQ_FEATURE_SIZE;
++}
++
+ int rseq_register_current_thread(void)
+ {
+       int rc;
+@@ -119,7 +150,7 @@ int rseq_register_current_thread(void)
+               /* Treat libc's ownership as a successful registration. */
+               return 0;
+       }
+-      rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
++      rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG);
+       if (rc) {
+               if (RSEQ_READ_ONCE(rseq_reg_success)) {
+                       /* Incoherent success/failure within process. */
+@@ -140,28 +171,12 @@ int rseq_unregister_current_thread(void)
+               /* Treat libc's ownership as a successful unregistration. */
+               return 0;
+       }
+-      rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
++      rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+       if (rc)
+               return -1;
+       return 0;
+ }
+-static
+-unsigned int get_rseq_feature_size(void)
+-{
+-      unsigned long auxv_rseq_feature_size, auxv_rseq_align;
+-
+-      auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
+-      assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
+-
+-      auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
+-      assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
+-      if (auxv_rseq_feature_size)
+-              return auxv_rseq_feature_size;
+-      else
+-              return ORIG_RSEQ_FEATURE_SIZE;
+-}
+-
+ static __attribute__((constructor))
+ void rseq_init(void)
+ {
+@@ -178,28 +193,54 @@ void rseq_init(void)
+       }
+       if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
+                       *libc_rseq_size_p != 0) {
++              unsigned int libc_rseq_size;
++
+               /* rseq registration owned by glibc */
+               rseq_offset = *libc_rseq_offset_p;
+-              rseq_size = *libc_rseq_size_p;
++              libc_rseq_size = *libc_rseq_size_p;
+               rseq_flags = *libc_rseq_flags_p;
+-              rseq_feature_size = get_rseq_feature_size();
+-              if (rseq_feature_size > rseq_size)
+-                      rseq_feature_size = rseq_size;
++
++              /*
++               * Previous versions of glibc expose the value
++               * 32 even though the kernel only supported 20
++               * bytes initially. Therefore treat 32 as a
++               * special-case. glibc 2.40 exposes a 20 bytes
++               * __rseq_size without using getauxval(3) to
++               * query the supported size, while still allocating a 32
++               * bytes area. Also treat 20 as a special-case.
++               *
++               * Special-cases are handled by using the following
++               * value as active feature set size:
++               *
++               *   rseq_size = min(32, get_rseq_kernel_feature_size())
++               */
++              switch (libc_rseq_size) {
++              case ORIG_RSEQ_FEATURE_SIZE:
++                      fallthrough;
++              case ORIG_RSEQ_ALLOC_SIZE:
++              {
++                      unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size();
++
++                      if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE)
++                              rseq_size = rseq_kernel_feature_size;
++                      else
++                              rseq_size = ORIG_RSEQ_ALLOC_SIZE;
++                      break;
++              }
++              default:
++                      /* Otherwise just use the __rseq_size from libc as rseq_size. */
++                      rseq_size = libc_rseq_size;
++                      break;
++              }
+               return;
+       }
+       rseq_ownership = 1;
+       if (!rseq_available()) {
+               rseq_size = 0;
+-              rseq_feature_size = 0;
+               return;
+       }
+       rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
+       rseq_flags = 0;
+-      rseq_feature_size = get_rseq_feature_size();
+-      if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE)
+-              rseq_size = ORIG_RSEQ_ALLOC_SIZE;
+-      else
+-              rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE;
+ }
+ static __attribute__((destructor))
+@@ -209,7 +250,6 @@ void rseq_exit(void)
+               return;
+       rseq_offset = 0;
+       rseq_size = -1U;
+-      rseq_feature_size = -1U;
+       rseq_ownership = 0;
+ }
+--- a/tools/testing/selftests/rseq/rseq.h
++++ b/tools/testing/selftests/rseq/rseq.h
+@@ -68,12 +68,6 @@ extern unsigned int rseq_size;
+ /* Flags used during rseq registration. */
+ extern unsigned int rseq_flags;
+-/*
+- * rseq feature size supported by the kernel. 0 if the registration was
+- * unsuccessful.
+- */
+-extern unsigned int rseq_feature_size;
+-
+ enum rseq_mo {
+       RSEQ_MO_RELAXED = 0,
+       RSEQ_MO_CONSUME = 1,    /* Unused */
+@@ -193,7 +187,7 @@ static inline uint32_t rseq_current_cpu(
+ static inline bool rseq_node_id_available(void)
+ {
+-      return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, node_id);
++      return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, node_id);
+ }
+ /*
+@@ -207,7 +201,7 @@ static inline uint32_t rseq_current_node
+ static inline bool rseq_mm_cid_available(void)
+ {
+-      return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, mm_cid);
++      return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, mm_cid);
+ }
+ static inline uint32_t rseq_current_mm_cid(void)
index 3a49d73b30bc8ab38a937aeaf9e8355918273650..93ec498c61f0254a327032f391caaaa94ffd7c84 100644 (file)
@@ -185,3 +185,26 @@ usb-xhci-fix-problem-with-xhci-resume-from-suspend.patch
 usb-storage-ignore-bogus-device-raised-by-jieli-br21-usb-sound-chip.patch
 usb-gadget-core-force-synchronous-registration.patch
 hid-intel-ish-hid-fix-uninitialized-variable-rv-in-ish_fw_xfer_direct_dma.patch
+drm-v3d-stop-the-active-perfmon-before-being-destroyed.patch
+drm-vc4-stop-the-active-perfmon-before-being-destroyed.patch
+drm-i915-hdcp-fix-connector-refcounting.patch
+bluetooth-hci_conn-fix-uaf-in-hci_enhanced_setup_sync.patch
+scsi-wd33c93-don-t-use-stale-scsi_pointer-value.patch
+scsi-ufs-use-pre-calculated-offsets-in-ufshcd_init_lrb.patch
+mptcp-fallback-when-mptcp-opts-are-dropped-after-1st-data.patch
+ata-libata-avoid-superfluous-disk-spin-down-spin-up-during-hibernation.patch
+net-explicitly-clear-the-sk-pointer-when-pf-create-fails.patch
+net-fix-an-unsafe-loop-on-the-list.patch
+net-dsa-lan9303-ensure-chip-reset-and-wait-for-ready-status.patch
+net-phy-remove-led-entry-from-leds-list-on-unregister.patch
+mptcp-handle-consistently-dss-corruption.patch
+mptcp-pm-do-not-remove-closing-subflows.patch
+device-dax-correct-pgoff-align-in-dax_set_mapping.patch
+nouveau-dmem-fix-vulnerability-in-migrate_to_ram-upon-copy-error.patch
+powercap-intel_rapl_tpmi-fix-bogus-register-reading.patch
+selftests-mm-fix-incorrect-buffer-mirror-size-in-hmm2-double_map-test.patch
+selftests-rseq-fix-mm_cid-test-failure.patch
+btrfs-split-remaining-space-to-discard-in-chunks.patch
+kthread-unpark-only-parked-kthread.patch
+fs-proc-kcore.c-allow-translation-of-physical-memory-addresses.patch
+secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map.patch