--- /dev/null
+From a38719e3157118428e34fbd45b0d0707a5877784 Mon Sep 17 00:00:00 2001
+From: Niklas Cassel <cassel@kernel.org>
+Date: Tue, 8 Oct 2024 15:58:44 +0200
+Subject: ata: libata: avoid superfluous disk spin down + spin up during hibernation
+
+From: Niklas Cassel <cassel@kernel.org>
+
+commit a38719e3157118428e34fbd45b0d0707a5877784 upstream.
+
+A user reported that commit aa3998dbeb3a ("ata: libata-scsi: Disable scsi
+device manage_system_start_stop") introduced a spin down + immediate spin
+up of the disk both when entering and when resuming from hibernation.
+This behavior was not there before, and causes an increased latency both
+when entering and when resuming from hibernation.
+
+Hibernation is done by three consecutive PM events, in the following order:
+1) PM_EVENT_FREEZE
+2) PM_EVENT_THAW
+3) PM_EVENT_HIBERNATE
+
+Commit aa3998dbeb3a ("ata: libata-scsi: Disable scsi device
+manage_system_start_stop") modified ata_eh_handle_port_suspend() to call
+ata_dev_power_set_standby() (which spins down the disk), for both event
+PM_EVENT_FREEZE and event PM_EVENT_HIBERNATE.
+
+Documentation/driver-api/pm/devices.rst, section "Entering Hibernation",
+explicitly mentions that PM_EVENT_FREEZE does not have to put the device
+in a low-power state, and actually recommends not doing so. Thus, let's not
+spin down the disk on PM_EVENT_FREEZE. (The disk will instead be spun down
+during the subsequent PM_EVENT_HIBERNATE event.)
+
+This way, PM_EVENT_FREEZE will behave as it did before commit aa3998dbeb3a
+("ata: libata-scsi: Disable scsi device manage_system_start_stop"), while
+PM_EVENT_HIBERNATE will continue to spin down the disk.
+
+This will avoid the superfluous spin down + spin up when entering and
+resuming from hibernation, while still making sure that the disk is spun
+down before actually entering hibernation.
+
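+In essence, the hunk below boils down to skipping the standby step for the
+freeze phase only:
+
+  /* sketch of the updated ata_eh_handle_port_suspend() logic */
+  if (!(ap->pm_mesg.event & PM_EVENT_FREEZE)) {
+          /* spin down only for suspend/hibernate, not for freeze */
+          ata_for_each_link(link, ap, HOST_FIRST) {
+                  ata_for_each_dev(dev, link, ENABLED)
+                          ata_dev_power_set_standby(dev);
+          }
+  }
+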
+Cc: stable@vger.kernel.org # v6.6+
+Fixes: aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop")
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Link: https://lore.kernel.org/r/20241008135843.1266244-2-cassel@kernel.org
+Signed-off-by: Niklas Cassel <cassel@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/libata-eh.c | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/drivers/ata/libata-eh.c
++++ b/drivers/ata/libata-eh.c
+@@ -3946,10 +3946,20 @@ static void ata_eh_handle_port_suspend(s
+
+ WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);
+
+- /* Set all devices attached to the port in standby mode */
+- ata_for_each_link(link, ap, HOST_FIRST) {
+- ata_for_each_dev(dev, link, ENABLED)
+- ata_dev_power_set_standby(dev);
++ /*
++ * We will reach this point for all of the PM events:
++ * PM_EVENT_SUSPEND (if runtime pm, PM_EVENT_AUTO will also be set)
++ * PM_EVENT_FREEZE, and PM_EVENT_HIBERNATE.
++ *
++ * We do not want to perform disk spin down for PM_EVENT_FREEZE.
++ * (Spin down will be performed by the subsequent PM_EVENT_HIBERNATE.)
++ */
++ if (!(ap->pm_mesg.event & PM_EVENT_FREEZE)) {
++ /* Set all devices attached to the port in standby mode */
++ ata_for_each_link(link, ap, HOST_FIRST) {
++ ata_for_each_dev(dev, link, ENABLED)
++ ata_dev_power_set_standby(dev);
++ }
+ }
+
+ /*
--- /dev/null
+From 7fcbd9785d4c17ea533c42f20a9083a83f301fa6 Mon Sep 17 00:00:00 2001
+From: "Kun(llfl)" <llfl@linux.alibaba.com>
+Date: Fri, 27 Sep 2024 15:45:09 +0800
+Subject: device-dax: correct pgoff align in dax_set_mapping()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kun(llfl) <llfl@linux.alibaba.com>
+
+commit 7fcbd9785d4c17ea533c42f20a9083a83f301fa6 upstream.
+
+pgoff should be aligned using ALIGN_DOWN() instead of ALIGN(). Otherwise,
+a vmf->address that is not aligned to fault_size will be rounded up to the
+next alignment boundary, which can result in memory-failure handling
+getting the wrong address.
+
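+For illustration, with a hypothetical 2M fault_size the two macros round in
+opposite directions (the address below is made up for this example):
+
+  #define FAULT_SIZE 0x200000UL                         /* 2M */
+  unsigned long addr = 0x1342380UL;                     /* not 2M-aligned */
+
+  ALIGN(addr, FAULT_SIZE);      /* 0x1400000: next boundary, wrong page  */
+  ALIGN_DOWN(addr, FAULT_SIZE); /* 0x1200000: start of the faulting page */
+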
+It's a subtle situation that can only be observed in page_mapped_in_vma()
+after the page fault has been handled by dev_dax_huge_fault(). Generally,
+there is little chance of page_mapped_in_vma() being called on a dev-dax
+page, unless error injection is performed on the dax device to trigger an
+MCE and the subsequent memory-failure handling. In that case,
+page_mapped_in_vma() is used to determine which task is accessing the
+failure address and to kill that task in the end.
+
+
+We used a self-developed dax device (with 2M-aligned mappings) to perform
+error injection at random addresses. It turned out that an error injected
+at a non-2M-aligned address caused endless MCEs until panic, because
+page_mapped_in_vma() kept returning the wrong address and the task
+accessing the failure address was never killed properly:
+
+
+[ 3783.719419] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3784.049006] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3784.049190] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3784.448042] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3784.448186] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3784.792026] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3784.792179] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3785.162502] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3785.162633] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3785.461116] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3785.461247] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3785.764730] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3785.764859] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3786.042128] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3786.042259] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3786.464293] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3786.464423] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3786.818090] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3786.818217] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+[ 3787.085297] mce: Uncorrected hardware memory error in user-access at
+200c9742380
+[ 3787.085424] Memory failure: 0x200c9742: recovery action for dax page:
+Recovered
+
+It took us several weeks to pinpoint this problem, but we eventually
+used bpftrace to trace the page fault and MCE addresses and successfully
+identified the issue.
+
+
+Joao added:
+
+: Likely we never reproduce in production because we always pin
+: device-dax regions in the region align they provide (Qemu does
+: similarly with prealloc in hugetlb/file backed memory). I think this
+: bug requires that we touch *unpinned* device-dax regions unaligned to
+: the device-dax selected alignment (page size i.e. 4K/2M/1G)
+
+Link: https://lkml.kernel.org/r/23c02a03e8d666fef11bbe13e85c69c8b4ca0624.1727421694.git.llfl@linux.alibaba.com
+Fixes: b9b5777f09be ("device-dax: use ALIGN() for determining pgoff")
+Signed-off-by: Kun(llfl) <llfl@linux.alibaba.com>
+Tested-by: JianXiong Zhao <zhaojianxiong.zjx@alibaba-inc.com>
+Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dax/device.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/dax/device.c b/drivers/dax/device.c
+index 9c1a729cd77e..6d74e62bbee0 100644
+--- a/drivers/dax/device.c
++++ b/drivers/dax/device.c
+@@ -86,7 +86,7 @@ static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
+ nr_pages = 1;
+
+ pgoff = linear_page_index(vmf->vma,
+- ALIGN(vmf->address, fault_size));
++ ALIGN_DOWN(vmf->address, fault_size));
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
+--
+2.47.0
+
--- /dev/null
+From 7d1fd3638ee3a9f9bca4785fffb638ca19120718 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
+Date: Fri, 4 Oct 2024 10:02:29 -0300
+Subject: drm/v3d: Stop the active perfmon before being destroyed
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maíra Canal <mcanal@igalia.com>
+
+commit 7d1fd3638ee3a9f9bca4785fffb638ca19120718 upstream.
+
+When running `kmscube` with one or more performance monitors enabled
+via `GALLIUM_HUD`, the following kernel panic can occur:
+
+[ 55.008324] Unable to handle kernel paging request at virtual address 00000000052004a4
+[ 55.008368] Mem abort info:
+[ 55.008377] ESR = 0x0000000096000005
+[ 55.008387] EC = 0x25: DABT (current EL), IL = 32 bits
+[ 55.008402] SET = 0, FnV = 0
+[ 55.008412] EA = 0, S1PTW = 0
+[ 55.008421] FSC = 0x05: level 1 translation fault
+[ 55.008434] Data abort info:
+[ 55.008442] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
+[ 55.008455] CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+[ 55.008467] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+[ 55.008481] user pgtable: 4k pages, 39-bit VAs, pgdp=00000001046c6000
+[ 55.008497] [00000000052004a4] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000
+[ 55.008525] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP
+[ 55.008542] Modules linked in: rfcomm [...] vc4 v3d snd_soc_hdmi_codec drm_display_helper
+gpu_sched drm_shmem_helper cec drm_dma_helper drm_kms_helper i2c_brcmstb
+drm drm_panel_orientation_quirks snd_soc_core snd_compress snd_pcm_dmaengine snd_pcm snd_timer snd backlight
+[ 55.008799] CPU: 2 PID: 166 Comm: v3d_bin Tainted: G C 6.6.47+rpt-rpi-v8 #1 Debian 1:6.6.47-1+rpt1
+[ 55.008824] Hardware name: Raspberry Pi 4 Model B Rev 1.5 (DT)
+[ 55.008838] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+[ 55.008855] pc : __mutex_lock.constprop.0+0x90/0x608
+[ 55.008879] lr : __mutex_lock.constprop.0+0x58/0x608
+[ 55.008895] sp : ffffffc080673cf0
+[ 55.008904] x29: ffffffc080673cf0 x28: 0000000000000000 x27: ffffff8106188a28
+[ 55.008926] x26: ffffff8101e78040 x25: ffffff8101baa6c0 x24: ffffffd9d989f148
+[ 55.008947] x23: ffffffda1c2a4008 x22: 0000000000000002 x21: ffffffc080673d38
+[ 55.008968] x20: ffffff8101238000 x19: ffffff8104f83188 x18: 0000000000000000
+[ 55.008988] x17: 0000000000000000 x16: ffffffda1bd04d18 x15: 00000055bb08bc90
+[ 55.009715] x14: 0000000000000000 x13: 0000000000000000 x12: ffffffda1bd4cbb0
+[ 55.010433] x11: 00000000fa83b2da x10: 0000000000001a40 x9 : ffffffda1bd04d04
+[ 55.011162] x8 : ffffff8102097b80 x7 : 0000000000000000 x6 : 00000000030a5857
+[ 55.011880] x5 : 00ffffffffffffff x4 : 0300000005200470 x3 : 0300000005200470
+[ 55.012598] x2 : ffffff8101238000 x1 : 0000000000000021 x0 : 0300000005200470
+[ 55.013292] Call trace:
+[ 55.013959] __mutex_lock.constprop.0+0x90/0x608
+[ 55.014646] __mutex_lock_slowpath+0x1c/0x30
+[ 55.015317] mutex_lock+0x50/0x68
+[ 55.015961] v3d_perfmon_stop+0x40/0xe0 [v3d]
+[ 55.016627] v3d_bin_job_run+0x10c/0x2d8 [v3d]
+[ 55.017282] drm_sched_main+0x178/0x3f8 [gpu_sched]
+[ 55.017921] kthread+0x11c/0x128
+[ 55.018554] ret_from_fork+0x10/0x20
+[ 55.019168] Code: f9400260 f1001c1f 54001ea9 927df000 (b9403401)
+[ 55.019776] ---[ end trace 0000000000000000 ]---
+[ 55.020411] note: v3d_bin[166] exited with preempt_count 1
+
+This issue arises because, upon closing the file descriptor (which happens
+when we interrupt `kmscube`), the active performance monitor is not
+stopped. Although all perfmons are destroyed in `v3d_perfmon_close_file()`,
+the active performance monitor's pointer (`v3d->active_perfmon`) is still
+retained.
+
+If `kmscube` is run again, the driver will attempt to stop the active
+performance monitor using the stale pointer in `v3d->active_perfmon`.
+However, this pointer is no longer valid because the previous process has
+already terminated, and all performance monitors associated with it have
+been destroyed and freed.
+
+To fix this, when the active performance monitor belongs to a given
+process, explicitly stop it before destroying and freeing it.
+
+Cc: stable@vger.kernel.org # v5.15+
+Closes: https://github.com/raspberrypi/linux/issues/6389
+Fixes: 26a4dc29b74a ("drm/v3d: Expose performance counters to userspace")
+Signed-off-by: Maíra Canal <mcanal@igalia.com>
+Reviewed-by: Juan A. Suarez <jasuarez@igalia.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241004130625.918580-2-mcanal@igalia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/v3d/v3d_perfmon.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/v3d/v3d_perfmon.c
++++ b/drivers/gpu/drm/v3d/v3d_perfmon.c
+@@ -101,6 +101,11 @@ void v3d_perfmon_open_file(struct v3d_fi
+ static int v3d_perfmon_idr_del(int id, void *elem, void *data)
+ {
+ struct v3d_perfmon *perfmon = elem;
++ struct v3d_dev *v3d = (struct v3d_dev *)data;
++
++ /* If the active perfmon is being destroyed, stop it first */
++ if (perfmon == v3d->active_perfmon)
++ v3d_perfmon_stop(v3d, perfmon, false);
+
+ v3d_perfmon_put(perfmon);
+
+@@ -109,8 +114,10 @@ static int v3d_perfmon_idr_del(int id, v
+
+ void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv)
+ {
++ struct v3d_dev *v3d = v3d_priv->v3d;
++
+ mutex_lock(&v3d_priv->perfmon.lock);
+- idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, NULL);
++ idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, v3d);
+ idr_destroy(&v3d_priv->perfmon.idr);
+ mutex_unlock(&v3d_priv->perfmon.lock);
+ }
--- /dev/null
+From 0b2ad4f6f2bec74a5287d96cb2325a5e11706f22 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
+Date: Fri, 4 Oct 2024 09:36:00 -0300
+Subject: drm/vc4: Stop the active perfmon before being destroyed
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maíra Canal <mcanal@igalia.com>
+
+commit 0b2ad4f6f2bec74a5287d96cb2325a5e11706f22 upstream.
+
+Upon closing the file descriptor, the active performance monitor is not
+stopped. Although all perfmons are destroyed in `vc4_perfmon_close_file()`,
+the active performance monitor's pointer (`vc4->active_perfmon`) is still
+retained.
+
+If we open a new file descriptor and submit a few jobs with performance
+monitors, the driver will attempt to stop the active performance monitor
+using the stale pointer in `vc4->active_perfmon`. However, this pointer
+is no longer valid because the previous process has already terminated,
+and all performance monitors associated with it have been destroyed and
+freed.
+
+To fix this, when the active performance monitor belongs to a given
+process, explicitly stop it before destroying and freeing it.
+
+Cc: stable@vger.kernel.org # v4.17+
+Cc: Boris Brezillon <bbrezillon@kernel.org>
+Cc: Juan A. Suarez Romero <jasuarez@igalia.com>
+Fixes: 65101d8c9108 ("drm/vc4: Expose performance counters to userspace")
+Signed-off-by: Maíra Canal <mcanal@igalia.com>
+Reviewed-by: Juan A. Suarez <jasuarez@igalia.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241004123817.890016-2-mcanal@igalia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/vc4/vc4_perfmon.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/vc4/vc4_perfmon.c
++++ b/drivers/gpu/drm/vc4/vc4_perfmon.c
+@@ -116,6 +116,11 @@ void vc4_perfmon_open_file(struct vc4_fi
+ static int vc4_perfmon_idr_del(int id, void *elem, void *data)
+ {
+ struct vc4_perfmon *perfmon = elem;
++ struct vc4_dev *vc4 = (struct vc4_dev *)data;
++
++ /* If the active perfmon is being destroyed, stop it first */
++ if (perfmon == vc4->active_perfmon)
++ vc4_perfmon_stop(vc4, perfmon, false);
+
+ vc4_perfmon_put(perfmon);
+
+@@ -130,7 +135,7 @@ void vc4_perfmon_close_file(struct vc4_f
+ return;
+
+ mutex_lock(&vc4file->perfmon.lock);
+- idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL);
++ idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, vc4);
+ idr_destroy(&vc4file->perfmon.idr);
+ mutex_unlock(&vc4file->perfmon.lock);
+ mutex_destroy(&vc4file->perfmon.lock);
--- /dev/null
+From 214e01ad4ed7158cab66498810094fac5d09b218 Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <frederic@kernel.org>
+Date: Fri, 13 Sep 2024 23:46:34 +0200
+Subject: kthread: unpark only parked kthread
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+commit 214e01ad4ed7158cab66498810094fac5d09b218 upstream.
+
+Calling into kthread unparking unconditionally is mostly harmless when
+the kthread is already unparked. The wake up is then simply ignored
+because the target is not in TASK_PARKED state.
+
+However, if the kthread is per CPU, the wake up is preceded by a call
+to kthread_bind() which expects the task to be inactive and in
+TASK_PARKED state, which obviously isn't the case if it is unparked.
+
+As a result, calling kthread_stop() on an unparked per-cpu kthread
+triggers such a warning:
+
+ WARNING: CPU: 0 PID: 11 at kernel/kthread.c:525 __kthread_bind_mask kernel/kthread.c:525
+ <TASK>
+ kthread_stop+0x17a/0x630 kernel/kthread.c:707
+ destroy_workqueue+0x136/0xc40 kernel/workqueue.c:5810
+ wg_destruct+0x1e2/0x2e0 drivers/net/wireguard/device.c:257
+ netdev_run_todo+0xe1a/0x1000 net/core/dev.c:10693
+ default_device_exit_batch+0xa14/0xa90 net/core/dev.c:11769
+ ops_exit_list net/core/net_namespace.c:178 [inline]
+ cleanup_net+0x89d/0xcc0 net/core/net_namespace.c:640
+ process_one_work kernel/workqueue.c:3231 [inline]
+ process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312
+ worker_thread+0x86d/0xd70 kernel/workqueue.c:3393
+ kthread+0x2f0/0x390 kernel/kthread.c:389
+ ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
+ ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
+ </TASK>
+
+Fix this by skipping the unnecessary unparking while stopping a kthread.
+
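+The warning comes from the unconditional unpark on the stop path, roughly:
+
+  kthread_stop(k)
+    -> kthread_unpark(k)                        /* called even if not parked */
+         -> __kthread_bind(k, ...)              /* per-CPU kthreads only     */
+              -> wait_task_inactive() fails     /* task running, not parked  */
+                   -> WARN_ON() in __kthread_bind_mask()
+
+With this patch, kthread_unpark() returns early unless KTHREAD_SHOULD_PARK is
+set, so the bind/wake-up path is never taken for an already-unparked kthread.
+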
+Link: https://lkml.kernel.org/r/20240913214634.12557-1-frederic@kernel.org
+Fixes: 5c25b5ff89f0 ("workqueue: Tag bound workers with KTHREAD_IS_PER_CPU")
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Reported-by: syzbot+943d34fa3cf2191e3068@syzkaller.appspotmail.com
+Tested-by: syzbot+943d34fa3cf2191e3068@syzkaller.appspotmail.com
+Suggested-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Hillf Danton <hdanton@sina.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kthread.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -622,6 +622,8 @@ void kthread_unpark(struct task_struct *
+ {
+ struct kthread *kthread = to_kthread(k);
+
++ if (!test_bit(KTHREAD_SHOULD_PARK, &kthread->flags))
++ return;
+ /*
+ * Newly created kthread was parked when the CPU was offline.
+ * The binding was lost and we need to set it again.
--- /dev/null
+From 119d51e225febc8152476340a880f5415a01e99e Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 8 Oct 2024 13:04:54 +0200
+Subject: mptcp: fallback when MPTCP opts are dropped after 1st data
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 119d51e225febc8152476340a880f5415a01e99e upstream.
+
+As reported by Christoph [1], before this patch, an MPTCP connection was
+wrongly reset when a host received a first data packet with MPTCP
+options after the 3wHS, but got the next ones without.
+
+According to the MPTCP v1 specs [2], a fallback should happen in this
+case, because the host didn't receive a DATA_ACK from the other peer,
+nor did it receive data for more than the initial window, which would
+imply that a DATA_ACK had been received by the other peer.
+
+The patch here re-uses the same logic as the one used in other places:
+it looks at allow_infinite_fallback, which is disabled when an additional
+subflow is created. It does not look at the first DATA_ACK (or infer one
+from data received from the other side) as suggested by the RFC, but it
+is a continuation of what was already done, which is safer, and it fixes
+the reported issue. The next step, looking at this first DATA_ACK, is
+tracked in [4].
+
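+Roughly, the resulting check is (simplified from the hunk below, with the
+other branches of subflow_can_fallback() omitted):
+
+  /* fallback is acceptable only while no additional subflow was created */
+  if (READ_ONCE(msk->csum_enabled))
+          return !subflow->valid_csum_seen;
+
+  return READ_ONCE(msk->allow_infinite_fallback);
+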
+This patch has been validated using the following Packetdrill script:
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_MPTCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ // 3WHS is OK
+ +0.0 < S 0:0(0) win 65535 <mss 1460, sackOK, nop, nop, nop, wscale 6, mpcapable v1 flags[flag_h] nokey>
+ +0.0 > S. 0:0(0) ack 1 <mss 1460, nop, nop, sackOK, nop, wscale 8, mpcapable v1 flags[flag_h] key[skey]>
+ +0.1 < . 1:1(0) ack 1 win 2048 <mpcapable v1 flags[flag_h] key[ckey=2, skey]>
+ +0 accept(3, ..., ...) = 4
+
+ // Data from the client with valid MPTCP options (no DATA_ACK: normal)
+ +0.1 < P. 1:501(500) ack 1 win 2048 <mpcapable v1 flags[flag_h] key[skey, ckey] mpcdatalen 500, nop, nop>
+ // From here, the MPTCP options will be dropped by a middlebox
+ +0.0 > . 1:1(0) ack 501 <dss dack8=501 dll=0 nocs>
+
+ +0.1 read(4, ..., 500) = 500
+ +0 write(4, ..., 100) = 100
+
+ // The server replies with data, still thinking MPTCP is being used
+ +0.0 > P. 1:101(100) ack 501 <dss dack8=501 dsn8=1 ssn=1 dll=100 nocs, nop, nop>
+ // But the client already did a fallback to TCP, because the two previous packets have been received without MPTCP options
+ +0.1 < . 501:501(0) ack 101 win 2048
+
+ +0.0 < P. 501:601(100) ack 101 win 2048
+ // The server should fallback to TCP, not reset: it didn't get a DATA_ACK, nor data for more than the initial window
+ +0.0 > . 101:101(0) ack 601
+
+Note that this script requires Packetdrill with MPTCP support, see [3].
+
+Fixes: dea2b1ea9c70 ("mptcp: do not reset MP_CAPABLE subflow on mapping errors")
+Cc: stable@vger.kernel.org
+Reported-by: Christoph Paasch <cpaasch@apple.com>
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/518 [1]
+Link: https://datatracker.ietf.org/doc/html/rfc8684#name-fallback [2]
+Link: https://github.com/multipath-tcp/packetdrill [3]
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/519 [4]
+Reviewed-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-3-c6fb8e93e551@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/subflow.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1158,7 +1158,7 @@ static bool subflow_can_fallback(struct
+ else if (READ_ONCE(msk->csum_enabled))
+ return !subflow->valid_csum_seen;
+ else
+- return !subflow->fully_established;
++ return READ_ONCE(msk->allow_infinite_fallback);
+ }
+
+ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
--- /dev/null
+From e32d262c89e2b22cb0640223f953b548617ed8a6 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Tue, 8 Oct 2024 13:04:52 +0200
+Subject: mptcp: handle consistently DSS corruption
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit e32d262c89e2b22cb0640223f953b548617ed8a6 upstream.
+
+A bugged peer implementation can send corrupted DSS options, consistently
+hitting a few warnings in the data path. Use DEBUG_NET assertions to
+avoid the splat on some builds, and handle the error consistently by
+dumping related MIBs and performing a fallback and/or a reset according
+to the subflow type.
+
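+The handling policy is centralised in a small helper (see the protocol.c
+hunk below):
+
+  /* on detected DSS corruption: fall back if still possible, else reset */
+  if (READ_ONCE(msk->allow_infinite_fallback)) {
+          MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONFALLBACK);
+          mptcp_do_fallback(ssk);
+  } else {
+          MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET);
+          mptcp_subflow_reset(ssk);
+  }
+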
+Fixes: 6771bfd9ee24 ("mptcp: update mptcp ack sequence from work queue")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-1-c6fb8e93e551@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/mib.c | 2 ++
+ net/mptcp/mib.h | 2 ++
+ net/mptcp/protocol.c | 24 +++++++++++++++++++++---
+ net/mptcp/subflow.c | 4 +++-
+ 4 files changed, 28 insertions(+), 4 deletions(-)
+
+--- a/net/mptcp/mib.c
++++ b/net/mptcp/mib.c
+@@ -26,6 +26,8 @@ static const struct snmp_mib mptcp_snmp_
+ SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
+ SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
+ SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
++ SNMP_MIB_ITEM("DSSCorruptionFallback", MPTCP_MIB_DSSCORRUPTIONFALLBACK),
++ SNMP_MIB_ITEM("DSSCorruptionReset", MPTCP_MIB_DSSCORRUPTIONRESET),
+ SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
+ SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
+ SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
+--- a/net/mptcp/mib.h
++++ b/net/mptcp/mib.h
+@@ -19,6 +19,8 @@ enum linux_mptcp_mib_field {
+ MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
+ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
+ MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
++ MPTCP_MIB_DSSCORRUPTIONFALLBACK,/* DSS corruption detected, fallback */
++ MPTCP_MIB_DSSCORRUPTIONRESET, /* DSS corruption detected, MPJ subflow reset */
+ MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */
+ MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
+ MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -630,6 +630,18 @@ static bool mptcp_check_data_fin(struct
+ return ret;
+ }
+
++static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
++{
++ if (READ_ONCE(msk->allow_infinite_fallback)) {
++ MPTCP_INC_STATS(sock_net(ssk),
++ MPTCP_MIB_DSSCORRUPTIONFALLBACK);
++ mptcp_do_fallback(ssk);
++ } else {
++ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET);
++ mptcp_subflow_reset(ssk);
++ }
++}
++
+ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
+ struct sock *ssk,
+ unsigned int *bytes)
+@@ -702,10 +714,16 @@ static bool __mptcp_move_skbs_from_subfl
+ moved += len;
+ seq += len;
+
+- if (WARN_ON_ONCE(map_remaining < len))
+- break;
++ if (unlikely(map_remaining < len)) {
++ DEBUG_NET_WARN_ON_ONCE(1);
++ mptcp_dss_corruption(msk, ssk);
++ }
+ } else {
+- WARN_ON_ONCE(!fin);
++ if (unlikely(!fin)) {
++ DEBUG_NET_WARN_ON_ONCE(1);
++ mptcp_dss_corruption(msk, ssk);
++ }
++
+ sk_eat_skb(ssk, skb);
+ done = true;
+ }
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -855,8 +855,10 @@ static bool skb_is_fully_mapped(struct s
+ unsigned int skb_consumed;
+
+ skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
+- if (WARN_ON_ONCE(skb_consumed >= skb->len))
++ if (unlikely(skb_consumed >= skb->len)) {
++ DEBUG_NET_WARN_ON_ONCE(1);
+ return true;
++ }
+
+ return skb->len - skb_consumed <= subflow->map_data_len -
+ mptcp_subflow_get_map_offset(subflow);
--- /dev/null
+From db0a37b7ac27d8ca27d3dc676a16d081c16ec7b9 Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Tue, 8 Oct 2024 13:04:55 +0200
+Subject: mptcp: pm: do not remove closing subflows
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit db0a37b7ac27d8ca27d3dc676a16d081c16ec7b9 upstream.
+
+In a previous fix, the in-kernel path-manager has been modified not to
+retrigger the removal of a subflow if it was already closed, e.g. when
+the initial subflow is removed, but kept in the subflows list.
+
+To be complete, this fix should also skip the subflows that are in any
+closing state: mptcp_close_ssk() will initiate the closure, but the
+switch to the TCP_CLOSE state depends on the other peer.
+
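+The check relies on the usual TCP state-mask convention (TCPF_FOO is
+1 << TCP_FOO), so a single bitwise test covers every closing state at once:
+
+  /* true if ssk is in FIN_WAIT1, FIN_WAIT2, CLOSING or CLOSE */
+  if ((1 << inet_sk_state_load(ssk)) &
+      (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE))
+          continue;
+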
+Fixes: 58e1b66b4e4b ("mptcp: pm: do not remove already closed subflows")
+Cc: stable@vger.kernel.org
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-4-c6fb8e93e551@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/pm_netlink.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -864,7 +864,8 @@ static void mptcp_pm_nl_rm_addr_or_subfl
+ int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
+ u8 id = subflow_get_local_id(subflow);
+
+- if (inet_sk_state_load(ssk) == TCP_CLOSE)
++ if ((1 << inet_sk_state_load(ssk)) &
++ (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE))
+ continue;
+ if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
+ continue;
--- /dev/null
+From 5c14e51d2d7df49fe0d4e64a12c58d2542f452ff Mon Sep 17 00:00:00 2001
+From: Anatolij Gustschin <agust@denx.de>
+Date: Fri, 4 Oct 2024 13:36:54 +0200
+Subject: net: dsa: lan9303: ensure chip reset and wait for READY status
+
+From: Anatolij Gustschin <agust@denx.de>
+
+commit 5c14e51d2d7df49fe0d4e64a12c58d2542f452ff upstream.
+
+Accessing device registers does not seem to be reliable: the chip
+revision is sometimes detected wrongly (0 instead of the expected 1).
+
+Ensure that the chip reset is performed via the reset GPIO and then
+wait for the 'Device Ready' status in the HW_CFG register before doing
+any register initializations.
+
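+The wait is implemented with the generic read_poll_timeout() helper: poll
+HW_CFG via lan9303_read() every 20 ms until the READY bit is set, and give
+up after 6 s (long enough for the largest supported EEPROM to load), roughly:
+
+  err = read_poll_timeout(lan9303_read, ret,
+                          !ret && reg & LAN9303_HW_CFG_READY,
+                          20000, 6000000, false,
+                          chip->regmap, LAN9303_HW_CFG, &reg);
+  if (ret)        /* I/O error reported by lan9303_read() */
+          return ret;
+  if (err)        /* timed out waiting for LAN9303_HW_CFG_READY */
+          return err;
+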
+Cc: stable@vger.kernel.org
+Fixes: a1292595e006 ("net: dsa: add new DSA switch driver for the SMSC-LAN9303")
+Signed-off-by: Anatolij Gustschin <agust@denx.de>
+[alex: reworked using read_poll_timeout()]
+Signed-off-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Link: https://patch.msgid.link/20241004113655.3436296-1-alexander.sverdlin@siemens.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/lan9303-core.c | 29 +++++++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+--- a/drivers/net/dsa/lan9303-core.c
++++ b/drivers/net/dsa/lan9303-core.c
+@@ -6,6 +6,7 @@
+ #include <linux/module.h>
+ #include <linux/gpio/consumer.h>
+ #include <linux/regmap.h>
++#include <linux/iopoll.h>
+ #include <linux/mutex.h>
+ #include <linux/mii.h>
+ #include <linux/phy.h>
+@@ -825,6 +826,8 @@ static void lan9303_handle_reset(struct
+ if (!chip->reset_gpio)
+ return;
+
++ gpiod_set_value_cansleep(chip->reset_gpio, 1);
++
+ if (chip->reset_duration != 0)
+ msleep(chip->reset_duration);
+
+@@ -850,8 +853,34 @@ static int lan9303_disable_processing(st
+ static int lan9303_check_device(struct lan9303 *chip)
+ {
+ int ret;
++ int err;
+ u32 reg;
+
++ /* In I2C-managed configurations this polling loop will clash with
++ * switch's reading of EEPROM right after reset and this behaviour is
++ * not configurable. While lan9303_read() already has quite long retry
++ * timeout, seems not all cases are being detected as arbitration error.
++ *
++ * According to datasheet, EEPROM loader has 30ms timeout (in case of
++ * missing EEPROM).
++ *
++ * Loading of the largest supported EEPROM is expected to take at least
++ * 5.9s.
++ */
++ err = read_poll_timeout(lan9303_read, ret,
++ !ret && reg & LAN9303_HW_CFG_READY,
++ 20000, 6000000, false,
++ chip->regmap, LAN9303_HW_CFG, &reg);
++ if (ret) {
++ dev_err(chip->dev, "failed to read HW_CFG reg: %pe\n",
++ ERR_PTR(ret));
++ return ret;
++ }
++ if (err) {
++ dev_err(chip->dev, "HW_CFG not ready: 0x%08x\n", reg);
++ return err;
++ }
++
+ ret = lan9303_read(chip->regmap, LAN9303_CHIP_REV, &reg);
+ if (ret) {
+ dev_err(chip->dev, "failed to read chip revision register: %d\n",
--- /dev/null
+From 631083143315d1b192bd7d915b967b37819e88ea Mon Sep 17 00:00:00 2001
+From: Ignat Korchagin <ignat@cloudflare.com>
+Date: Thu, 3 Oct 2024 18:01:51 +0100
+Subject: net: explicitly clear the sk pointer, when pf->create fails
+
+From: Ignat Korchagin <ignat@cloudflare.com>
+
+commit 631083143315d1b192bd7d915b967b37819e88ea upstream.
+
+We have recently noticed the exact same KASAN splat as in commit
+6cd4a78d962b ("net: do not leave a dangling sk pointer, when socket
+creation fails"). The problem is that commit did not fully address the
+problem, as some pf->create implementations do not use sk_common_release
+in their error paths.
+
+For example, we can use the same reproducer as in the above commit, but
+changing ping to arping. arping uses AF_PACKET socket and if packet_create
+fails, it will just sk_free the allocated sk object.
+
+While we could chase all the pf->create implementations and make sure they
+NULL out the freed sk pointer in the socket on error, we can't guarantee
+that future protocols will not make the same mistake.
+
+So it is easier to just explicitly NULL the sk pointer upon return from
+pf->create in __sock_create. We do know that pf->create always releases the
+allocated sk object on error, so if the pointer is not NULL, it is
+definitely dangling.
+
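+The resulting error path in __sock_create() is simply (see the hunk below):
+
+  err = pf->create(net, sock, protocol, kern);
+  if (err < 0) {
+          /* ->create released sock->sk on error; drop the stale pointer */
+          sock->sk = NULL;
+          goto out_module_put;
+  }
+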
+Fixes: 6cd4a78d962b ("net: do not leave a dangling sk pointer, when socket creation fails")
+Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241003170151.69445-1-ignat@cloudflare.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/socket.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -1548,8 +1548,13 @@ int __sock_create(struct net *net, int f
+ rcu_read_unlock();
+
+ err = pf->create(net, sock, protocol, kern);
+- if (err < 0)
++ if (err < 0) {
++ /* ->create should release the allocated sock->sk object on error
++ * but it may leave the dangling pointer
++ */
++ sock->sk = NULL;
+ goto out_module_put;
++ }
+
+ /*
+ * Now to bump the refcnt of the [loadable] module that owns this
--- /dev/null
+From 1dae9f1187189bc09ff6d25ca97ead711f7e26f9 Mon Sep 17 00:00:00 2001
+From: Anastasia Kovaleva <a.kovaleva@yadro.com>
+Date: Thu, 3 Oct 2024 13:44:31 +0300
+Subject: net: Fix an unsafe loop on the list
+
+From: Anastasia Kovaleva <a.kovaleva@yadro.com>
+
+commit 1dae9f1187189bc09ff6d25ca97ead711f7e26f9 upstream.
+
+The kernel may crash when deleting a genetlink family if there are still
+listeners for that family:
+
+Oops: Kernel access of bad area, sig: 11 [#1]
+ ...
+ NIP [c000000000c080bc] netlink_update_socket_mc+0x3c/0xc0
+ LR [c000000000c0f764] __netlink_clear_multicast_users+0x74/0xc0
+ Call Trace:
+__netlink_clear_multicast_users+0x74/0xc0
+genl_unregister_family+0xd4/0x2d0
+
+Change the unsafe loop over the list to a safe one, because elements can
+be removed from the list inside the loop.
+
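+With the plain iterator, the next pointer is read from the current node
+*after* the loop body has run, so unlinking that node in the body leaves the
+cursor pointing at a removed entry. The _safe variant caches the next node
+before running the body, which is what the new helper does:
+
+  struct hlist_node *tmp;
+  struct sock *sk;
+
+  sk_for_each_bound_safe(sk, tmp, &tbl->mc_list)
+          netlink_update_socket_mc(nlk_sk(sk), group, 0); /* may unlink sk */
+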
+Fixes: b8273570f802 ("genetlink: fix netns vs. netlink table locking (2)")
+Cc: stable@vger.kernel.org
+Signed-off-by: Anastasia Kovaleva <a.kovaleva@yadro.com>
+Reviewed-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20241003104431.12391-1-a.kovaleva@yadro.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sock.h | 2 ++
+ net/netlink/af_netlink.c | 3 ++-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -898,6 +898,8 @@ static inline void sk_add_bind2_node(str
+ hlist_for_each_entry(__sk, list, sk_bind_node)
+ #define sk_for_each_bound_bhash2(__sk, list) \
+ hlist_for_each_entry(__sk, list, sk_bind2_node)
++#define sk_for_each_bound_safe(__sk, tmp, list) \
++ hlist_for_each_entry_safe(__sk, tmp, list, sk_bind_node)
+
+ /**
+ * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -2112,8 +2112,9 @@ void __netlink_clear_multicast_users(str
+ {
+ struct sock *sk;
+ struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
++ struct hlist_node *tmp;
+
+- sk_for_each_bound(sk, &tbl->mc_list)
++ sk_for_each_bound_safe(sk, tmp, &tbl->mc_list)
+ netlink_update_socket_mc(nlk_sk(sk), group, 0);
+ }
+
--- /dev/null
+From 835745a377a4519decd1a36d6b926e369b3033e2 Mon Sep 17 00:00:00 2001
+From: Yonatan Maman <Ymaman@Nvidia.com>
+Date: Tue, 8 Oct 2024 14:59:43 +0300
+Subject: nouveau/dmem: Fix vulnerability in migrate_to_ram upon copy error
+
+From: Yonatan Maman <Ymaman@Nvidia.com>
+
+commit 835745a377a4519decd1a36d6b926e369b3033e2 upstream.
+
+The `nouveau_dmem_copy_one` function ensures that the copy push command is
+sent to the device firmware but does not track whether it was executed
+successfully.
+
+In the case of a copy error (e.g., firmware or hardware failure), the
+copy push command will be sent via the firmware channel, and
+`nouveau_dmem_copy_one` will likely report success, leading to the
+`migrate_to_ram` function returning a dirty HIGH_USER page to the user.
+
+This can result in a security vulnerability, as a HIGH_USER page that may
+contain sensitive or corrupted data could be returned to the user.
+
+To prevent this vulnerability, we allocate a zero page. Thus, in case of
+an error, a non-dirty (zero) page will be returned to the user.
+
+Fixes: 5be73b690875 ("drm/nouveau/dmem: device memory helpers for SVM")
+Signed-off-by: Yonatan Maman <Ymaman@Nvidia.com>
+Co-developed-by: Gal Shalom <GalShalom@Nvidia.com>
+Signed-off-by: Gal Shalom <GalShalom@Nvidia.com>
+Reviewed-by: Ben Skeggs <bskeggs@nvidia.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Danilo Krummrich <dakr@kernel.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241008115943.990286-3-ymaman@nvidia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
++++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
+@@ -194,7 +194,7 @@ static vm_fault_t nouveau_dmem_migrate_t
+ if (!spage || !(src & MIGRATE_PFN_MIGRATE))
+ goto done;
+
+- dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
++ dpage = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vmf->vma, vmf->address);
+ if (!dpage)
+ goto done;
+
--- /dev/null
+From 9023ed8d91eb1fcc93e64dc4962f7412b1c4cbec Mon Sep 17 00:00:00 2001
+From: Daniel Palmer <daniel@0x0f.com>
+Date: Thu, 3 Oct 2024 13:29:47 +1000
+Subject: scsi: wd33c93: Don't use stale scsi_pointer value
+
+From: Daniel Palmer <daniel@0x0f.com>
+
+commit 9023ed8d91eb1fcc93e64dc4962f7412b1c4cbec upstream.
+
+A regression was introduced with commit dbb2da557a6a ("scsi: wd33c93:
+Move the SCSI pointer to private command data") which results in an oops
+in wd33c93_intr(). That commit added the scsi_pointer variable and
+initialized it from hostdata->connected. However, during selection,
+hostdata->connected is not yet valid. Fix this by getting the current
+scsi_pointer from hostdata->selecting.
+
+Cc: Daniel Palmer <daniel@0x0f.com>
+Cc: Michael Schmitz <schmitzmic@gmail.com>
+Cc: stable@kernel.org
+Fixes: dbb2da557a6a ("scsi: wd33c93: Move the SCSI pointer to private command data")
+Signed-off-by: Daniel Palmer <daniel@0x0f.com>
+Co-developed-by: Finn Thain <fthain@linux-m68k.org>
+Signed-off-by: Finn Thain <fthain@linux-m68k.org>
+Link: https://lore.kernel.org/r/09e11a0a54e6aa2a88bd214526d305aaf018f523.1727926187.git.fthain@linux-m68k.org
+Reviewed-by: Michael Schmitz <schmitzmic@gmail.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/wd33c93.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/scsi/wd33c93.c
++++ b/drivers/scsi/wd33c93.c
+@@ -831,7 +831,7 @@ wd33c93_intr(struct Scsi_Host *instance)
+ /* construct an IDENTIFY message with correct disconnect bit */
+
+ hostdata->outgoing_msg[0] = IDENTIFY(0, cmd->device->lun);
+- if (scsi_pointer->phase)
++ if (WD33C93_scsi_pointer(cmd)->phase)
+ hostdata->outgoing_msg[0] |= 0x40;
+
+ if (hostdata->sync_stat[cmd->device->id] == SS_FIRST) {
--- /dev/null
+From 532b53cebe58f34ce1c0f34d866f5c0e335c53c6 Mon Sep 17 00:00:00 2001
+From: Patrick Roy <roypat@amazon.co.uk>
+Date: Tue, 1 Oct 2024 09:00:41 +0100
+Subject: secretmem: disable memfd_secret() if arch cannot set direct map
+
+From: Patrick Roy <roypat@amazon.co.uk>
+
+commit 532b53cebe58f34ce1c0f34d866f5c0e335c53c6 upstream.
+
+Return -ENOSYS from memfd_secret() syscall if !can_set_direct_map(). This
+is the case for example on some arm64 configurations, where marking 4k
+PTEs in the direct map not present can only be done if the direct map is
+set up at 4k granularity in the first place (as ARM's break-before-make
+semantics do not easily allow breaking apart large/gigantic pages).
+
+More precisely, on arm64 systems with !can_set_direct_map(),
+set_direct_map_invalid_noflush() is a no-op, however it returns success
+(0) instead of an error. This means that memfd_secret will seemingly
+"work" (e.g. syscall succeeds, you can mmap the fd and fault in pages),
+but it does not actually achieve its goal of removing its memory from the
+direct map.
+
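+A minimal sketch of the mismatch described above (simplified; the arm64
+behaviour is paraphrased, not quoted):
+
+  /* arm64 with !can_set_direct_map(): nothing changes, yet 0 is returned */
+  set_direct_map_invalid_noflush(page);    /* "succeeds" as a no-op */
+
+  /* after this patch, secretmem refuses up front instead */
+  if (!secretmem_enable || !can_set_direct_map())
+          return -ENOSYS;
+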
+Note that with this patch, memfd_secret() will start erroring on systems
+where can_set_direct_map() returns false (arm64 with
+CONFIG_RODATA_FULL_DEFAULT_ENABLED=n, CONFIG_DEBUG_PAGEALLOC=n and
+CONFIG_KFENCE=n), but that still seems better than the current silent
+failure. Since CONFIG_RODATA_FULL_DEFAULT_ENABLED defaults to 'y', most
+arm64 systems actually have a working memfd_secret() and aren't
+affected.
+
+From going through the iterations of the original memfd_secret patch
+series, it seems that disabling the syscall in these scenarios was the
+intended behavior [1] (preferred over having
+set_direct_map_invalid_noflush return an error as that would result in
+SIGBUSes at page-fault time), however the check for it got dropped between
+v16 [2] and v17 [3], when secretmem moved away from CMA allocations.
+
+[1]: https://lore.kernel.org/lkml/20201124164930.GK8537@kernel.org/
+[2]: https://lore.kernel.org/lkml/20210121122723.3446-11-rppt@kernel.org/#t
+[3]: https://lore.kernel.org/lkml/20201125092208.12544-10-rppt@kernel.org/
+
+Link: https://lkml.kernel.org/r/20241001080056.784735-1-roypat@amazon.co.uk
+Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas")
+Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Alexander Graf <graf@amazon.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: James Gowans <jgowans@amazon.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/secretmem.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/secretmem.c
++++ b/mm/secretmem.c
+@@ -236,7 +236,7 @@ SYSCALL_DEFINE1(memfd_secret, unsigned i
+ /* make sure local flags do not confict with global fcntl.h */
+ BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
+
+- if (!secretmem_enable)
++ if (!secretmem_enable || !can_set_direct_map())
+ return -ENOSYS;
+
+ if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
+@@ -278,7 +278,7 @@ static struct file_system_type secretmem
+
+ static int __init secretmem_init(void)
+ {
+- if (!secretmem_enable)
++ if (!secretmem_enable || !can_set_direct_map())
+ return 0;
+
+ secretmem_mnt = kern_mount(&secretmem_fs);
usb-storage-ignore-bogus-device-raised-by-jieli-br21-usb-sound-chip.patch
usb-gadget-core-force-synchronous-registration.patch
hid-intel-ish-hid-fix-uninitialized-variable-rv-in-ish_fw_xfer_direct_dma.patch
+drm-v3d-stop-the-active-perfmon-before-being-destroyed.patch
+drm-vc4-stop-the-active-perfmon-before-being-destroyed.patch
+scsi-wd33c93-don-t-use-stale-scsi_pointer-value.patch
+mptcp-fallback-when-mptcp-opts-are-dropped-after-1st-data.patch
+ata-libata-avoid-superfluous-disk-spin-down-spin-up-during-hibernation.patch
+net-explicitly-clear-the-sk-pointer-when-pf-create-fails.patch
+net-fix-an-unsafe-loop-on-the-list.patch
+net-dsa-lan9303-ensure-chip-reset-and-wait-for-ready-status.patch
+mptcp-handle-consistently-dss-corruption.patch
+mptcp-pm-do-not-remove-closing-subflows.patch
+device-dax-correct-pgoff-align-in-dax_set_mapping.patch
+nouveau-dmem-fix-vulnerability-in-migrate_to_ram-upon-copy-error.patch
+kthread-unpark-only-parked-kthread.patch
+secretmem-disable-memfd_secret-if-arch-cannot-set-direct-map.patch