From: Sasha Levin Date: Sun, 15 Dec 2024 16:52:01 +0000 (-0500) Subject: Fixes for 6.1 X-Git-Tag: v5.4.288~40 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=848ce04e664a64254e7c6be020992f77f01da846;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/acpi-nfit-vmalloc-out-of-bounds-read-in-acpi_nfit_ct.patch b/queue-6.1/acpi-nfit-vmalloc-out-of-bounds-read-in-acpi_nfit_ct.patch new file mode 100644 index 00000000000..23d913bd915 --- /dev/null +++ b/queue-6.1/acpi-nfit-vmalloc-out-of-bounds-read-in-acpi_nfit_ct.patch @@ -0,0 +1,63 @@ +From c9889ce191567293c1d5269e213f83e07ad9a81e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 18 Nov 2024 21:56:09 +0530 +Subject: acpi: nfit: vmalloc-out-of-bounds Read in acpi_nfit_ctl + +From: Suraj Sonawane + +[ Upstream commit 265e98f72bac6c41a4492d3e30a8e5fd22fe0779 ] + +Fix an issue detected by syzbot with KASAN: + +BUG: KASAN: vmalloc-out-of-bounds in cmd_to_func drivers/acpi/nfit/ +core.c:416 [inline] +BUG: KASAN: vmalloc-out-of-bounds in acpi_nfit_ctl+0x20e8/0x24a0 +drivers/acpi/nfit/core.c:459 + +The issue occurs in cmd_to_func when the call_pkg->nd_reserved2 +array is accessed without verifying that call_pkg points to a buffer +that is appropriately sized as a struct nd_cmd_pkg. This can lead +to out-of-bounds access and undefined behavior if the buffer does not +have sufficient space. + +To address this, a check was added in acpi_nfit_ctl() to ensure that +buf is not NULL and that buf_len is at least sizeof(*call_pkg) +before accessing it. This ensures safe access to the members of +call_pkg, including the nd_reserved2 array. 
+ +Reported-by: syzbot+7534f060ebda6b8b51b3@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=7534f060ebda6b8b51b3 +Tested-by: syzbot+7534f060ebda6b8b51b3@syzkaller.appspotmail.com +Fixes: ebe9f6f19d80 ("acpi/nfit: Fix bus command validation") +Signed-off-by: Suraj Sonawane +Reviewed-by: Alison Schofield +Reviewed-by: Dave Jiang +Link: https://patch.msgid.link/20241118162609.29063-1-surajsonawane0215@gmail.com +Signed-off-by: Ira Weiny +Signed-off-by: Sasha Levin +--- + drivers/acpi/nfit/core.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c +index 6d4ac934cd49..1535fe196646 100644 +--- a/drivers/acpi/nfit/core.c ++++ b/drivers/acpi/nfit/core.c +@@ -454,8 +454,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, + if (cmd_rc) + *cmd_rc = -EINVAL; + +- if (cmd == ND_CMD_CALL) ++ if (cmd == ND_CMD_CALL) { ++ if (!buf || buf_len < sizeof(*call_pkg)) ++ return -EINVAL; ++ + call_pkg = buf; ++ } ++ + func = cmd_to_func(nfit_mem, cmd, call_pkg, &family); + if (func < 0) + return func; +-- +2.39.5 + diff --git a/queue-6.1/acpi-resource-fix-memory-resource-type-union-access.patch b/queue-6.1/acpi-resource-fix-memory-resource-type-union-access.patch new file mode 100644 index 00000000000..fd4d3457b9f --- /dev/null +++ b/queue-6.1/acpi-resource-fix-memory-resource-type-union-access.patch @@ -0,0 +1,55 @@ +From a1bd7753ce4d66d8b0766cefb37ee694a80e12d6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Dec 2024 12:06:13 +0200 +Subject: ACPI: resource: Fix memory resource type union access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ilpo Järvinen + +[ Upstream commit 7899ca9f3bd2b008e9a7c41f2a9f1986052d7e96 ] + +In acpi_decode_space() addr->info.mem.caching is checked on main level +for any resource type but addr->info.mem is part of union and thus +valid only if the resource 
type is memory range. + +Move the check inside the preceding switch/case to only execute it +when the union is of correct type. + +Fixes: fcb29bbcd540 ("ACPI: Add prefetch decoding to the address space parser") +Signed-off-by: Ilpo Järvinen +Link: https://patch.msgid.link/20241202100614.20731-1-ilpo.jarvinen@linux.intel.com +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Sasha Levin +--- + drivers/acpi/resource.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c +index d57bc814dec4..b36b8592667d 100644 +--- a/drivers/acpi/resource.c ++++ b/drivers/acpi/resource.c +@@ -250,6 +250,9 @@ static bool acpi_decode_space(struct resource_win *win, + switch (addr->resource_type) { + case ACPI_MEMORY_RANGE: + acpi_dev_memresource_flags(res, len, wp); ++ ++ if (addr->info.mem.caching == ACPI_PREFETCHABLE_MEMORY) ++ res->flags |= IORESOURCE_PREFETCH; + break; + case ACPI_IO_RANGE: + acpi_dev_ioresource_flags(res, len, iodec, +@@ -265,9 +268,6 @@ static bool acpi_decode_space(struct resource_win *win, + if (addr->producer_consumer == ACPI_PRODUCER) + res->flags |= IORESOURCE_WINDOW; + +- if (addr->info.mem.caching == ACPI_PREFETCHABLE_MEMORY) +- res->flags |= IORESOURCE_PREFETCH; +- + return !(res->flags & IORESOURCE_DISABLED); + } + +-- +2.39.5 + diff --git a/queue-6.1/acpica-events-evxfregn-don-t-release-the-contextmute.patch b/queue-6.1/acpica-events-evxfregn-don-t-release-the-contextmute.patch new file mode 100644 index 00000000000..ad47cb88266 --- /dev/null +++ b/queue-6.1/acpica-events-evxfregn-don-t-release-the-contextmute.patch @@ -0,0 +1,41 @@ +From ce1ba5e934728684b30332c643c947ebb01c4558 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Nov 2024 11:29:54 +0300 +Subject: ACPICA: events/evxfregn: don't release the ContextMutex that was + never acquired + +From: Daniil Tatianin + +[ Upstream commit c53d96a4481f42a1635b96d2c1acbb0a126bfd54 ] + +This bug was first introduced in 
c27f3d011b08, where the author of the +patch probably meant to do DeleteMutex instead of ReleaseMutex. The +mutex leak was noticed later on and fixed in e4dfe108371, but the bogus +MutexRelease line was never removed, so do it now. + +Link: https://github.com/acpica/acpica/pull/982 +Fixes: c27f3d011b08 ("ACPICA: Fix race in generic_serial_bus (I2C) and GPIO op_region parameter handling") +Signed-off-by: Daniil Tatianin +Link: https://patch.msgid.link/20241122082954.658356-1-d-tatianin@yandex-team.ru +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Sasha Levin +--- + drivers/acpi/acpica/evxfregn.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c +index 0a8372bf6a77..6fa6b485e30d 100644 +--- a/drivers/acpi/acpica/evxfregn.c ++++ b/drivers/acpi/acpica/evxfregn.c +@@ -201,8 +201,6 @@ acpi_remove_address_space_handler(acpi_handle device, + + /* Now we can delete the handler object */ + +- acpi_os_release_mutex(handler_obj->address_space. +- context_mutex); + acpi_ut_remove_reference(handler_obj); + goto unlock_and_exit; + } +-- +2.39.5 + diff --git a/queue-6.1/amdgpu-uvd-get-ring-reference-from-rq-scheduler.patch b/queue-6.1/amdgpu-uvd-get-ring-reference-from-rq-scheduler.patch new file mode 100644 index 00000000000..f5e0df2b176 --- /dev/null +++ b/queue-6.1/amdgpu-uvd-get-ring-reference-from-rq-scheduler.patch @@ -0,0 +1,40 @@ +From 273732dc26481dfeff8aceb7f5432bbfdad959f6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 11:30:01 -0500 +Subject: amdgpu/uvd: get ring reference from rq scheduler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: David (Ming Qiang) Wu + +[ Upstream commit 47f402a3e08113e0f5d8e1e6fcc197667a16022f ] + +base.sched may not be set for each instance and should not +be used for cases such as non-IB tests. 
+ +Fixes: 2320c9e6a768 ("drm/sched: memset() 'job' in drm_sched_job_init()") +Signed-off-by: David (Ming Qiang) Wu +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +index e668b3baa8c6..1c5d79528ca7 100644 +--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +@@ -1284,7 +1284,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) + { +- struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); ++ struct amdgpu_ring *ring = amdgpu_job_ring(job); + unsigned i; + + /* No patching necessary for the first instance */ +-- +2.39.5 + diff --git a/queue-6.1/asoc-amd-yc-fix-the-wrong-return-value.patch b/queue-6.1/asoc-amd-yc-fix-the-wrong-return-value.patch new file mode 100644 index 00000000000..03b1007772d --- /dev/null +++ b/queue-6.1/asoc-amd-yc-fix-the-wrong-return-value.patch @@ -0,0 +1,57 @@ +From 3463de93d0fee1b4a2ff4787255ac7f4a70dd29c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 14:40:25 +0530 +Subject: ASoC: amd: yc: Fix the wrong return value + +From: Venkata Prasad Potturu + +[ Upstream commit 984795e76def5c903724b8d6a8228e356bbdf2af ] + +With the current implementation, when ACP driver fails to read +ACPI _WOV entry then the DMI overrides code won't invoke, +may cause regressions for some BIOS versions. + +Add a condition check to jump to check the DMI entries incase of +ACP driver fail to read ACPI _WOV method. 
+ +Fixes: 4095cf872084 (ASoC: amd: yc: Fix for enabling DMIC on acp6x via _DSD entry) + +Signed-off-by: Venkata Prasad Potturu +Link: https://patch.msgid.link/20241210091026.996860-1-venkataprasad.potturu@amd.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/amd/yc/acp6x-mach.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c +index af9601bea275..9c1bf0eb2deb 100644 +--- a/sound/soc/amd/yc/acp6x-mach.c ++++ b/sound/soc/amd/yc/acp6x-mach.c +@@ -473,14 +473,19 @@ static int acp6x_probe(struct platform_device *pdev) + + handle = ACPI_HANDLE(pdev->dev.parent); + ret = acpi_evaluate_integer(handle, "_WOV", NULL, &dmic_status); +- if (!ACPI_FAILURE(ret)) ++ if (!ACPI_FAILURE(ret)) { + wov_en = dmic_status; ++ if (!wov_en) ++ return -ENODEV; ++ } else { ++ /* Incase of ACPI method read failure then jump to check_dmi_entry */ ++ goto check_dmi_entry; ++ } + +- if (is_dmic_enable && wov_en) ++ if (is_dmic_enable) + platform_set_drvdata(pdev, &acp6x_card); +- else +- return 0; + ++check_dmi_entry: + /* check for any DMI overrides */ + dmi_id = dmi_first_match(yc_acp_quirk_table); + if (dmi_id) +-- +2.39.5 + diff --git a/queue-6.1/batman-adv-do-not-let-tt-changes-list-grows-indefini.patch b/queue-6.1/batman-adv-do-not-let-tt-changes-list-grows-indefini.patch new file mode 100644 index 00000000000..d9d43639879 --- /dev/null +++ b/queue-6.1/batman-adv-do-not-let-tt-changes-list-grows-indefini.patch @@ -0,0 +1,77 @@ +From 152192729f30567f9b376587d8063bb33478b6d1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Nov 2024 16:52:50 +0100 +Subject: batman-adv: Do not let TT changes list grows indefinitely + +From: Remi Pommarel + +[ Upstream commit fff8f17c1a6fc802ca23bbd3a276abfde8cc58e6 ] + +When TT changes list is too big to fit in packet due to MTU size, an +empty OGM is sent expected other node to send TT request to get the +changes. 
The issue is that tt.last_changeset was not built thus the +originator was responding with previous changes to those TT requests +(see batadv_send_my_tt_response). Also the changes list was never +cleaned up, effectively growing without end from this point onwards, +repeatedly sending the same TT response changes over and over, and +creating a new empty OGM every OGM interval expecting for the local +changes to be purged. + +When there are more TT changes than can fit in a packet, drop all changes, +send empty OGM and wait for TT request so we can respond with a full +table instead. + +Fixes: e1bf0c14096f ("batman-adv: tvlv - convert tt data sent within OGMs") +Signed-off-by: Remi Pommarel +Acked-by: Antonio Quartulli +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Sasha Levin +--- + net/batman-adv/translation-table.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c +index d7f874ff1a70..2b5453801bf0 100644 +--- a/net/batman-adv/translation-table.c ++++ b/net/batman-adv/translation-table.c +@@ -990,6 +990,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + int tt_diff_len, tt_change_len = 0; + int tt_diff_entries_num = 0; + int tt_diff_entries_count = 0; ++ bool drop_changes = false; + size_t tt_extra_len = 0; + u16 tvlv_len; + +@@ -997,10 +998,17 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + tt_diff_len = batadv_tt_len(tt_diff_entries_num); + + /* if we have too many changes for one packet don't send any +- * and wait for the tt table request which will be fragmented ++ * and wait for the tt table request so we can reply with the full ++ * (fragmented) table. ++ * ++ * The local change history should still be cleaned up so the next ++ * TT round can start again with a clean state. 
+ */ +- if (tt_diff_len > bat_priv->soft_iface->mtu) ++ if (tt_diff_len > bat_priv->soft_iface->mtu) { + tt_diff_len = 0; ++ tt_diff_entries_num = 0; ++ drop_changes = true; ++ } + + tvlv_len = batadv_tt_prepare_tvlv_local_data(bat_priv, &tt_data, + &tt_change, &tt_diff_len); +@@ -1009,7 +1017,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + + tt_data->flags = BATADV_TT_OGM_DIFF; + +- if (tt_diff_len == 0) ++ if (!drop_changes && tt_diff_len == 0) + goto container_register; + + spin_lock_bh(&bat_priv->tt.changes_list_lock); +-- +2.39.5 + diff --git a/queue-6.1/batman-adv-do-not-send-uninitialized-tt-changes.patch b/queue-6.1/batman-adv-do-not-send-uninitialized-tt-changes.patch new file mode 100644 index 00000000000..308c9e617c9 --- /dev/null +++ b/queue-6.1/batman-adv-do-not-send-uninitialized-tt-changes.patch @@ -0,0 +1,78 @@ +From fc1701146db86a86220aef0de2ae9cfc08bbf791 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Nov 2024 16:52:48 +0100 +Subject: batman-adv: Do not send uninitialized TT changes + +From: Remi Pommarel + +[ Upstream commit f2f7358c3890e7366cbcb7512b4bc8b4394b2d61 ] + +The number of TT changes can be less than initially expected in +batadv_tt_tvlv_container_update() (changes can be removed by +batadv_tt_local_event() in ADD+DEL sequence between reading +tt_diff_entries_num and actually iterating the change list under lock). + +Thus tt_diff_len could be bigger than the actual changes size that need +to be sent. Because batadv_send_my_tt_response sends the whole +packet, uninitialized data can be interpreted as TT changes on other +nodes leading to weird TT global entries on those nodes such as: + + * 00:00:00:00:00:00 -1 [....] ( 0) 88:12:4e:ad:7e:ba (179) (0x45845380) + * 00:00:00:00:78:79 4092 [.W..] ( 0) 88:12:4e:ad:7e:3c (145) (0x8ebadb8b) + +All of the above also applies to OGM tvlv container buffer's tvlv_len. 
+ +Remove the extra allocated space to avoid sending uninitialized TT +changes in batadv_send_my_tt_response() and batadv_v_ogm_send_softif(). + +Fixes: e1bf0c14096f ("batman-adv: tvlv - convert tt data sent within OGMs") +Signed-off-by: Remi Pommarel +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Sasha Levin +--- + net/batman-adv/translation-table.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c +index 4fc66cd95dc4..7711f87397d5 100644 +--- a/net/batman-adv/translation-table.c ++++ b/net/batman-adv/translation-table.c +@@ -990,6 +990,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + int tt_diff_len, tt_change_len = 0; + int tt_diff_entries_num = 0; + int tt_diff_entries_count = 0; ++ size_t tt_extra_len = 0; + u16 tvlv_len; + + tt_diff_entries_num = atomic_read(&bat_priv->tt.local_changes); +@@ -1027,6 +1028,9 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + } + spin_unlock_bh(&bat_priv->tt.changes_list_lock); + ++ tt_extra_len = batadv_tt_len(tt_diff_entries_num - ++ tt_diff_entries_count); ++ + /* Keep the buffer for possible tt_request */ + spin_lock_bh(&bat_priv->tt.last_changeset_lock); + kfree(bat_priv->tt.last_changeset); +@@ -1035,6 +1039,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + tt_change_len = batadv_tt_len(tt_diff_entries_count); + /* check whether this new OGM has no changes due to size problems */ + if (tt_diff_entries_count > 0) { ++ tt_diff_len -= tt_extra_len; + /* if kmalloc() fails we will reply with the full table + * instead of providing the diff + */ +@@ -1047,6 +1052,8 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + } + spin_unlock_bh(&bat_priv->tt.last_changeset_lock); + ++ /* Remove extra packet space for OGM */ ++ tvlv_len -= tt_extra_len; + container_register: + 
batadv_tvlv_container_register(bat_priv, BATADV_TVLV_TT, 1, tt_data, + tvlv_len); +-- +2.39.5 + diff --git a/queue-6.1/batman-adv-remove-uninitialized-data-in-full-table-t.patch b/queue-6.1/batman-adv-remove-uninitialized-data-in-full-table-t.patch new file mode 100644 index 00000000000..0a92eb8d57f --- /dev/null +++ b/queue-6.1/batman-adv-remove-uninitialized-data-in-full-table-t.patch @@ -0,0 +1,115 @@ +From 7f97341dce0f57ad29b4f961815997b63091edb1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Nov 2024 16:52:49 +0100 +Subject: batman-adv: Remove uninitialized data in full table TT response + +From: Remi Pommarel + +[ Upstream commit 8038806db64da15721775d6b834990cacbfcf0b2 ] + +The number of entries filled by batadv_tt_tvlv_generate() can be less +than initially expected in batadv_tt_prepare_tvlv_{global,local}_data() +(changes can be removed by batadv_tt_local_event() in ADD+DEL sequence +in the meantime as the lock held during the whole tvlv global/local data +generation). + +Thus tvlv_len could be bigger than the actual TT entry size that need +to be sent so full table TT_RESPONSE could hold invalid TT entries such +as below. + + * 00:00:00:00:00:00 -1 [....] ( 0) 88:12:4e:ad:7e:ba (179) (0x45845380) + * 00:00:00:00:78:79 4092 [.W..] ( 0) 88:12:4e:ad:7e:3c (145) (0x8ebadb8b) + +Remove the extra allocated space to avoid sending uninitialized entries +for full table TT_RESPONSE in both batadv_send_other_tt_response() and +batadv_send_my_tt_response(). 
+ +Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific") +Signed-off-by: Remi Pommarel +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Sasha Levin +--- + net/batman-adv/translation-table.c | 37 ++++++++++++++++++------------ + 1 file changed, 22 insertions(+), 15 deletions(-) + +diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c +index 7711f87397d5..d7f874ff1a70 100644 +--- a/net/batman-adv/translation-table.c ++++ b/net/batman-adv/translation-table.c +@@ -2754,14 +2754,16 @@ static bool batadv_tt_global_valid(const void *entry_ptr, + * + * Fills the tvlv buff with the tt entries from the specified hash. If valid_cb + * is not provided then this becomes a no-op. ++ * ++ * Return: Remaining unused length in tvlv_buff. + */ +-static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, +- struct batadv_hashtable *hash, +- void *tvlv_buff, u16 tt_len, +- bool (*valid_cb)(const void *, +- const void *, +- u8 *flags), +- void *cb_data) ++static u16 batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, ++ struct batadv_hashtable *hash, ++ void *tvlv_buff, u16 tt_len, ++ bool (*valid_cb)(const void *, ++ const void *, ++ u8 *flags), ++ void *cb_data) + { + struct batadv_tt_common_entry *tt_common_entry; + struct batadv_tvlv_tt_change *tt_change; +@@ -2775,7 +2777,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, + tt_change = tvlv_buff; + + if (!valid_cb) +- return; ++ return tt_len; + + rcu_read_lock(); + for (i = 0; i < hash->size; i++) { +@@ -2801,6 +2803,8 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, + } + } + rcu_read_unlock(); ++ ++ return batadv_tt_len(tt_tot - tt_num_entries); + } + + /** +@@ -3076,10 +3080,11 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, + goto out; + + /* fill the rest of the tvlv with the real TT entries */ +- batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.global_hash, +- tt_change, 
tt_len, +- batadv_tt_global_valid, +- req_dst_orig_node); ++ tvlv_len -= batadv_tt_tvlv_generate(bat_priv, ++ bat_priv->tt.global_hash, ++ tt_change, tt_len, ++ batadv_tt_global_valid, ++ req_dst_orig_node); + } + + /* Don't send the response, if larger than fragmented packet. */ +@@ -3203,9 +3208,11 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, + goto out; + + /* fill the rest of the tvlv with the real TT entries */ +- batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.local_hash, +- tt_change, tt_len, +- batadv_tt_local_valid, NULL); ++ tvlv_len -= batadv_tt_tvlv_generate(bat_priv, ++ bat_priv->tt.local_hash, ++ tt_change, tt_len, ++ batadv_tt_local_valid, ++ NULL); + } + + tvlv_tt_data->flags = BATADV_TT_RESPONSE; +-- +2.39.5 + diff --git a/queue-6.1/blk-iocost-avoid-using-clamp-on-inuse-in-__propagate.patch b/queue-6.1/blk-iocost-avoid-using-clamp-on-inuse-in-__propagate.patch new file mode 100644 index 00000000000..bf8dd8799f0 --- /dev/null +++ b/queue-6.1/blk-iocost-avoid-using-clamp-on-inuse-in-__propagate.patch @@ -0,0 +1,77 @@ +From b361277e9fce0ae6246b92484c83cea75952a4bd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Dec 2024 10:13:29 -0700 +Subject: blk-iocost: Avoid using clamp() on inuse in __propagate_weights() + +From: Nathan Chancellor + +[ Upstream commit 57e420c84f9ab55ba4c5e2ae9c5f6c8e1ea834d2 ] + +After a recent change to clamp() and its variants [1] that increases the +coverage of the check that high is greater than low because it can be +done through inlining, certain build configurations (such as s390 +defconfig) fail to build with clang with: + + block/blk-iocost.c:1101:11: error: call to '__compiletime_assert_557' declared with 'error' attribute: clamp() low limit 1 greater than high limit active + 1101 | inuse = clamp_t(u32, inuse, 1, active); + | ^ + include/linux/minmax.h:218:36: note: expanded from macro 'clamp_t' + 218 | #define clamp_t(type, val, lo, hi) __careful_clamp(type, val, lo, hi) + | ^ + 
include/linux/minmax.h:195:2: note: expanded from macro '__careful_clamp' + 195 | __clamp_once(type, val, lo, hi, __UNIQUE_ID(v_), __UNIQUE_ID(l_), __UNIQUE_ID(h_)) + | ^ + include/linux/minmax.h:188:2: note: expanded from macro '__clamp_once' + 188 | BUILD_BUG_ON_MSG(statically_true(ulo > uhi), \ + | ^ + +__propagate_weights() is called with an active value of zero in +ioc_check_iocgs(), which results in the high value being less than the +low value, which is undefined because the value returned depends on the +order of the comparisons. + +The purpose of this expression is to ensure inuse is not more than +active and at least 1. This could be written more simply with a ternary +expression that uses min(inuse, active) as the condition so that the +value of that condition can be used if it is not zero and one if it is. +Do this conversion to resolve the error and add a comment to deter +people from turning this back into clamp(). + +Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost") +Link: https://lore.kernel.org/r/34d53778977747f19cce2abb287bb3e6@AcuMS.aculab.com/ [1] +Suggested-by: David Laight +Reported-by: Linux Kernel Functional Testing +Closes: https://lore.kernel.org/llvm/CA+G9fYsD7mw13wredcZn0L-KBA3yeoVSTuxnss-AEWMN3ha0cA@mail.gmail.com/ +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202412120322.3GfVe3vF-lkp@intel.com/ +Signed-off-by: Nathan Chancellor +Acked-by: Tejun Heo +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-iocost.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/block/blk-iocost.c b/block/blk-iocost.c +index 772e909e9fbf..e270e64ba342 100644 +--- a/block/blk-iocost.c ++++ b/block/blk-iocost.c +@@ -1082,7 +1082,14 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, + inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum, + iocg->child_active_sum); + } else { +- inuse = clamp_t(u32, inuse, 1, active); ++ /* ++ * It may be 
tempting to turn this into a clamp expression with ++ * a lower limit of 1 but active may be 0, which cannot be used ++ * as an upper limit in that situation. This expression allows ++ * active to clamp inuse unless it is 0, in which case inuse ++ * becomes 1. ++ */ ++ inuse = min(inuse, active) ?: 1; + } + + iocg->last_inuse = iocg->inuse; +-- +2.39.5 + diff --git a/queue-6.1/bluetooth-iso-fix-recursive-locking-warning.patch b/queue-6.1/bluetooth-iso-fix-recursive-locking-warning.patch new file mode 100644 index 00000000000..dbf210eab68 --- /dev/null +++ b/queue-6.1/bluetooth-iso-fix-recursive-locking-warning.patch @@ -0,0 +1,78 @@ +From 1b78c266a3245665fc74d17103a9ef1faa723cc4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 14:28:49 +0200 +Subject: Bluetooth: iso: Fix recursive locking warning + +From: Iulia Tanasescu + +[ Upstream commit 9bde7c3b3ad0e1f39d6df93dd1c9caf63e19e50f ] + +This updates iso_sock_accept to use nested locking for the parent +socket, to avoid lockdep warnings caused because the parent and +child sockets are locked by the same thread: + +[ 41.585683] ============================================ +[ 41.585688] WARNING: possible recursive locking detected +[ 41.585694] 6.12.0-rc6+ #22 Not tainted +[ 41.585701] -------------------------------------------- +[ 41.585705] iso-tester/3139 is trying to acquire lock: +[ 41.585711] ffff988b29530a58 (sk_lock-AF_BLUETOOTH) + at: bt_accept_dequeue+0xe3/0x280 [bluetooth] +[ 41.585905] + but task is already holding lock: +[ 41.585909] ffff988b29533a58 (sk_lock-AF_BLUETOOTH) + at: iso_sock_accept+0x61/0x2d0 [bluetooth] +[ 41.586064] + other info that might help us debug this: +[ 41.586069] Possible unsafe locking scenario: + +[ 41.586072] CPU0 +[ 41.586076] ---- +[ 41.586079] lock(sk_lock-AF_BLUETOOTH); +[ 41.586086] lock(sk_lock-AF_BLUETOOTH); +[ 41.586093] + *** DEADLOCK *** + +[ 41.586097] May be due to missing lock nesting notation + +[ 41.586101] 1 lock held by iso-tester/3139: +[ 
41.586107] #0: ffff988b29533a58 (sk_lock-AF_BLUETOOTH) + at: iso_sock_accept+0x61/0x2d0 [bluetooth] + +Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type") +Signed-off-by: Iulia Tanasescu +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/iso.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c +index ff15d5192768..437cbeaa9619 100644 +--- a/net/bluetooth/iso.c ++++ b/net/bluetooth/iso.c +@@ -964,7 +964,11 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock, + long timeo; + int err = 0; + +- lock_sock(sk); ++ /* Use explicit nested locking to avoid lockdep warnings generated ++ * because the parent socket and the child socket are locked on the ++ * same thread. ++ */ ++ lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + +@@ -995,7 +999,7 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock, + release_sock(sk); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); +- lock_sock(sk); ++ lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + } + remove_wait_queue(sk_sleep(sk), &wait); + +-- +2.39.5 + diff --git a/queue-6.1/bluetooth-sco-add-support-for-16-bits-transparent-vo.patch b/queue-6.1/bluetooth-sco-add-support-for-16-bits-transparent-vo.patch new file mode 100644 index 00000000000..4b7c3328e92 --- /dev/null +++ b/queue-6.1/bluetooth-sco-add-support-for-16-bits-transparent-vo.patch @@ -0,0 +1,104 @@ +From e5f52faf8a283bcef111e3bc88b1d89f091a9ca7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 16:51:59 +0100 +Subject: Bluetooth: SCO: Add support for 16 bits transparent voice setting +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Frédéric Danis + +[ Upstream commit 29a651451e6c264f58cd9d9a26088e579d17b242 ] + +The voice setting is used by sco_connect() or sco_conn_defer_accept() +after being 
set by sco_sock_setsockopt(). + +The PCM part of the voice setting is used for offload mode through PCM +chipset port. +This commits add support for mSBC 16 bits offloading, i.e. audio data +not transported over HCI. + +The BCM4349B1 supports 16 bits transparent data on its I2S port. +If BT_VOICE_TRANSPARENT is used when accepting a SCO connection, this +gives only garbage audio while using BT_VOICE_TRANSPARENT_16BIT gives +correct audio. +This has been tested with connection to iPhone 14 and Samsung S24. + +Fixes: ad10b1a48754 ("Bluetooth: Add Bluetooth socket voice option") +Signed-off-by: Frédéric Danis +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + include/net/bluetooth/bluetooth.h | 1 + + net/bluetooth/sco.c | 29 +++++++++++++++-------------- + 2 files changed, 16 insertions(+), 14 deletions(-) + +diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h +index 41fc7f12971a..5689b4744764 100644 +--- a/include/net/bluetooth/bluetooth.h ++++ b/include/net/bluetooth/bluetooth.h +@@ -122,6 +122,7 @@ struct bt_voice { + + #define BT_VOICE_TRANSPARENT 0x0003 + #define BT_VOICE_CVSD_16BIT 0x0060 ++#define BT_VOICE_TRANSPARENT_16BIT 0x0063 + + #define BT_SNDMTU 12 + #define BT_RCVMTU 13 +diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c +index ad5afde17213..fe8728041ad0 100644 +--- a/net/bluetooth/sco.c ++++ b/net/bluetooth/sco.c +@@ -268,10 +268,13 @@ static int sco_connect(struct sock *sk) + else + type = SCO_LINK; + +- if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT && +- (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) { +- err = -EOPNOTSUPP; +- goto unlock; ++ switch (sco_pi(sk)->setting & SCO_AIRMODE_MASK) { ++ case SCO_AIRMODE_TRANSP: ++ if (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev)) { ++ err = -EOPNOTSUPP; ++ goto unlock; ++ } ++ break; + } + + hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst, +@@ -888,13 +891,6 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int 
optname, + if (err) + break; + +- /* Explicitly check for these values */ +- if (voice.setting != BT_VOICE_TRANSPARENT && +- voice.setting != BT_VOICE_CVSD_16BIT) { +- err = -EINVAL; +- break; +- } +- + sco_pi(sk)->setting = voice.setting; + hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, + BDADDR_BREDR); +@@ -902,9 +898,14 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, + err = -EBADFD; + break; + } +- if (enhanced_sync_conn_capable(hdev) && +- voice.setting == BT_VOICE_TRANSPARENT) +- sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT; ++ ++ switch (sco_pi(sk)->setting & SCO_AIRMODE_MASK) { ++ case SCO_AIRMODE_TRANSP: ++ if (enhanced_sync_conn_capable(hdev)) ++ sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT; ++ break; ++ } ++ + hci_dev_put(hdev); + break; + +-- +2.39.5 + diff --git a/queue-6.1/bonding-fix-feature-propagation-of-netif_f_gso_encap.patch b/queue-6.1/bonding-fix-feature-propagation-of-netif_f_gso_encap.patch new file mode 100644 index 00000000000..870cbfeab71 --- /dev/null +++ b/queue-6.1/bonding-fix-feature-propagation-of-netif_f_gso_encap.patch @@ -0,0 +1,101 @@ +From daa4b2dc2184b464a16688b8dbbf4238a05d62aa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 15:12:43 +0100 +Subject: bonding: Fix feature propagation of NETIF_F_GSO_ENCAP_ALL + +From: Daniel Borkmann + +[ Upstream commit 77b11c8bf3a228d1c63464534c2dcc8d9c8bf7ff ] + +Drivers like mlx5 expose NIC's vlan_features such as +NETIF_F_GSO_UDP_TUNNEL & NETIF_F_GSO_UDP_TUNNEL_CSUM which are +later not propagated when the underlying devices are bonded and +a vlan device created on top of the bond. + +Right now, the more cumbersome workaround for this is to create +the vlan on top of the mlx5 and then enslave the vlan devices +to a bond. + +To fix this, add NETIF_F_GSO_ENCAP_ALL to BOND_VLAN_FEATURES +such that bond_compute_features() can probe and propagate the +vlan_features from the slave devices up to the vlan device. 
+ +Given the following bond: + + # ethtool -i enp2s0f{0,1}np{0,1} + driver: mlx5_core + [...] + + # ethtool -k enp2s0f0np0 | grep udp + tx-udp_tnl-segmentation: on + tx-udp_tnl-csum-segmentation: on + tx-udp-segmentation: on + rx-udp_tunnel-port-offload: on + rx-udp-gro-forwarding: off + + # ethtool -k enp2s0f1np1 | grep udp + tx-udp_tnl-segmentation: on + tx-udp_tnl-csum-segmentation: on + tx-udp-segmentation: on + rx-udp_tunnel-port-offload: on + rx-udp-gro-forwarding: off + + # ethtool -k bond0 | grep udp + tx-udp_tnl-segmentation: on + tx-udp_tnl-csum-segmentation: on + tx-udp-segmentation: on + rx-udp_tunnel-port-offload: off [fixed] + rx-udp-gro-forwarding: off + +Before: + + # ethtool -k bond0.100 | grep udp + tx-udp_tnl-segmentation: off [requested on] + tx-udp_tnl-csum-segmentation: off [requested on] + tx-udp-segmentation: on + rx-udp_tunnel-port-offload: off [fixed] + rx-udp-gro-forwarding: off + +After: + + # ethtool -k bond0.100 | grep udp + tx-udp_tnl-segmentation: on + tx-udp_tnl-csum-segmentation: on + tx-udp-segmentation: on + rx-udp_tunnel-port-offload: off [fixed] + rx-udp-gro-forwarding: off + +Various users have run into this reporting performance issues when +configuring Cilium in vxlan tunneling mode and having the combination +of bond & vlan for the core devices connecting the Kubernetes cluster +to the outside world. 
+ +Fixes: a9b3ace44c7d ("bonding: fix vlan_features computing") +Signed-off-by: Daniel Borkmann +Cc: Nikolay Aleksandrov +Cc: Ido Schimmel +Cc: Jiri Pirko +Reviewed-by: Nikolay Aleksandrov +Reviewed-by: Hangbin Liu +Link: https://patch.msgid.link/20241210141245.327886-3-daniel@iogearbox.net +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_main.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index 26a9f99882e6..ded9e369e403 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1454,6 +1454,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev, + + #define BOND_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \ ++ NETIF_F_GSO_ENCAP_ALL | \ + NETIF_F_HIGHDMA | NETIF_F_LRO) + + #define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ +-- +2.39.5 + diff --git a/queue-6.1/cxgb4-use-port-number-to-set-mac-addr.patch b/queue-6.1/cxgb4-use-port-number-to-set-mac-addr.patch new file mode 100644 index 00000000000..43ad813dcba --- /dev/null +++ b/queue-6.1/cxgb4-use-port-number-to-set-mac-addr.patch @@ -0,0 +1,83 @@ +From 4afa376e72f34b6e90c4192a39c08f93f8fa9bb9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Dec 2024 11:50:14 +0530 +Subject: cxgb4: use port number to set mac addr + +From: Anumula Murali Mohan Reddy + +[ Upstream commit 356983f569c1f5991661fc0050aa263792f50616 ] + +t4_set_vf_mac_acl() uses pf to set mac addr, but t4vf_get_vf_mac_acl() +uses port number to get mac addr; this leads to an error on any attempt +to set the MAC address on VFs of PF2 and PF3. +This patch fixes the issue by using port number to set mac address. 
+ +Fixes: e0cdac65ba26 ("cxgb4vf: configure ports accessible by the VF") +Signed-off-by: Anumula Murali Mohan Reddy +Signed-off-by: Potnuri Bharat Teja +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20241206062014.49414-1-anumula@chelsio.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 2 +- + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- + drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 5 +++-- + 3 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +index 5657ac8cfca0..f1a8ae047821 100644 +--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h ++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +@@ -2084,7 +2084,7 @@ void t4_idma_monitor(struct adapter *adapter, + struct sge_idma_monitor_state *idma, + int hz, int ticks); + int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, +- unsigned int naddr, u8 *addr); ++ u8 start, unsigned int naddr, u8 *addr); + void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok); + void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs, +diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +index 9cbce1faab26..7ce112b95b62 100644 +--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c ++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +@@ -3247,7 +3247,7 @@ static int cxgb4_mgmt_set_vf_mac(struct net_device *dev, int vf, u8 *mac) + + dev_info(pi->adapter->pdev_dev, + "Setting MAC %pM on VF %d\n", mac, vf); +- ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac); ++ ret = t4_set_vf_mac_acl(adap, vf + 1, pi->lport, 1, mac); + if (!ret) + ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac); + return ret; +diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +index 76de55306c4d..175bf9b13058 100644 +--- 
a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c ++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +@@ -10215,11 +10215,12 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size) + * t4_set_vf_mac_acl - Set MAC address for the specified VF + * @adapter: The adapter + * @vf: one of the VFs instantiated by the specified PF ++ * @start: The start port id associated with specified VF + * @naddr: the number of MAC addresses + * @addr: the MAC address(es) to be set to the specified VF + */ + int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, +- unsigned int naddr, u8 *addr) ++ u8 start, unsigned int naddr, u8 *addr) + { + struct fw_acl_mac_cmd cmd; + +@@ -10234,7 +10235,7 @@ int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, + cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd)); + cmd.nmac = naddr; + +- switch (adapter->pf) { ++ switch (start) { + case 3: + memcpy(cmd.macaddr3, addr, sizeof(cmd.macaddr3)); + break; +-- +2.39.5 + diff --git a/queue-6.1/documentation-pm-clarify-pm_runtime_resume_and_get-r.patch b/queue-6.1/documentation-pm-clarify-pm_runtime_resume_and_get-r.patch new file mode 100644 index 00000000000..33731ac2126 --- /dev/null +++ b/queue-6.1/documentation-pm-clarify-pm_runtime_resume_and_get-r.patch @@ -0,0 +1,42 @@ +From 7b5598c78c2bb3de17ab91a727acdfb2182bab09 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Dec 2024 14:37:29 +0000 +Subject: Documentation: PM: Clarify pm_runtime_resume_and_get() return value + +From: Paul Barker + +[ Upstream commit ccb84dc8f4a02e7d30ffd388522996546b4d00e1 ] + +Update the documentation to match the behaviour of the code. + +pm_runtime_resume_and_get() always returns 0 on success, even if +__pm_runtime_resume() returns 1. 
+ +Fixes: 2c412337cfe6 ("PM: runtime: Add documentation for pm_runtime_resume_and_get()") +Signed-off-by: Paul Barker +Link: https://patch.msgid.link/20241203143729.478-1-paul.barker.ct@bp.renesas.com +[ rjw: Subject and changelog edits, adjusted new comment formatting ] +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Sasha Levin +--- + Documentation/power/runtime_pm.rst | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst +index 65b86e487afe..b6d5a3a8febc 100644 +--- a/Documentation/power/runtime_pm.rst ++++ b/Documentation/power/runtime_pm.rst +@@ -347,7 +347,9 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: + + `int pm_runtime_resume_and_get(struct device *dev);` + - run pm_runtime_resume(dev) and if successful, increment the device's +- usage counter; return the result of pm_runtime_resume ++ usage counter; returns 0 on success (whether or not the device's ++ runtime PM status was already 'active') or the error code from ++ pm_runtime_resume() on failure. + + `int pm_request_idle(struct device *dev);` + - submit a request to execute the subsystem-level idle callback for the +-- +2.39.5 + diff --git a/queue-6.1/net-add-a-refcount-tracker-for-kernel-sockets.patch b/queue-6.1/net-add-a-refcount-tracker-for-kernel-sockets.patch new file mode 100644 index 00000000000..7b4131d497e --- /dev/null +++ b/queue-6.1/net-add-a-refcount-tracker-for-kernel-sockets.patch @@ -0,0 +1,200 @@ +From b13e4063fb63f721a1343f2a866d63ad13587cd3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Oct 2022 23:20:18 +0000 +Subject: net: add a refcount tracker for kernel sockets + +From: Eric Dumazet + +[ Upstream commit 0cafd77dcd032d1687efaba5598cf07bce85997f ] + +Commit ffa84b5ffb37 ("net: add netns refcount tracker to struct sock") +added a tracker to sockets, but did not track kernel sockets. 
+ +We still have syzbot reports hinting about netns being destroyed +while some kernel TCP sockets had not been dismantled. + +This patch tracks kernel sockets, and adds a ref_tracker_dir_print() +call to net_free() right before the netns is freed. + +Normally, each layer is responsible for properly releasing its +kernel sockets before last call to net_free(). + +This debugging facility is enabled with CONFIG_NET_NS_REFCNT_TRACKER=y + +Signed-off-by: Eric Dumazet +Reviewed-by: Kuniyuki Iwashima +Tested-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Stable-dep-of: 0f6ede9fbc74 ("net: defer final 'struct net' free in netns dismantle") +Signed-off-by: Sasha Levin +--- + include/net/net_namespace.h | 30 ++++++++++++++++++++++-------- + net/core/net_namespace.c | 5 +++++ + net/core/sock.c | 14 ++++++++++++++ + net/netlink/af_netlink.c | 11 +++++++++++ + net/rds/tcp.c | 3 +++ + 5 files changed, 55 insertions(+), 8 deletions(-) + +diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h +index 8c3587d5c308..78beaa765c73 100644 +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -92,7 +92,9 @@ struct net { + + struct ns_common ns; + struct ref_tracker_dir refcnt_tracker; +- ++ struct ref_tracker_dir notrefcnt_tracker; /* tracker for objects not ++ * refcounted against netns ++ */ + struct list_head dev_base_head; + struct proc_dir_entry *proc_net; + struct proc_dir_entry *proc_net_stat; +@@ -320,19 +322,31 @@ static inline int check_net(const struct net *net) + #endif + + +-static inline void netns_tracker_alloc(struct net *net, +- netns_tracker *tracker, gfp_t gfp) ++static inline void __netns_tracker_alloc(struct net *net, ++ netns_tracker *tracker, ++ bool refcounted, ++ gfp_t gfp) + { + #ifdef CONFIG_NET_NS_REFCNT_TRACKER +- ref_tracker_alloc(&net->refcnt_tracker, tracker, gfp); ++ ref_tracker_alloc(refcounted ? 
&net->refcnt_tracker : ++ &net->notrefcnt_tracker, ++ tracker, gfp); + #endif + } + +-static inline void netns_tracker_free(struct net *net, +- netns_tracker *tracker) ++static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker, ++ gfp_t gfp) ++{ ++ __netns_tracker_alloc(net, tracker, true, gfp); ++} ++ ++static inline void __netns_tracker_free(struct net *net, ++ netns_tracker *tracker, ++ bool refcounted) + { + #ifdef CONFIG_NET_NS_REFCNT_TRACKER +- ref_tracker_free(&net->refcnt_tracker, tracker); ++ ref_tracker_free(refcounted ? &net->refcnt_tracker : ++ &net->notrefcnt_tracker, tracker); + #endif + } + +@@ -346,7 +360,7 @@ static inline struct net *get_net_track(struct net *net, + + static inline void put_net_track(struct net *net, netns_tracker *tracker) + { +- netns_tracker_free(net, tracker); ++ __netns_tracker_free(net, tracker, true); + put_net(net); + } + +diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c +index 1d95a5adce4e..0fe9fd2cf4e2 100644 +--- a/net/core/net_namespace.c ++++ b/net/core/net_namespace.c +@@ -319,6 +319,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) + + refcount_set(&net->ns.count, 1); + ref_tracker_dir_init(&net->refcnt_tracker, 128); ++ ref_tracker_dir_init(&net->notrefcnt_tracker, 128); + + refcount_set(&net->passive, 1); + get_random_bytes(&net->hash_mix, sizeof(u32)); +@@ -439,6 +440,10 @@ static void net_free(struct net *net) + { + if (refcount_dec_and_test(&net->passive)) { + kfree(rcu_access_pointer(net->gen)); ++ ++ /* There should not be any trackers left there. 
*/ ++ ref_tracker_dir_exit(&net->notrefcnt_tracker); ++ + kmem_cache_free(net_cachep, net); + } + } +diff --git a/net/core/sock.c b/net/core/sock.c +index dce8f878f638..2ba6385e39fd 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -2111,6 +2111,9 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, + if (likely(sk->sk_net_refcnt)) { + get_net_track(net, &sk->ns_tracker, priority); + sock_inuse_add(net, 1); ++ } else { ++ __netns_tracker_alloc(net, &sk->ns_tracker, ++ false, priority); + } + + sock_net_set(sk, net); +@@ -2166,6 +2169,9 @@ static void __sk_destruct(struct rcu_head *head) + + if (likely(sk->sk_net_refcnt)) + put_net_track(sock_net(sk), &sk->ns_tracker); ++ else ++ __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); ++ + sk_prot_free(sk->sk_prot_creator, sk); + } + +@@ -2254,6 +2260,14 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) + if (likely(newsk->sk_net_refcnt)) { + get_net_track(sock_net(newsk), &newsk->ns_tracker, priority); + sock_inuse_add(sock_net(newsk), 1); ++ } else { ++ /* Kernel sockets are not elevating the struct net refcount. ++ * Instead, use a tracker to more easily detect if a layer ++ * is not properly dismantling its kernel sockets at netns ++ * destroy time. ++ */ ++ __netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker, ++ false, priority); + } + sk_node_init(&newsk->sk_node); + sock_lock_init(newsk); +diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c +index 8a74847dacaf..3bc862fd89a5 100644 +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -797,6 +797,17 @@ static int netlink_release(struct socket *sock) + } + + sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); ++ ++ /* Because struct net might disappear soon, do not keep a pointer. 
*/ ++ if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) { ++ __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); ++ /* Because of deferred_put_nlk_sk and use of work queue, ++ * it is possible netns will be freed before this socket. ++ */ ++ sock_net_set(sk, &init_net); ++ __netns_tracker_alloc(&init_net, &sk->ns_tracker, ++ false, GFP_KERNEL); ++ } + call_rcu(&nlk->rcu, deferred_put_nlk_sk); + return 0; + } +diff --git a/net/rds/tcp.c b/net/rds/tcp.c +index 4444fd82b66d..c5b86066ff66 100644 +--- a/net/rds/tcp.c ++++ b/net/rds/tcp.c +@@ -503,6 +503,9 @@ bool rds_tcp_tune(struct socket *sock) + release_sock(sk); + return false; + } ++ /* Update ns_tracker to current stack trace and refcounted tracker */ ++ __netns_tracker_free(net, &sk->ns_tracker, false); ++ + sk->sk_net_refcnt = 1; + netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); +-- +2.39.5 + diff --git a/queue-6.1/net-defer-final-struct-net-free-in-netns-dismantle.patch b/queue-6.1/net-defer-final-struct-net-free-in-netns-dismantle.patch new file mode 100644 index 00000000000..14f3d139092 --- /dev/null +++ b/queue-6.1/net-defer-final-struct-net-free-in-netns-dismantle.patch @@ -0,0 +1,223 @@ +From 7c7c5115460996b63102c3458ee25b9551524d37 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 12:54:55 +0000 +Subject: net: defer final 'struct net' free in netns dismantle + +From: Eric Dumazet + +[ Upstream commit 0f6ede9fbc747e2553612271bce108f7517e7a45 ] + +Ilya reported a slab-use-after-free in dst_destroy [1] + +Issue is in xfrm6_net_init() and xfrm4_net_init() : + +They copy xfrm[46]_dst_ops_template into net->xfrm.xfrm[46]_dst_ops. + +But net structure might be freed before all the dst callbacks are +called. So when dst_destroy() calls later : + +if (dst->ops->destroy) + dst->ops->destroy(dst); + +dst->ops points to the old net->xfrm.xfrm[46]_dst_ops, which has been freed. 
+ +See a relevant issue fixed in : + +ac888d58869b ("net: do not delay dst_entries_add() in dst_release()") + +A fix is to queue the 'struct net' to be freed after one +another cleanup_net() round (and existing rcu_barrier()) + +[1] + +BUG: KASAN: slab-use-after-free in dst_destroy (net/core/dst.c:112) +Read of size 8 at addr ffff8882137ccab0 by task swapper/37/0 +Dec 03 05:46:18 kernel: +CPU: 37 UID: 0 PID: 0 Comm: swapper/37 Kdump: loaded Not tainted 6.12.0 #67 +Hardware name: Red Hat KVM/RHEL, BIOS 1.16.1-1.el9 04/01/2014 +Call Trace: + <IRQ> +dump_stack_lvl (lib/dump_stack.c:124) +print_address_description.constprop.0 (mm/kasan/report.c:378) +? dst_destroy (net/core/dst.c:112) +print_report (mm/kasan/report.c:489) +? dst_destroy (net/core/dst.c:112) +? kasan_addr_to_slab (mm/kasan/common.c:37) +kasan_report (mm/kasan/report.c:603) +? dst_destroy (net/core/dst.c:112) +? rcu_do_batch (kernel/rcu/tree.c:2567) +dst_destroy (net/core/dst.c:112) +rcu_do_batch (kernel/rcu/tree.c:2567) +? __pfx_rcu_do_batch (kernel/rcu/tree.c:2491) +? 
lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4339 kernel/locking/lockdep.c:4406) +rcu_core (kernel/rcu/tree.c:2825) +handle_softirqs (kernel/softirq.c:554) +__irq_exit_rcu (kernel/softirq.c:589 kernel/softirq.c:428 kernel/softirq.c:637) +irq_exit_rcu (kernel/softirq.c:651) +sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1049 arch/x86/kernel/apic/apic.c:1049) + </IRQ> + <TASK> +asm_sysvec_apic_timer_interrupt (./arch/x86/include/asm/idtentry.h:702) +RIP: 0010:default_idle (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:92 arch/x86/kernel/process.c:743) +Code: 00 4d 29 c8 4c 01 c7 4c 29 c2 e9 6e ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 0f 00 2d c7 c9 27 00 fb f4 <fa> c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 90 +RSP: 0018:ffff888100d2fe00 EFLAGS: 00000246 +RAX: 00000000001870ed RBX: 1ffff110201a5fc2 RCX: ffffffffb61a3e46 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffb3d4d123 +RBP: 0000000000000000 R08: 0000000000000001 R09: ffffed11c7e1835d +R10: ffff888e3f0c1aeb R11: 0000000000000000 R12: 0000000000000000 +R13: ffff888100d20000 R14: dffffc0000000000 R15: 0000000000000000 +? ct_kernel_exit.constprop.0 (kernel/context_tracking.c:148) +? cpuidle_idle_call (kernel/sched/idle.c:186) +default_idle_call (./include/linux/cpuidle.h:143 kernel/sched/idle.c:118) +cpuidle_idle_call (kernel/sched/idle.c:186) +? __pfx_cpuidle_idle_call (kernel/sched/idle.c:168) +? lock_release (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5848) +? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4347 kernel/locking/lockdep.c:4406) +? tsc_verify_tsc_adjust (arch/x86/kernel/tsc_sync.c:59) +do_idle (kernel/sched/idle.c:326) +cpu_startup_entry (kernel/sched/idle.c:423 (discriminator 1)) +start_secondary (arch/x86/kernel/smpboot.c:202 arch/x86/kernel/smpboot.c:282) +? __pfx_start_secondary (arch/x86/kernel/smpboot.c:232) +? 
soft_restart_cpu (arch/x86/kernel/head_64.S:452) +common_startup_64 (arch/x86/kernel/head_64.S:414) + </TASK> +Dec 03 05:46:18 kernel: +Allocated by task 12184: +kasan_save_stack (mm/kasan/common.c:48) +kasan_save_track (./arch/x86/include/asm/current.h:49 mm/kasan/common.c:60 mm/kasan/common.c:69) +__kasan_slab_alloc (mm/kasan/common.c:319 mm/kasan/common.c:345) +kmem_cache_alloc_noprof (mm/slub.c:4085 mm/slub.c:4134 mm/slub.c:4141) +copy_net_ns (net/core/net_namespace.c:421 net/core/net_namespace.c:480) +create_new_namespaces (kernel/nsproxy.c:110) +unshare_nsproxy_namespaces (kernel/nsproxy.c:228 (discriminator 4)) +ksys_unshare (kernel/fork.c:3313) +__x64_sys_unshare (kernel/fork.c:3382) +do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) +entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) +Dec 03 05:46:18 kernel: +Freed by task 11: +kasan_save_stack (mm/kasan/common.c:48) +kasan_save_track (./arch/x86/include/asm/current.h:49 mm/kasan/common.c:60 mm/kasan/common.c:69) +kasan_save_free_info (mm/kasan/generic.c:582) +__kasan_slab_free (mm/kasan/common.c:271) +kmem_cache_free (mm/slub.c:4579 mm/slub.c:4681) +cleanup_net (net/core/net_namespace.c:456 net/core/net_namespace.c:446 net/core/net_namespace.c:647) +process_one_work (kernel/workqueue.c:3229) +worker_thread (kernel/workqueue.c:3304 kernel/workqueue.c:3391) +kthread (kernel/kthread.c:389) +ret_from_fork (arch/x86/kernel/process.c:147) +ret_from_fork_asm (arch/x86/entry/entry_64.S:257) +Dec 03 05:46:18 kernel: +Last potentially related work creation: +kasan_save_stack (mm/kasan/common.c:48) +__kasan_record_aux_stack (mm/kasan/generic.c:541) +insert_work (./include/linux/instrumented.h:68 ./include/asm-generic/bitops/instrumented-non-atomic.h:141 kernel/workqueue.c:788 kernel/workqueue.c:795 kernel/workqueue.c:2186) +__queue_work (kernel/workqueue.c:2340) +queue_work_on (kernel/workqueue.c:2391) +xfrm_policy_insert (net/xfrm/xfrm_policy.c:1610) +xfrm_add_policy 
(net/xfrm/xfrm_user.c:2116) +xfrm_user_rcv_msg (net/xfrm/xfrm_user.c:3321) +netlink_rcv_skb (net/netlink/af_netlink.c:2536) +xfrm_netlink_rcv (net/xfrm/xfrm_user.c:3344) +netlink_unicast (net/netlink/af_netlink.c:1316 net/netlink/af_netlink.c:1342) +netlink_sendmsg (net/netlink/af_netlink.c:1886) +sock_write_iter (net/socket.c:729 net/socket.c:744 net/socket.c:1165) +vfs_write (fs/read_write.c:590 fs/read_write.c:683) +ksys_write (fs/read_write.c:736) +do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) +entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) +Dec 03 05:46:18 kernel: +Second to last potentially related work creation: +kasan_save_stack (mm/kasan/common.c:48) +__kasan_record_aux_stack (mm/kasan/generic.c:541) +insert_work (./include/linux/instrumented.h:68 ./include/asm-generic/bitops/instrumented-non-atomic.h:141 kernel/workqueue.c:788 kernel/workqueue.c:795 kernel/workqueue.c:2186) +__queue_work (kernel/workqueue.c:2340) +queue_work_on (kernel/workqueue.c:2391) +__xfrm_state_insert (./include/linux/workqueue.h:723 net/xfrm/xfrm_state.c:1150 net/xfrm/xfrm_state.c:1145 net/xfrm/xfrm_state.c:1513) +xfrm_state_update (./include/linux/spinlock.h:396 net/xfrm/xfrm_state.c:1940) +xfrm_add_sa (net/xfrm/xfrm_user.c:912) +xfrm_user_rcv_msg (net/xfrm/xfrm_user.c:3321) +netlink_rcv_skb (net/netlink/af_netlink.c:2536) +xfrm_netlink_rcv (net/xfrm/xfrm_user.c:3344) +netlink_unicast (net/netlink/af_netlink.c:1316 net/netlink/af_netlink.c:1342) +netlink_sendmsg (net/netlink/af_netlink.c:1886) +sock_write_iter (net/socket.c:729 net/socket.c:744 net/socket.c:1165) +vfs_write (fs/read_write.c:590 fs/read_write.c:683) +ksys_write (fs/read_write.c:736) +do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) +entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) + +Fixes: a8a572a6b5f2 ("xfrm: dst_entries_init() per-net dst_ops") +Reported-by: Ilya Maximets +Closes: 
https://lore.kernel.org/netdev/CANn89iKKYDVpB=MtmfH7nyv2p=rJWSLedO5k7wSZgtY_tO8WQg@mail.gmail.com/T/#m02c98c3009fe66382b73cfb4db9cf1df6fab3fbf +Signed-off-by: Eric Dumazet +Acked-by: Paolo Abeni +Reviewed-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20241204125455.3871859-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/net_namespace.h | 1 + + net/core/net_namespace.c | 20 +++++++++++++++++++- + 2 files changed, 20 insertions(+), 1 deletion(-) + +diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h +index 78beaa765c73..75241d170633 100644 +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -81,6 +81,7 @@ struct net { + * or to unregister pernet ops + * (pernet_ops_rwsem write locked). + */ ++ struct llist_node defer_free_list; + struct llist_node cleanup_list; /* namespaces on death row */ + + #ifdef CONFIG_KEYS +diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c +index 0fe9fd2cf4e2..ddd33dcd6658 100644 +--- a/net/core/net_namespace.c ++++ b/net/core/net_namespace.c +@@ -436,6 +436,21 @@ static struct net *net_alloc(void) + goto out; + } + ++static LLIST_HEAD(defer_free_list); ++ ++static void net_complete_free(void) ++{ ++ struct llist_node *kill_list; ++ struct net *net, *next; ++ ++ /* Get the list of namespaces to free from last round. */ ++ kill_list = llist_del_all(&defer_free_list); ++ ++ llist_for_each_entry_safe(net, next, kill_list, defer_free_list) ++ kmem_cache_free(net_cachep, net); ++ ++} ++ + static void net_free(struct net *net) + { + if (refcount_dec_and_test(&net->passive)) { +@@ -444,7 +459,8 @@ static void net_free(struct net *net) + /* There should not be any trackers left there. */ + ref_tracker_dir_exit(&net->notrefcnt_tracker); + +- kmem_cache_free(net_cachep, net); ++ /* Wait for an extra rcu_barrier() before final free. 
*/ ++ llist_add(&net->defer_free_list, &defer_free_list); + } + } + +@@ -619,6 +635,8 @@ static void cleanup_net(struct work_struct *work) + */ + rcu_barrier(); + ++ net_complete_free(); ++ + /* Finally it is safe to free my network namespace structure */ + list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { + list_del_init(&net->exit_list); +-- +2.39.5 + diff --git a/queue-6.1/net-dsa-felix-fix-stuck-cpu-injected-packets-with-sh.patch b/queue-6.1/net-dsa-felix-fix-stuck-cpu-injected-packets-with-sh.patch new file mode 100644 index 00000000000..601bc5c8816 --- /dev/null +++ b/queue-6.1/net-dsa-felix-fix-stuck-cpu-injected-packets-with-sh.patch @@ -0,0 +1,171 @@ +From 62ecac1356f16db8e6e32f130465c0eeaff4ea35 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 15:26:40 +0200 +Subject: net: dsa: felix: fix stuck CPU-injected packets with short taprio + windows + +From: Vladimir Oltean + +[ Upstream commit acfcdb78d5d4cdb78e975210c8825b9a112463f6 ] + +With this port schedule: + +tc qdisc replace dev $send_if parent root handle 100 taprio \ + num_tc 8 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + base-time 0 cycle-time 10000 \ + sched-entry S 01 1250 \ + sched-entry S 02 1250 \ + sched-entry S 04 1250 \ + sched-entry S 08 1250 \ + sched-entry S 10 1250 \ + sched-entry S 20 1250 \ + sched-entry S 40 1250 \ + sched-entry S 80 1250 \ + flags 2 + +ptp4l would fail to take TX timestamps of Pdelay_Resp messages like this: + +increasing tx_timestamp_timeout may correct this issue, but it is likely caused by a driver bug +ptp4l[4134.168]: port 2: send peer delay response failed + +It turns out that the driver can't take their TX timestamps because it +can't transmit them in the first place. And there's nothing special +about the Pdelay_Resp packets - they're just regular 68 byte packets. +But with this taprio configuration, the switch would refuse to send even +the ETH_ZLEN minimum packet size. 
+ +This should have definitely not been the case. When applying the taprio +config, the driver prints: + +mscc_felix 0000:00:00.5: port 0 tc 0 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 1 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 2 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 3 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 4 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 5 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 6 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 7 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS + +and thus, everything under 132 bytes - ETH_FCS_LEN should have been sent +without problems. Yet it's not. + +For the forwarding path, the configuration is fine, yet packets injected +from Linux get stuck with this schedule no matter what. + +The first hint that the static guard bands are the cause of the problem +is that reverting Michael Walle's commit 297c4de6f780 ("net: dsa: felix: +re-enable TAS guard band mode") made things work. It must be that the +guard bands are calculated incorrectly. 
+ +I remembered that there is a magic constant in the driver, set to 33 ns +for no logical reason other than experimentation, which says "never let +the static guard bands get so large as to leave less than this amount of +remaining space in the time slot, because the queue system will refuse +to schedule packets otherwise, and they will get stuck". I had a hunch +that my previous experimentally-determined value was only good for +packets coming from the forwarding path, and that the CPU injection path +needed more. + +I came to the new value of 35 ns through binary search, after seeing +that with 544 ns (the bit time required to send the Pdelay_Resp packet +at gigabit) it works. Again, this is purely experimental, there's no +logic and the manual doesn't say anything. + +The new driver prints for this schedule look like this: + +mscc_felix 0000:00:00.5: port 0 tc 0 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 1 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 2 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 3 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 4 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 5 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 6 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 7 min gate length 1250 
ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS + +So yes, the maximum MTU is now even smaller by 1 byte than before. +This is maybe counter-intuitive, but makes more sense with a diagram of +one time slot. + +Before: + + Gate open Gate close + | | + v 1250 ns total time slot duration v + <----------------------------------------------------> + <----><----------------------------------------------> + 33 ns 1217 ns static guard band + useful + + Gate open Gate close + | | + v 1250 ns total time slot duration v + <----------------------------------------------------> + <-----><---------------------------------------------> + 35 ns 1215 ns static guard band + useful + +The static guard band implemented by this switch hardware directly +determines the maximum allowable MTU for that traffic class. The larger +it is, the earlier the switch will stop scheduling frames for +transmission, because otherwise they might overrun the gate close time +(and avoiding that is the entire purpose of Michael's patch). +So, we now have guard bands smaller by 2 ns, thus, in this particular +case, we lose a byte of the maximum MTU. 
+ +Fixes: 11afdc6526de ("net: dsa: felix: tc-taprio intervals smaller than MTU should send at least one packet") +Signed-off-by: Vladimir Oltean +Reviewed-by: Michael Walle +Link: https://patch.msgid.link/20241210132640.3426788-1-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/ocelot/felix_vsc9959.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c +index 0186482194d2..391c4e3cb66f 100644 +--- a/drivers/net/dsa/ocelot/felix_vsc9959.c ++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c +@@ -22,7 +22,7 @@ + #define VSC9959_NUM_PORTS 6 + + #define VSC9959_TAS_GCL_ENTRY_MAX 63 +-#define VSC9959_TAS_MIN_GATE_LEN_NS 33 ++#define VSC9959_TAS_MIN_GATE_LEN_NS 35 + #define VSC9959_VCAP_POLICER_BASE 63 + #define VSC9959_VCAP_POLICER_MAX 383 + #define VSC9959_SWITCH_PCI_BAR 4 +@@ -1057,11 +1057,15 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot) + mdiobus_free(felix->imdio); + } + +-/* The switch considers any frame (regardless of size) as eligible for +- * transmission if the traffic class gate is open for at least 33 ns. ++/* The switch considers any frame (regardless of size) as eligible ++ * for transmission if the traffic class gate is open for at least ++ * VSC9959_TAS_MIN_GATE_LEN_NS. ++ * + * Overruns are prevented by cropping an interval at the end of the gate time +- * slot for which egress scheduling is blocked, but we need to still keep 33 ns +- * available for one packet to be transmitted, otherwise the port tc will hang. ++ * slot for which egress scheduling is blocked, but we need to still keep ++ * VSC9959_TAS_MIN_GATE_LEN_NS available for one packet to be transmitted, ++ * otherwise the port tc will hang. ++ * + * This function returns the size of a gate interval that remains available for + * setting the guard band, after reserving the space for one egress frame. 
+ */ +@@ -1293,7 +1297,8 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) + * per-tc static guard band lengths, so it reduces the + * useful gate interval length. Therefore, be careful + * to calculate a guard band (and therefore max_sdu) +- * that still leaves 33 ns available in the time slot. ++ * that still leaves VSC9959_TAS_MIN_GATE_LEN_NS ++ * available in the time slot. + */ + max_sdu = div_u64(remaining_gate_len_ps, picos_per_byte); + /* A TC gate may be completely closed, which is a +-- +2.39.5 + diff --git a/queue-6.1/net-lapb-increase-lapb_header_len.patch b/queue-6.1/net-lapb-increase-lapb_header_len.patch new file mode 100644 index 00000000000..887af228aee --- /dev/null +++ b/queue-6.1/net-lapb-increase-lapb_header_len.patch @@ -0,0 +1,86 @@ +From b6bc6cecc4fbaabcf44e45d7c8205f910e51caaa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 14:10:31 +0000 +Subject: net: lapb: increase LAPB_HEADER_LEN + +From: Eric Dumazet + +[ Upstream commit a6d75ecee2bf828ac6a1b52724aba0a977e4eaf4 ] + +It is unclear if net/lapb code is supposed to be ready for 8021q. + +We can at least avoid crashes like the following : + +skbuff: skb_under_panic: text:ffffffff8aabe1f6 len:24 put:20 head:ffff88802824a400 data:ffff88802824a3fe tail:0x16 end:0x140 dev:nr0.2 +------------[ cut here ]------------ + kernel BUG at net/core/skbuff.c:206 ! 
+Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI +CPU: 1 UID: 0 PID: 5508 Comm: dhcpcd Not tainted 6.12.0-rc7-syzkaller-00144-g66418447d27b #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/30/2024 + RIP: 0010:skb_panic net/core/skbuff.c:206 [inline] + RIP: 0010:skb_under_panic+0x14b/0x150 net/core/skbuff.c:216 +Code: 0d 8d 48 c7 c6 2e 9e 29 8e 48 8b 54 24 08 8b 0c 24 44 8b 44 24 04 4d 89 e9 50 41 54 41 57 41 56 e8 1a 6f 37 02 48 83 c4 20 90 <0f> 0b 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 +RSP: 0018:ffffc90002ddf638 EFLAGS: 00010282 +RAX: 0000000000000086 RBX: dffffc0000000000 RCX: 7a24750e538ff600 +RDX: 0000000000000000 RSI: 0000000000000201 RDI: 0000000000000000 +RBP: ffff888034a86650 R08: ffffffff8174b13c R09: 1ffff920005bbe60 +R10: dffffc0000000000 R11: fffff520005bbe61 R12: 0000000000000140 +R13: ffff88802824a400 R14: ffff88802824a3fe R15: 0000000000000016 +FS: 00007f2a5990d740(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 000000110c2631fd CR3: 0000000029504000 CR4: 00000000003526f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + skb_push+0xe5/0x100 net/core/skbuff.c:2636 + nr_header+0x36/0x320 net/netrom/nr_dev.c:69 + dev_hard_header include/linux/netdevice.h:3148 [inline] + vlan_dev_hard_header+0x359/0x480 net/8021q/vlan_dev.c:83 + dev_hard_header include/linux/netdevice.h:3148 [inline] + lapbeth_data_transmit+0x1f6/0x2a0 drivers/net/wan/lapbether.c:257 + lapb_data_transmit+0x91/0xb0 net/lapb/lapb_iface.c:447 + lapb_transmit_buffer+0x168/0x1f0 net/lapb/lapb_out.c:149 + lapb_establish_data_link+0x84/0xd0 + lapb_device_event+0x4e0/0x670 + notifier_call_chain+0x19f/0x3e0 kernel/notifier.c:93 + __dev_notify_flags+0x207/0x400 + dev_change_flags+0xf0/0x1a0 net/core/dev.c:8922 + devinet_ioctl+0xa4e/0x1aa0 net/ipv4/devinet.c:1188 + 
inet_ioctl+0x3d7/0x4f0 net/ipv4/af_inet.c:1003 + sock_do_ioctl+0x158/0x460 net/socket.c:1227 + sock_ioctl+0x626/0x8e0 net/socket.c:1346 + vfs_ioctl fs/ioctl.c:51 [inline] + __do_sys_ioctl fs/ioctl.c:907 [inline] + __se_sys_ioctl+0xf9/0x170 fs/ioctl.c:893 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: syzbot+fb99d1b0c0f81d94a5e2@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/67506220.050a0220.17bd51.006c.GAE@google.com/T/#u +Signed-off-by: Eric Dumazet +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20241204141031.4030267-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/lapb.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/net/lapb.h b/include/net/lapb.h +index 124ee122f2c8..6c07420644e4 100644 +--- a/include/net/lapb.h ++++ b/include/net/lapb.h +@@ -4,7 +4,7 @@ + #include + #include + +-#define LAPB_HEADER_LEN 20 /* LAPB over Ethernet + a bit more */ ++#define LAPB_HEADER_LEN MAX_HEADER /* LAPB over Ethernet + a bit more */ + + #define LAPB_ACK_PENDING_CONDITION 0x01 + #define LAPB_REJECT_CONDITION 0x02 +-- +2.39.5 + diff --git a/queue-6.1/net-mlx5-dr-prevent-potential-error-pointer-derefere.patch b/queue-6.1/net-mlx5-dr-prevent-potential-error-pointer-derefere.patch new file mode 100644 index 00000000000..a7d48230ee8 --- /dev/null +++ b/queue-6.1/net-mlx5-dr-prevent-potential-error-pointer-derefere.patch @@ -0,0 +1,43 @@ +From 2b8f36bc073a7b87bce748787685616643cb33b2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 15:06:41 +0300 +Subject: net/mlx5: DR, prevent potential error pointer dereference + +From: Dan Carpenter + +[ Upstream commit 11776cff0b563c8b8a4fa76cab620bfb633a8cb8 ] + +The dr_domain_add_vport_cap() function generally returns NULL on error +but sometimes we want it to return ERR_PTR(-EBUSY) so the caller 
can +retry. The problem here is that "ret" can be either -EBUSY or -ENOMEM +and if it's -ENOMEM then the error pointer is propagated back and +eventually dereferenced in dr_ste_v0_build_src_gvmi_qpn_tag(). + +Fixes: 11a45def2e19 ("net/mlx5: DR, Add support for SF vports") +Signed-off-by: Dan Carpenter +Reviewed-by: Tariq Toukan +Link: https://patch.msgid.link/07477254-e179-43e2-b1b3-3b9db4674195@stanley.mountain +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +index fc6ae49b5ecc..d462017c6a95 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +@@ -195,7 +195,9 @@ dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport) + if (ret) { + mlx5dr_dbg(dmn, "Couldn't insert new vport into xarray (%d)\n", ret); + kvfree(vport_caps); +- return ERR_PTR(ret); ++ if (ret == -EBUSY) ++ return ERR_PTR(-EBUSY); ++ return NULL; + } + + return vport_caps; +-- +2.39.5 + diff --git a/queue-6.1/net-mscc-ocelot-be-resilient-to-loss-of-ptp-packets-.patch b/queue-6.1/net-mscc-ocelot-be-resilient-to-loss-of-ptp-packets-.patch new file mode 100644 index 00000000000..9d47d6b3ef7 --- /dev/null +++ b/queue-6.1/net-mscc-ocelot-be-resilient-to-loss-of-ptp-packets-.patch @@ -0,0 +1,404 @@ +From 352ee48d4fc460d401e1d4bb3b33fad7d8b2dc12 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 16:55:18 +0200 +Subject: net: mscc: ocelot: be resilient to loss of PTP packets during + transmission + +From: Vladimir Oltean + +[ Upstream commit b454abfab52543c44b581afc807b9f97fc1e7a3a ] + +The Felix DSA driver presents unique challenges that make the simplistic +ocelot PTP TX timestamping procedure unreliable: any 
transmitted packet +may be lost in hardware before it ever leaves our local system. + +This may happen because there is congestion on the DSA conduit, the +switch CPU port or even user port (Qdiscs like taprio may delay packets +indefinitely by design). + +The technical problem is that the kernel, i.e. ocelot_port_add_txtstamp_skb(), +runs out of timestamp IDs eventually, because it never detects that +packets are lost, and keeps the IDs of the lost packets on hold +indefinitely. The manifestation of the issue once the entire timestamp +ID range becomes busy looks like this in dmesg: + +mscc_felix 0000:00:00.5: port 0 delivering skb without TX timestamp +mscc_felix 0000:00:00.5: port 1 delivering skb without TX timestamp + +At the surface level, we need a timeout timer so that the kernel knows a +timestamp ID is available again. But there is a deeper problem with the +implementation, which is the monotonically increasing ocelot_port->ts_id. +In the presence of packet loss, it will be impossible to detect that and +reuse one of the holes created in the range of free timestamp IDs. + +What we actually need is a bitmap of 63 timestamp IDs tracking which one +is available. That is able to use up holes caused by packet loss, but +also gives us a unique opportunity to not implement an actual timer_list +for the timeout timer (very complicated in terms of locking). + +We could only declare a timestamp ID stale on demand (lazily), aka when +there's no other timestamp ID available. There are pros and cons to this +approach: the implementation is much more simple than per-packet timers +would be, but most of the stale packets would be quasi-leaked - not +really leaked, but blocked in driver memory, since this algorithm sees +no reason to free them. + +An improved technique would be to check for stale timestamp IDs every +time we allocate a new one. 
Assuming a constant flux of PTP packets, +this avoids stale packets being blocked in memory, but of course, +packets lost at the end of the flux are still blocked until the flux +resumes (nobody left to kick them out). + +Since implementing per-packet timers is way too complicated, this should +be good enough. + +Testing procedure: + +Persistently block traffic class 5 and try to run PTP on it: +$ tc qdisc replace dev swp3 parent root taprio num_tc 8 \ + map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + base-time 0 sched-entry S 0xdf 100000 flags 0x2 +[ 126.948141] mscc_felix 0000:00:00.5: port 3 tc 5 min gate length 0 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 1 octets including FCS +$ ptp4l -i swp3 -2 -P -m --socket_priority 5 --fault_reset_interval ASAP --logSyncInterval -3 +ptp4l[70.351]: port 1 (swp3): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[70.354]: port 0 (/var/run/ptp4l): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[70.358]: port 0 (/var/run/ptp4lro): INITIALIZING to LISTENING on INIT_COMPLETE +[ 70.394583] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +ptp4l[70.406]: timed out while polling for tx timestamp +ptp4l[70.406]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it +ptp4l[70.406]: port 1 (swp3): send peer delay response failed +ptp4l[70.407]: port 1 (swp3): clearing fault immediately +ptp4l[70.952]: port 1 (swp3): new foreign master d858d7.fffe.00ca6d-1 +[ 71.394858] mscc_felix 0000:00:00.5: port 3 timestamp id 1 +ptp4l[71.400]: timed out while polling for tx timestamp +ptp4l[71.400]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it +ptp4l[71.401]: port 1 (swp3): send peer delay response failed +ptp4l[71.401]: port 1 (swp3): clearing fault immediately +[ 72.393616] mscc_felix 0000:00:00.5: port 3 timestamp id 2 +ptp4l[72.401]: timed out while polling 
for tx timestamp +ptp4l[72.402]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it +ptp4l[72.402]: port 1 (swp3): send peer delay response failed +ptp4l[72.402]: port 1 (swp3): clearing fault immediately +ptp4l[72.952]: port 1 (swp3): new foreign master d858d7.fffe.00ca6d-1 +[ 73.395291] mscc_felix 0000:00:00.5: port 3 timestamp id 3 +ptp4l[73.400]: timed out while polling for tx timestamp +ptp4l[73.400]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it +ptp4l[73.400]: port 1 (swp3): send peer delay response failed +ptp4l[73.400]: port 1 (swp3): clearing fault immediately +[ 74.394282] mscc_felix 0000:00:00.5: port 3 timestamp id 4 +ptp4l[74.400]: timed out while polling for tx timestamp +ptp4l[74.401]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it +ptp4l[74.401]: port 1 (swp3): send peer delay response failed +ptp4l[74.401]: port 1 (swp3): clearing fault immediately +ptp4l[74.953]: port 1 (swp3): new foreign master d858d7.fffe.00ca6d-1 +[ 75.396830] mscc_felix 0000:00:00.5: port 3 invalidating stale timestamp ID 0 which seems lost +[ 75.405760] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +ptp4l[75.410]: timed out while polling for tx timestamp +ptp4l[75.411]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it +ptp4l[75.411]: port 1 (swp3): send peer delay response failed +ptp4l[75.411]: port 1 (swp3): clearing fault immediately +(...) 
+ +Remove the blocking condition and see that the port recovers: +$ same tc command as above, but use "sched-entry S 0xff" instead +$ same ptp4l command as above +ptp4l[99.489]: port 1 (swp3): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[99.490]: port 0 (/var/run/ptp4l): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[99.492]: port 0 (/var/run/ptp4lro): INITIALIZING to LISTENING on INIT_COMPLETE +[ 100.403768] mscc_felix 0000:00:00.5: port 3 invalidating stale timestamp ID 0 which seems lost +[ 100.412545] mscc_felix 0000:00:00.5: port 3 invalidating stale timestamp ID 1 which seems lost +[ 100.421283] mscc_felix 0000:00:00.5: port 3 invalidating stale timestamp ID 2 which seems lost +[ 100.430015] mscc_felix 0000:00:00.5: port 3 invalidating stale timestamp ID 3 which seems lost +[ 100.438744] mscc_felix 0000:00:00.5: port 3 invalidating stale timestamp ID 4 which seems lost +[ 100.447470] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 100.505919] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +ptp4l[100.963]: port 1 (swp3): new foreign master d858d7.fffe.00ca6d-1 +[ 101.405077] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 101.507953] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 102.405405] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 102.509391] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 103.406003] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 103.510011] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 104.405601] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 104.510624] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +ptp4l[104.965]: selected best master clock d858d7.fffe.00ca6d +ptp4l[104.966]: port 1 (swp3): assuming the grand master role +ptp4l[104.967]: port 1 (swp3): LISTENING to GRAND_MASTER on RS_GRAND_MASTER +[ 105.106201] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.232420] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.359001] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.405500] 
mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.485356] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.511220] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.610938] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.737237] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +(...) + +Notice that in this new usage pattern, a non-congested port should +basically use timestamp ID 0 all the time, progressing to higher numbers +only if there are unacknowledged timestamps in flight. Compare this to +the old usage, where the timestamp ID used to monotonically increase +modulo OCELOT_MAX_PTP_ID. + +In terms of implementation, this simplifies the bookkeeping of the +ocelot_port :: ts_id and ptp_skbs_in_flight. Since we need to traverse +the list of two-step timestampable skbs for each new packet anyway, the +information can already be computed and does not need to be stored. +Also, ocelot_port->tx_skbs is always accessed under the switch-wide +ocelot->ts_id_lock IRQ-unsafe spinlock, so we don't need the skb queue's +lock and can use the unlocked primitives safely. + +This problem was actually detected using the tc-taprio offload, and is +causing trouble in TSN scenarios, which Felix (NXP LS1028A / VSC9959) +supports but Ocelot (VSC7514) does not. Thus, I've selected the commit +to blame as the one adding initial timestamping support for the Felix +switch. 
+ +Fixes: c0bcf537667c ("net: dsa: ocelot: add hardware timestamping support for Felix") +Signed-off-by: Vladimir Oltean +Link: https://patch.msgid.link/20241205145519.1236778-5-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_ptp.c | 134 +++++++++++++++---------- + include/linux/dsa/ocelot.h | 1 + + include/soc/mscc/ocelot.h | 2 - + 3 files changed, 80 insertions(+), 57 deletions(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c +index c54e96ff3976..bc44aa635d49 100644 +--- a/drivers/net/ethernet/mscc/ocelot_ptp.c ++++ b/drivers/net/ethernet/mscc/ocelot_ptp.c +@@ -14,6 +14,8 @@ + #include + #include "ocelot.h" + ++#define OCELOT_PTP_TX_TSTAMP_TIMEOUT (5 * HZ) ++ + int ocelot_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts) + { + struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info); +@@ -607,34 +609,88 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port, + } + EXPORT_SYMBOL(ocelot_get_ts_info); + +-static int ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, ++static struct sk_buff *ocelot_port_dequeue_ptp_tx_skb(struct ocelot *ocelot, ++ int port, u8 ts_id, ++ u32 seqid) ++{ ++ struct ocelot_port *ocelot_port = ocelot->ports[port]; ++ struct sk_buff *skb, *skb_tmp, *skb_match = NULL; ++ struct ptp_header *hdr; ++ ++ spin_lock(&ocelot->ts_id_lock); ++ ++ skb_queue_walk_safe(&ocelot_port->tx_skbs, skb, skb_tmp) { ++ if (OCELOT_SKB_CB(skb)->ts_id != ts_id) ++ continue; ++ ++ /* Check that the timestamp ID is for the expected PTP ++ * sequenceId. We don't have to test ptp_parse_header() against ++ * NULL, because we've pre-validated the packet's ptp_class. 
++ */ ++ hdr = ptp_parse_header(skb, OCELOT_SKB_CB(skb)->ptp_class); ++ if (seqid != ntohs(hdr->sequence_id)) ++ continue; ++ ++ __skb_unlink(skb, &ocelot_port->tx_skbs); ++ ocelot->ptp_skbs_in_flight--; ++ skb_match = skb; ++ break; ++ } ++ ++ spin_unlock(&ocelot->ts_id_lock); ++ ++ return skb_match; ++} ++ ++static int ocelot_port_queue_ptp_tx_skb(struct ocelot *ocelot, int port, + struct sk_buff *clone) + { + struct ocelot_port *ocelot_port = ocelot->ports[port]; ++ DECLARE_BITMAP(ts_id_in_flight, OCELOT_MAX_PTP_ID); ++ struct sk_buff *skb, *skb_tmp; ++ unsigned long n; + + spin_lock(&ocelot->ts_id_lock); + +- if (ocelot_port->ptp_skbs_in_flight == OCELOT_MAX_PTP_ID || +- ocelot->ptp_skbs_in_flight == OCELOT_PTP_FIFO_SIZE) { ++ /* To get a better chance of acquiring a timestamp ID, first flush the ++ * stale packets still waiting in the TX timestamping queue. They are ++ * probably lost. ++ */ ++ skb_queue_walk_safe(&ocelot_port->tx_skbs, skb, skb_tmp) { ++ if (time_before(OCELOT_SKB_CB(skb)->ptp_tx_time + ++ OCELOT_PTP_TX_TSTAMP_TIMEOUT, jiffies)) { ++ dev_warn_ratelimited(ocelot->dev, ++ "port %d invalidating stale timestamp ID %u which seems lost\n", ++ port, OCELOT_SKB_CB(skb)->ts_id); ++ __skb_unlink(skb, &ocelot_port->tx_skbs); ++ kfree_skb(skb); ++ ocelot->ptp_skbs_in_flight--; ++ } else { ++ __set_bit(OCELOT_SKB_CB(skb)->ts_id, ts_id_in_flight); ++ } ++ } ++ ++ if (ocelot->ptp_skbs_in_flight == OCELOT_PTP_FIFO_SIZE) { + spin_unlock(&ocelot->ts_id_lock); + return -EBUSY; + } + +- skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS; +- /* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */ +- OCELOT_SKB_CB(clone)->ts_id = ocelot_port->ts_id; +- +- ocelot_port->ts_id++; +- if (ocelot_port->ts_id == OCELOT_MAX_PTP_ID) +- ocelot_port->ts_id = 0; ++ n = find_first_zero_bit(ts_id_in_flight, OCELOT_MAX_PTP_ID); ++ if (n == OCELOT_MAX_PTP_ID) { ++ spin_unlock(&ocelot->ts_id_lock); ++ return -EBUSY; ++ } + +- ocelot_port->ptp_skbs_in_flight++; ++ /* Found an 
available timestamp ID, use it */ ++ OCELOT_SKB_CB(clone)->ts_id = n; ++ OCELOT_SKB_CB(clone)->ptp_tx_time = jiffies; + ocelot->ptp_skbs_in_flight++; +- +- skb_queue_tail(&ocelot_port->tx_skbs, clone); ++ __skb_queue_tail(&ocelot_port->tx_skbs, clone); + + spin_unlock(&ocelot->ts_id_lock); + ++ dev_dbg_ratelimited(ocelot->dev, "port %d timestamp id %lu\n", port, n); ++ + return 0; + } + +@@ -690,12 +746,14 @@ int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, + if (!(*clone)) + return -ENOMEM; + +- err = ocelot_port_add_txtstamp_skb(ocelot, port, *clone); ++ /* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */ ++ err = ocelot_port_queue_ptp_tx_skb(ocelot, port, *clone); + if (err) { + kfree_skb(*clone); + return err; + } + ++ skb_shinfo(*clone)->tx_flags |= SKBTX_IN_PROGRESS; + OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd; + OCELOT_SKB_CB(*clone)->ptp_class = ptp_class; + } +@@ -731,26 +789,14 @@ static void ocelot_get_hwtimestamp(struct ocelot *ocelot, + spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags); + } + +-static bool ocelot_validate_ptp_skb(struct sk_buff *clone, u16 seqid) +-{ +- struct ptp_header *hdr; +- +- hdr = ptp_parse_header(clone, OCELOT_SKB_CB(clone)->ptp_class); +- if (WARN_ON(!hdr)) +- return false; +- +- return seqid == ntohs(hdr->sequence_id); +-} +- + void ocelot_get_txtstamp(struct ocelot *ocelot) + { + int budget = OCELOT_PTP_QUEUE_SZ; + + while (budget--) { +- struct sk_buff *skb, *skb_tmp, *skb_match = NULL; + struct skb_shared_hwtstamps shhwtstamps; + u32 val, id, seqid, txport; +- struct ocelot_port *port; ++ struct sk_buff *skb_match; + struct timespec64 ts; + + val = ocelot_read(ocelot, SYS_PTP_STATUS); +@@ -766,36 +812,14 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) + txport = SYS_PTP_STATUS_PTP_MESS_TXPORT_X(val); + seqid = SYS_PTP_STATUS_PTP_MESS_SEQ_ID(val); + +- port = ocelot->ports[txport]; +- +- spin_lock(&ocelot->ts_id_lock); +- port->ptp_skbs_in_flight--; +- ocelot->ptp_skbs_in_flight--; +- 
spin_unlock(&ocelot->ts_id_lock); +- + /* Retrieve its associated skb */ +-try_again: +- spin_lock(&port->tx_skbs.lock); +- +- skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { +- if (OCELOT_SKB_CB(skb)->ts_id != id) +- continue; +- __skb_unlink(skb, &port->tx_skbs); +- skb_match = skb; +- break; +- } +- +- spin_unlock(&port->tx_skbs.lock); +- +- if (WARN_ON(!skb_match)) ++ skb_match = ocelot_port_dequeue_ptp_tx_skb(ocelot, txport, id, ++ seqid); ++ if (!skb_match) { ++ dev_warn_ratelimited(ocelot->dev, ++ "port %d received TX timestamp (seqid %d, ts id %u) for packet previously declared stale\n", ++ txport, seqid, id); + goto next_ts; +- +- if (!ocelot_validate_ptp_skb(skb_match, seqid)) { +- dev_err_ratelimited(ocelot->dev, +- "port %d received stale TX timestamp for seqid %d, discarding\n", +- txport, seqid); +- kfree_skb(skb); +- goto try_again; + } + + /* Get the h/w timestamp */ +diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h +index 6fbfbde68a37..620a3260fc08 100644 +--- a/include/linux/dsa/ocelot.h ++++ b/include/linux/dsa/ocelot.h +@@ -15,6 +15,7 @@ + struct ocelot_skb_cb { + struct sk_buff *clone; + unsigned int ptp_class; /* valid only for clones */ ++ unsigned long ptp_tx_time; /* valid only for clones */ + u32 tstamp_lo; + u8 ptp_cmd; + u8 ts_id; +diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h +index 9b5562f54548..99cfaa9347c9 100644 +--- a/include/soc/mscc/ocelot.h ++++ b/include/soc/mscc/ocelot.h +@@ -941,7 +941,6 @@ struct ocelot_port { + + phy_interface_t phy_mode; + +- unsigned int ptp_skbs_in_flight; + struct sk_buff_head tx_skbs; + + unsigned int trap_proto; +@@ -949,7 +948,6 @@ struct ocelot_port { + u16 mrp_ring_id; + + u8 ptp_cmd; +- u8 ts_id; + + u8 index; + +-- +2.39.5 + diff --git a/queue-6.1/net-mscc-ocelot-fix-memory-leak-on-ocelot_port_add_t.patch b/queue-6.1/net-mscc-ocelot-fix-memory-leak-on-ocelot_port_add_t.patch new file mode 100644 index 00000000000..535641fac9c --- /dev/null +++ 
b/queue-6.1/net-mscc-ocelot-fix-memory-leak-on-ocelot_port_add_t.patch @@ -0,0 +1,41 @@ +From 7a1c076e79a557ae5a670b82eaa6c90bcd43fa3e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 16:55:15 +0200 +Subject: net: mscc: ocelot: fix memory leak on ocelot_port_add_txtstamp_skb() + +From: Vladimir Oltean + +[ Upstream commit 4b01bec25bef62544228bce06db6a3afa5d3d6bb ] + +If ocelot_port_add_txtstamp_skb() fails, for example due to a full PTP +timestamp FIFO, we must undo the skb_clone_sk() call with kfree_skb(). +Otherwise, the reference to the skb clone is lost. + +Fixes: 52849bcf0029 ("net: mscc: ocelot: avoid overflowing the PTP timestamp FIFO") +Signed-off-by: Vladimir Oltean +Link: https://patch.msgid.link/20241205145519.1236778-2-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_ptp.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c +index cb32234a5bf1..3c22652879ac 100644 +--- a/drivers/net/ethernet/mscc/ocelot_ptp.c ++++ b/drivers/net/ethernet/mscc/ocelot_ptp.c +@@ -692,8 +692,10 @@ int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, + return -ENOMEM; + + err = ocelot_port_add_txtstamp_skb(ocelot, port, *clone); +- if (err) ++ if (err) { ++ kfree_skb(*clone); + return err; ++ } + + OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd; + OCELOT_SKB_CB(*clone)->ptp_class = ptp_class; +-- +2.39.5 + diff --git a/queue-6.1/net-mscc-ocelot-improve-handling-of-tx-timestamp-for.patch b/queue-6.1/net-mscc-ocelot-improve-handling-of-tx-timestamp-for.patch new file mode 100644 index 00000000000..0255f1a2900 --- /dev/null +++ b/queue-6.1/net-mscc-ocelot-improve-handling-of-tx-timestamp-for.patch @@ -0,0 +1,54 @@ +From 14debaad48994d49905113ff4ec1edc07e2d6da5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 16:55:16 +0200 +Subject: net: mscc: ocelot: improve 
handling of TX timestamp for unknown skb + +From: Vladimir Oltean + +[ Upstream commit b6fba4b3f0becb794e274430f3a0839d8ba31262 ] + +This condition, theoretically impossible to trigger, is not really +handled well. By "continuing", we are skipping the write to SYS_PTP_NXT +which advances the timestamp FIFO to the next entry. So we are reading +the same FIFO entry all over again, printing stack traces and eventually +killing the kernel. + +No real problem has been observed here. This is part of a larger rework +of the timestamp IRQ procedure, with this logical change split out into +a patch of its own. We will need to "goto next_ts" for other conditions +as well. + +Fixes: 9fde506e0c53 ("net: mscc: ocelot: warn when a PTP IRQ is raised for an unknown skb") +Signed-off-by: Vladimir Oltean +Link: https://patch.msgid.link/20241205145519.1236778-3-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_ptp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c +index 3c22652879ac..1386fb2ff4a9 100644 +--- a/drivers/net/ethernet/mscc/ocelot_ptp.c ++++ b/drivers/net/ethernet/mscc/ocelot_ptp.c +@@ -790,7 +790,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) + spin_unlock_irqrestore(&port->tx_skbs.lock, flags); + + if (WARN_ON(!skb_match)) +- continue; ++ goto next_ts; + + if (!ocelot_validate_ptp_skb(skb_match, seqid)) { + dev_err_ratelimited(ocelot->dev, +@@ -808,7 +808,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) + shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); + skb_complete_tx_timestamp(skb_match, &shhwtstamps); + +- /* Next ts */ ++next_ts: + ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT); + } + } +-- +2.39.5 + diff --git a/queue-6.1/net-mscc-ocelot-ocelot-ts_id_lock-and-ocelot_port-tx.patch b/queue-6.1/net-mscc-ocelot-ocelot-ts_id_lock-and-ocelot_port-tx.patch new file 
mode 100644 index 00000000000..cd49c80a3d4 --- /dev/null +++ b/queue-6.1/net-mscc-ocelot-ocelot-ts_id_lock-and-ocelot_port-tx.patch @@ -0,0 +1,104 @@ +From a95982fea27e534fc95cd17e7ac12ca7dd8c59f4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 16:55:17 +0200 +Subject: net: mscc: ocelot: ocelot->ts_id_lock and ocelot_port->tx_skbs.lock + are IRQ-safe + +From: Vladimir Oltean + +[ Upstream commit 0c53cdb95eb4a604062e326636971d96dd9b1b26 ] + +ocelot_get_txtstamp() is a threaded IRQ handler, requested explicitly as +such by both ocelot_ptp_rdy_irq_handler() and vsc9959_irq_handler(). + +As such, it runs with IRQs enabled, and not in hardirq context. Thus, +ocelot_port_add_txtstamp_skb() has no reason to turn off IRQs, it cannot +be preempted by ocelot_get_txtstamp(). For the same reason, +dev_kfree_skb_any_reason() will always evaluate as kfree_skb_reason() in +this calling context, so just simplify the dev_kfree_skb_any() call to +kfree_skb(). + +Also, ocelot_port_txtstamp_request() runs from NET_TX softirq context, +not with hardirqs enabled. Thus, ocelot_get_txtstamp() which shares the +ocelot_port->tx_skbs.lock lock with it, has no reason to disable hardirqs. + +This is part of a larger rework of the TX timestamping procedure. +A logical subportion of the rework has been split into a separate +change. 
+ +Signed-off-by: Vladimir Oltean +Link: https://patch.msgid.link/20241205145519.1236778-4-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: b454abfab525 ("net: mscc: ocelot: be resilient to loss of PTP packets during transmission") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_ptp.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c +index 1386fb2ff4a9..c54e96ff3976 100644 +--- a/drivers/net/ethernet/mscc/ocelot_ptp.c ++++ b/drivers/net/ethernet/mscc/ocelot_ptp.c +@@ -611,13 +611,12 @@ static int ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, + struct sk_buff *clone) + { + struct ocelot_port *ocelot_port = ocelot->ports[port]; +- unsigned long flags; + +- spin_lock_irqsave(&ocelot->ts_id_lock, flags); ++ spin_lock(&ocelot->ts_id_lock); + + if (ocelot_port->ptp_skbs_in_flight == OCELOT_MAX_PTP_ID || + ocelot->ptp_skbs_in_flight == OCELOT_PTP_FIFO_SIZE) { +- spin_unlock_irqrestore(&ocelot->ts_id_lock, flags); ++ spin_unlock(&ocelot->ts_id_lock); + return -EBUSY; + } + +@@ -634,7 +633,7 @@ static int ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, + + skb_queue_tail(&ocelot_port->tx_skbs, clone); + +- spin_unlock_irqrestore(&ocelot->ts_id_lock, flags); ++ spin_unlock(&ocelot->ts_id_lock); + + return 0; + } +@@ -753,7 +752,6 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) + u32 val, id, seqid, txport; + struct ocelot_port *port; + struct timespec64 ts; +- unsigned long flags; + + val = ocelot_read(ocelot, SYS_PTP_STATUS); + +@@ -777,7 +775,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) + + /* Retrieve its associated skb */ + try_again: +- spin_lock_irqsave(&port->tx_skbs.lock, flags); ++ spin_lock(&port->tx_skbs.lock); + + skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { + if (OCELOT_SKB_CB(skb)->ts_id != id) +@@ -787,7 +785,7 @@ void ocelot_get_txtstamp(struct ocelot 
*ocelot) + break; + } + +- spin_unlock_irqrestore(&port->tx_skbs.lock, flags); ++ spin_unlock(&port->tx_skbs.lock); + + if (WARN_ON(!skb_match)) + goto next_ts; +@@ -796,7 +794,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) + dev_err_ratelimited(ocelot->dev, + "port %d received stale TX timestamp for seqid %d, discarding\n", + txport, seqid); +- dev_kfree_skb_any(skb); ++ kfree_skb(skb); + goto try_again; + } + +-- +2.39.5 + diff --git a/queue-6.1/net-mscc-ocelot-perform-error-cleanup-in-ocelot_hwst.patch b/queue-6.1/net-mscc-ocelot-perform-error-cleanup-in-ocelot_hwst.patch new file mode 100644 index 00000000000..e53a8388ce9 --- /dev/null +++ b/queue-6.1/net-mscc-ocelot-perform-error-cleanup-in-ocelot_hwst.patch @@ -0,0 +1,128 @@ +From 141d56c0e1a127b809e75911263f146b3b3620a9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 16:55:19 +0200 +Subject: net: mscc: ocelot: perform error cleanup in ocelot_hwstamp_set() + +From: Vladimir Oltean + +[ Upstream commit 43a4166349a254446e7a3db65f721c6a30daccf3 ] + +An unsupported RX filter will leave the port with TX timestamping still +applied as per the new request, rather than the old setting. When +parsing the tx_type, don't apply it just yet, but delay that until after +we've parsed the rx_filter as well (and potentially returned -ERANGE for +that). + +Similarly, copy_to_user() may fail, which is a rare occurrence, but +should still be treated by unwinding what was done. 
+ +Fixes: 96ca08c05838 ("net: mscc: ocelot: set up traps for PTP packets") +Signed-off-by: Vladimir Oltean +Link: https://patch.msgid.link/20241205145519.1236778-6-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_ptp.c | 59 ++++++++++++++++++-------- + 1 file changed, 42 insertions(+), 17 deletions(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c +index bc44aa635d49..34a2d8ea3b2d 100644 +--- a/drivers/net/ethernet/mscc/ocelot_ptp.c ++++ b/drivers/net/ethernet/mscc/ocelot_ptp.c +@@ -497,6 +497,28 @@ static int ocelot_traps_to_ptp_rx_filter(unsigned int proto) + return HWTSTAMP_FILTER_NONE; + } + ++static int ocelot_ptp_tx_type_to_cmd(int tx_type, int *ptp_cmd) ++{ ++ switch (tx_type) { ++ case HWTSTAMP_TX_ON: ++ *ptp_cmd = IFH_REW_OP_TWO_STEP_PTP; ++ break; ++ case HWTSTAMP_TX_ONESTEP_SYNC: ++ /* IFH_REW_OP_ONE_STEP_PTP updates the correctionField, ++ * what we need to update is the originTimestamp. 
++ */ ++ *ptp_cmd = IFH_REW_OP_ORIGIN_PTP; ++ break; ++ case HWTSTAMP_TX_OFF: ++ *ptp_cmd = 0; ++ break; ++ default: ++ return -ERANGE; ++ } ++ ++ return 0; ++} ++ + int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr) + { + struct ocelot_port *ocelot_port = ocelot->ports[port]; +@@ -523,30 +545,19 @@ EXPORT_SYMBOL(ocelot_hwstamp_get); + int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) + { + struct ocelot_port *ocelot_port = ocelot->ports[port]; ++ int ptp_cmd, old_ptp_cmd = ocelot_port->ptp_cmd; + bool l2 = false, l4 = false; + struct hwtstamp_config cfg; ++ bool old_l2, old_l4; + int err; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + /* Tx type sanity check */ +- switch (cfg.tx_type) { +- case HWTSTAMP_TX_ON: +- ocelot_port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP; +- break; +- case HWTSTAMP_TX_ONESTEP_SYNC: +- /* IFH_REW_OP_ONE_STEP_PTP updates the correctional field, we +- * need to update the origin time. +- */ +- ocelot_port->ptp_cmd = IFH_REW_OP_ORIGIN_PTP; +- break; +- case HWTSTAMP_TX_OFF: +- ocelot_port->ptp_cmd = 0; +- break; +- default: +- return -ERANGE; +- } ++ err = ocelot_ptp_tx_type_to_cmd(cfg.tx_type, &ptp_cmd); ++ if (err) ++ return err; + + switch (cfg.rx_filter) { + case HWTSTAMP_FILTER_NONE: +@@ -571,13 +582,27 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) + return -ERANGE; + } + ++ old_l2 = ocelot_port->trap_proto & OCELOT_PROTO_PTP_L2; ++ old_l4 = ocelot_port->trap_proto & OCELOT_PROTO_PTP_L4; ++ + err = ocelot_setup_ptp_traps(ocelot, port, l2, l4); + if (err) + return err; + ++ ocelot_port->ptp_cmd = ptp_cmd; ++ + cfg.rx_filter = ocelot_traps_to_ptp_rx_filter(ocelot_port->trap_proto); + +- return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? 
-EFAULT : 0; ++ if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg))) { ++ err = -EFAULT; ++ goto out_restore_ptp_traps; ++ } ++ ++ return 0; ++out_restore_ptp_traps: ++ ocelot_setup_ptp_traps(ocelot, port, old_l2, old_l4); ++ ocelot_port->ptp_cmd = old_ptp_cmd; ++ return err; + } + EXPORT_SYMBOL(ocelot_hwstamp_set); + +-- +2.39.5 + diff --git a/queue-6.1/net-sched-netem-account-for-backlog-updates-from-chi.patch b/queue-6.1/net-sched-netem-account-for-backlog-updates-from-chi.patch new file mode 100644 index 00000000000..be830f9ba91 --- /dev/null +++ b/queue-6.1/net-sched-netem-account-for-backlog-updates-from-chi.patch @@ -0,0 +1,171 @@ +From 2ee28071e7f671cbf859beee46effae3db0a36c4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 14:14:11 +0100 +Subject: net/sched: netem: account for backlog updates from child qdisc + +From: Martin Ottens + +[ Upstream commit f8d4bc455047cf3903cd6f85f49978987dbb3027 ] + +In general, 'qlen' of any classful qdisc should keep track of the +number of packets that the qdisc itself and all of its children hold. +In case of netem, 'qlen' only accounts for the packets in its internal +tfifo. When netem is used with a child qdisc, the child qdisc can use +'qdisc_tree_reduce_backlog' to inform its parent, netem, about created +or dropped SKBs. This function updates 'qlen' and the backlog statistics +of netem, but netem does not account for changes made by a child qdisc. +'qlen' then indicates the wrong number of packets in the tfifo. +If a child qdisc creates new SKBs during enqueue and informs its parent +about this, netem's 'qlen' value is increased. When netem dequeues the +newly created SKBs from the child, the 'qlen' in netem is not updated. +If 'qlen' reaches the configured sch->limit, the enqueue function stops +working, even though the tfifo is not full. + +Reproduce the bug: +Ensure that the sender machine has GSO enabled. 
Configure netem as root +qdisc and tbf as its child on the outgoing interface of the machine +as follows: +$ tc qdisc add dev <oif> root handle 1: netem delay 100ms limit 100 +$ tc qdisc add dev <oif> parent 1:0 tbf rate 50Mbit burst 1542 latency 50ms + +Send bulk TCP traffic out via this interface, e.g., by running an iPerf3 +client on the machine. Check the qdisc statistics: +$ tc -s qdisc show dev <oif> + +Statistics after 10s of iPerf3 TCP test before the fix (note that +netem's backlog > limit, netem stopped accepting packets): +qdisc netem 1: root refcnt 2 limit 1000 delay 100ms + Sent 2767766 bytes 1848 pkt (dropped 652, overlimits 0 requeues 0) + backlog 4294528236b 1155p requeues 0 +qdisc tbf 10: parent 1:1 rate 50Mbit burst 1537b lat 50ms + Sent 2767766 bytes 1848 pkt (dropped 327, overlimits 7601 requeues 0) + backlog 0b 0p requeues 0 + +Statistics after the fix: +qdisc netem 1: root refcnt 2 limit 1000 delay 100ms + Sent 37766372 bytes 24974 pkt (dropped 9, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 +qdisc tbf 10: parent 1:1 rate 50Mbit burst 1537b lat 50ms + Sent 37766372 bytes 24974 pkt (dropped 327, overlimits 96017 requeues 0) + backlog 0b 0p requeues 0 + +tbf segments the GSO SKBs (tbf_segment) and updates the netem's 'qlen'. +The interface fully stops transferring packets and "locks". In this case, +the child qdisc and tfifo are empty, but 'qlen' indicates the tfifo is at +its limit and no more packets are accepted. + +This patch adds a counter for the entries in the tfifo. Netem's 'qlen' is +only decreased when a packet is returned by its dequeue function, and not +during enqueuing into the child qdisc. External updates to 'qlen' are thus +accounted for and only the behavior of the backlog statistics changes. As +in other qdiscs, 'qlen' then keeps track of how many packets are held in +netem and all of its children. As before, sch->limit remains as the +maximum number of packets in the tfifo. The same applies to netem's +backlog statistics. 
+ +Fixes: 50612537e9ab ("netem: fix classful handling") +Signed-off-by: Martin Ottens +Acked-by: Jamal Hadi Salim +Link: https://patch.msgid.link/20241210131412.1837202-1-martin.ottens@fau.de +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/sch_netem.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c +index 0eba06613dcd..f47ab622399f 100644 +--- a/net/sched/sch_netem.c ++++ b/net/sched/sch_netem.c +@@ -77,6 +77,8 @@ struct netem_sched_data { + struct sk_buff *t_head; + struct sk_buff *t_tail; + ++ u32 t_len; ++ + /* optional qdisc for classful handling (NULL at netem init) */ + struct Qdisc *qdisc; + +@@ -373,6 +375,7 @@ static void tfifo_reset(struct Qdisc *sch) + rtnl_kfree_skbs(q->t_head, q->t_tail); + q->t_head = NULL; + q->t_tail = NULL; ++ q->t_len = 0; + } + + static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) +@@ -402,6 +405,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) + rb_link_node(&nskb->rbnode, parent, p); + rb_insert_color(&nskb->rbnode, &q->t_root); + } ++ q->t_len++; + sch->q.qlen++; + } + +@@ -508,7 +512,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, + 1<q.qlen >= sch->limit)) { ++ if (unlikely(q->t_len >= sch->limit)) { + /* re-link segs, so that qdisc_drop_all() frees them all */ + skb->next = segs; + qdisc_drop_all(skb, sch, to_free); +@@ -692,8 +696,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) + tfifo_dequeue: + skb = __qdisc_dequeue_head(&sch->q); + if (skb) { +- qdisc_qstats_backlog_dec(sch, skb); + deliver: ++ qdisc_qstats_backlog_dec(sch, skb); + qdisc_bstats_update(sch, skb); + return skb; + } +@@ -709,8 +713,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) + + if (time_to_send <= now && q->slot.slot_next <= now) { + netem_erase_head(q, skb); +- sch->q.qlen--; +- qdisc_qstats_backlog_dec(sch, skb); ++ q->t_len--; + skb->next = 
NULL; + skb->prev = NULL; + /* skb->dev shares skb->rbnode area, +@@ -737,16 +740,21 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) + if (net_xmit_drop_count(err)) + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, pkt_len); ++ sch->qstats.backlog -= pkt_len; ++ sch->q.qlen--; + } + goto tfifo_dequeue; + } ++ sch->q.qlen--; + goto deliver; + } + + if (q->qdisc) { + skb = q->qdisc->ops->dequeue(q->qdisc); +- if (skb) ++ if (skb) { ++ sch->q.qlen--; + goto deliver; ++ } + } + + qdisc_watchdog_schedule_ns(&q->watchdog, +@@ -756,8 +764,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) + + if (q->qdisc) { + skb = q->qdisc->ops->dequeue(q->qdisc); +- if (skb) ++ if (skb) { ++ sch->q.qlen--; + goto deliver; ++ } + } + return NULL; + } +-- +2.39.5 + diff --git a/queue-6.1/net-sparx5-fix-fdma-performance-issue.patch b/queue-6.1/net-sparx5-fix-fdma-performance-issue.patch new file mode 100644 index 00000000000..2cca8b2aaf3 --- /dev/null +++ b/queue-6.1/net-sparx5-fix-fdma-performance-issue.patch @@ -0,0 +1,63 @@ +From 56489734c7ce6a835b637e743d12abc183f3a39b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 14:54:26 +0100 +Subject: net: sparx5: fix FDMA performance issue + +From: Daniel Machon + +[ Upstream commit f004f2e535e2b66ccbf5ac35f8eaadeac70ad7b7 ] + +The FDMA handler is responsible for scheduling a NAPI poll, which will +eventually fetch RX packets from the FDMA queue. Currently, the FDMA +handler is run in a threaded context. For some reason, this kills +performance. Admittedly, I did not do a thorough investigation to see +exactly what causes the issue, however, I noticed that in the other +driver utilizing the same FDMA engine, we run the FDMA handler in hard +IRQ context. + +Fix this performance issue, by running the FDMA handler in hard IRQ +context, not deferring any work to a thread. 
+ +Prior to this change, the RX UDP performance was: + +Interval Transfer Bitrate Jitter +0.00-10.20 sec 44.6 MBytes 36.7 Mbits/sec 0.027 ms + +After this change, the rx UDP performance is: + +Interval Transfer Bitrate Jitter +0.00-9.12 sec 1.01 GBytes 953 Mbits/sec 0.020 ms + +Fixes: 10615907e9b5 ("net: sparx5: switchdev: adding frame DMA functionality") +Signed-off-by: Daniel Machon +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +index 7031f41287e0..1ed69e77b895 100644 +--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c ++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +@@ -680,12 +680,11 @@ static int sparx5_start(struct sparx5 *sparx5) + err = -ENXIO; + if (sparx5->fdma_irq >= 0) { + if (GCB_CHIP_ID_REV_ID_GET(sparx5->chip_id) > 0) +- err = devm_request_threaded_irq(sparx5->dev, +- sparx5->fdma_irq, +- NULL, +- sparx5_fdma_handler, +- IRQF_ONESHOT, +- "sparx5-fdma", sparx5); ++ err = devm_request_irq(sparx5->dev, ++ sparx5->fdma_irq, ++ sparx5_fdma_handler, ++ 0, ++ "sparx5-fdma", sparx5); + if (!err) + err = sparx5_fdma_start(sparx5); + if (err) +-- +2.39.5 + diff --git a/queue-6.1/net-sparx5-fix-the-maximum-frame-length-register.patch b/queue-6.1/net-sparx5-fix-the-maximum-frame-length-register.patch new file mode 100644 index 00000000000..e432f5def7e --- /dev/null +++ b/queue-6.1/net-sparx5-fix-the-maximum-frame-length-register.patch @@ -0,0 +1,39 @@ +From 453e70f01cd467567d3f3f7a6ca2a39335f1fff2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 14:54:28 +0100 +Subject: net: sparx5: fix the maximum frame length register + +From: Daniel Machon + +[ Upstream commit ddd7ba006078a2bef5971b2dc5f8383d47f96207 ] + +On port initialization, we configure the maximum frame length 
accepted +by the receive module associated with the port. This value is currently +written to the MAX_LEN field of the DEV10G_MAC_ENA_CFG register, when in +fact, it should be written to the DEV10G_MAC_MAXLEN_CFG register. Fix +this. + +Fixes: 946e7fd5053a ("net: sparx5: add port module support") +Signed-off-by: Daniel Machon +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/microchip/sparx5/sparx5_port.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +index 212bf6f4ed72..e1df6bc86949 100644 +--- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c ++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +@@ -1113,7 +1113,7 @@ int sparx5_port_init(struct sparx5 *sparx5, + spx5_inst_rmw(DEV10G_MAC_MAXLEN_CFG_MAX_LEN_SET(ETH_MAXLEN), + DEV10G_MAC_MAXLEN_CFG_MAX_LEN, + devinst, +- DEV10G_MAC_ENA_CFG(0)); ++ DEV10G_MAC_MAXLEN_CFG(0)); + + /* Handle Signal Detect in 10G PCS */ + spx5_inst_wr(PCS10G_BR_PCS_SD_CFG_SD_POL_SET(sd_pol) | +-- +2.39.5 + diff --git a/queue-6.1/ptp-kvm-use-decrypted-memory-in-confidential-guest-o.patch b/queue-6.1/ptp-kvm-use-decrypted-memory-in-confidential-guest-o.patch new file mode 100644 index 00000000000..35a64cae30c --- /dev/null +++ b/queue-6.1/ptp-kvm-use-decrypted-memory-in-confidential-guest-o.patch @@ -0,0 +1,173 @@ +From 964938b57973aa98873a83f16c3bf921e8c01cdd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Mar 2023 15:05:31 +0000 +Subject: ptp: kvm: Use decrypted memory in confidential guest on x86 + +From: Jeremi Piotrowski + +[ Upstream commit 6365ba64b4dbe8b59ddaeaa724b281f3787715d5 ] + +KVM_HC_CLOCK_PAIRING currently fails inside SEV-SNP guests because the +guest passes an address to static data to the host. In confidential +computing the host can't access arbitrary guest memory so handling the +hypercall runs into an "rmpfault". 
To make the hypercall work, the guest +needs to explicitly mark the memory as decrypted. Do that in +kvm_arch_ptp_init(), but retain the previous behavior for +non-confidential guests to save us from having to allocate memory. + +Add a new arch-specific function (kvm_arch_ptp_exit()) to free the +allocation and mark the memory as encrypted again. + +Signed-off-by: Jeremi Piotrowski +Link: https://lore.kernel.org/r/20230308150531.477741-1-jpiotrowski@linux.microsoft.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 5e7aa97c7acf ("ptp: kvm: x86: Return EOPNOTSUPP instead of ENODEV from kvm_arch_ptp_init()") +Signed-off-by: Sasha Levin +--- + drivers/ptp/ptp_kvm_arm.c | 4 +++ + drivers/ptp/ptp_kvm_common.c | 1 + + drivers/ptp/ptp_kvm_x86.c | 59 +++++++++++++++++++++++++++++------- + include/linux/ptp_kvm.h | 1 + + 4 files changed, 54 insertions(+), 11 deletions(-) + +diff --git a/drivers/ptp/ptp_kvm_arm.c b/drivers/ptp/ptp_kvm_arm.c +index b7d28c8dfb84..e68e6943167b 100644 +--- a/drivers/ptp/ptp_kvm_arm.c ++++ b/drivers/ptp/ptp_kvm_arm.c +@@ -22,6 +22,10 @@ int kvm_arch_ptp_init(void) + return 0; + } + ++void kvm_arch_ptp_exit(void) ++{ ++} ++ + int kvm_arch_ptp_get_clock(struct timespec64 *ts) + { + return kvm_arch_ptp_get_crosststamp(NULL, ts, NULL); +diff --git a/drivers/ptp/ptp_kvm_common.c b/drivers/ptp/ptp_kvm_common.c +index fcae32f56f25..051114a59286 100644 +--- a/drivers/ptp/ptp_kvm_common.c ++++ b/drivers/ptp/ptp_kvm_common.c +@@ -130,6 +130,7 @@ static struct kvm_ptp_clock kvm_ptp_clock; + static void __exit ptp_kvm_exit(void) + { + ptp_clock_unregister(kvm_ptp_clock.ptp_clock); ++ kvm_arch_ptp_exit(); + } + + static int __init ptp_kvm_init(void) +diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c +index 4991054a2135..902844cc1a17 100644 +--- a/drivers/ptp/ptp_kvm_x86.c ++++ b/drivers/ptp/ptp_kvm_x86.c +@@ -14,27 +14,64 @@ + #include + #include + #include ++#include + + static phys_addr_t clock_pair_gpa; +-static struct kvm_clock_pairing 
clock_pair; ++static struct kvm_clock_pairing clock_pair_glbl; ++static struct kvm_clock_pairing *clock_pair; + + int kvm_arch_ptp_init(void) + { ++ struct page *p; + long ret; + + if (!kvm_para_available()) + return -ENODEV; + +- clock_pair_gpa = slow_virt_to_phys(&clock_pair); +- if (!pvclock_get_pvti_cpu0_va()) +- return -ENODEV; ++ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { ++ p = alloc_page(GFP_KERNEL | __GFP_ZERO); ++ if (!p) ++ return -ENOMEM; ++ ++ clock_pair = page_address(p); ++ ret = set_memory_decrypted((unsigned long)clock_pair, 1); ++ if (ret) { ++ __free_page(p); ++ clock_pair = NULL; ++ goto nofree; ++ } ++ } else { ++ clock_pair = &clock_pair_glbl; ++ } ++ ++ clock_pair_gpa = slow_virt_to_phys(clock_pair); ++ if (!pvclock_get_pvti_cpu0_va()) { ++ ret = -ENODEV; ++ goto err; ++ } + + ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa, + KVM_CLOCK_PAIRING_WALLCLOCK); +- if (ret == -KVM_ENOSYS) +- return -ENODEV; ++ if (ret == -KVM_ENOSYS) { ++ ret = -ENODEV; ++ goto err; ++ } + + return ret; ++ ++err: ++ kvm_arch_ptp_exit(); ++nofree: ++ return ret; ++} ++ ++void kvm_arch_ptp_exit(void) ++{ ++ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { ++ WARN_ON(set_memory_encrypted((unsigned long)clock_pair, 1)); ++ free_page((unsigned long)clock_pair); ++ clock_pair = NULL; ++ } + } + + int kvm_arch_ptp_get_clock(struct timespec64 *ts) +@@ -49,8 +86,8 @@ int kvm_arch_ptp_get_clock(struct timespec64 *ts) + return -EOPNOTSUPP; + } + +- ts->tv_sec = clock_pair.sec; +- ts->tv_nsec = clock_pair.nsec; ++ ts->tv_sec = clock_pair->sec; ++ ts->tv_nsec = clock_pair->nsec; + + return 0; + } +@@ -81,9 +118,9 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec, + pr_err_ratelimited("clock pairing hypercall ret %lu\n", ret); + return -EOPNOTSUPP; + } +- tspec->tv_sec = clock_pair.sec; +- tspec->tv_nsec = clock_pair.nsec; +- *cycle = __pvclock_read_cycles(src, clock_pair.tsc); ++ tspec->tv_sec = clock_pair->sec; ++ tspec->tv_nsec = 
clock_pair->nsec; ++ *cycle = __pvclock_read_cycles(src, clock_pair->tsc); + } while (pvclock_read_retry(src, version)); + + *cs = &kvm_clock; +diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h +index c2e28deef33a..746fd67c3480 100644 +--- a/include/linux/ptp_kvm.h ++++ b/include/linux/ptp_kvm.h +@@ -14,6 +14,7 @@ struct timespec64; + struct clocksource; + + int kvm_arch_ptp_init(void); ++void kvm_arch_ptp_exit(void); + int kvm_arch_ptp_get_clock(struct timespec64 *ts); + int kvm_arch_ptp_get_crosststamp(u64 *cycle, + struct timespec64 *tspec, struct clocksource **cs); +-- +2.39.5 + diff --git a/queue-6.1/ptp-kvm-x86-return-eopnotsupp-instead-of-enodev-from.patch b/queue-6.1/ptp-kvm-x86-return-eopnotsupp-instead-of-enodev-from.patch new file mode 100644 index 00000000000..989d70fefb3 --- /dev/null +++ b/queue-6.1/ptp-kvm-x86-return-eopnotsupp-instead-of-enodev-from.patch @@ -0,0 +1,63 @@ +From a64243b9ae34dca615cfb81d0ef7542c9d1efe58 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Dec 2024 18:09:55 +0100 +Subject: ptp: kvm: x86: Return EOPNOTSUPP instead of ENODEV from + kvm_arch_ptp_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Thomas Weißschuh + +[ Upstream commit 5e7aa97c7acf171275ac02a8bb018c31b8918d13 ] + +The caller, ptp_kvm_init(), emits a warning if kvm_arch_ptp_init() exits +with any error which is not EOPNOTSUPP: + + "fail to initialize ptp_kvm" + +Replace ENODEV with EOPNOTSUPP to avoid this spurious warning, +aligning with the ARM implementation. 
+ +Fixes: a86ed2cfa13c ("ptp: Don't print an error if ptp_kvm is not supported") +Signed-off-by: Thomas Weißschuh +Link: https://patch.msgid.link/20241203-kvm_ptp-eopnotsuppp-v2-1-d1d060f27aa6@weissschuh.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/ptp/ptp_kvm_x86.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c +index 902844cc1a17..5e5b2ef78547 100644 +--- a/drivers/ptp/ptp_kvm_x86.c ++++ b/drivers/ptp/ptp_kvm_x86.c +@@ -26,7 +26,7 @@ int kvm_arch_ptp_init(void) + long ret; + + if (!kvm_para_available()) +- return -ENODEV; ++ return -EOPNOTSUPP; + + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { + p = alloc_page(GFP_KERNEL | __GFP_ZERO); +@@ -46,14 +46,14 @@ int kvm_arch_ptp_init(void) + + clock_pair_gpa = slow_virt_to_phys(clock_pair); + if (!pvclock_get_pvti_cpu0_va()) { +- ret = -ENODEV; ++ ret = -EOPNOTSUPP; + goto err; + } + + ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa, + KVM_CLOCK_PAIRING_WALLCLOCK); + if (ret == -KVM_ENOSYS) { +- ret = -ENODEV; ++ ret = -EOPNOTSUPP; + goto err; + } + +-- +2.39.5 + diff --git a/queue-6.1/qca_spi-fix-clock-speed-for-multiple-qca7000.patch b/queue-6.1/qca_spi-fix-clock-speed-for-multiple-qca7000.patch new file mode 100644 index 00000000000..5de0a57652b --- /dev/null +++ b/queue-6.1/qca_spi-fix-clock-speed-for-multiple-qca7000.patch @@ -0,0 +1,98 @@ +From 155edcb83f5bc65dc240fe044422de4e8ab15236 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Dec 2024 19:46:42 +0100 +Subject: qca_spi: Fix clock speed for multiple QCA7000 + +From: Stefan Wahren + +[ Upstream commit 4dba406fac06b009873fe7a28231b9b7e4288b09 ] + +Storing the maximum clock speed in module parameter qcaspi_clkspeed +has the unintended side effect that the first probed instance +defines the value for all other instances. Fix this issue by storing +it in max_speed_hz of the relevant SPI device. 
+ +This fix keeps the priority of the speed parameter (module parameter, +device tree property, driver default). Btw this uses the opportunity +to get rid of the unused member clkspeed. + +Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") +Signed-off-by: Stefan Wahren +Link: https://patch.msgid.link/20241206184643.123399-2-wahrenst@gmx.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/qualcomm/qca_spi.c | 24 ++++++++++-------------- + drivers/net/ethernet/qualcomm/qca_spi.h | 1 - + 2 files changed, 10 insertions(+), 15 deletions(-) + +diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c +index 926a087ae1c6..95e0a5237359 100644 +--- a/drivers/net/ethernet/qualcomm/qca_spi.c ++++ b/drivers/net/ethernet/qualcomm/qca_spi.c +@@ -829,7 +829,6 @@ qcaspi_netdev_init(struct net_device *dev) + + dev->mtu = QCAFRM_MAX_MTU; + dev->type = ARPHRD_ETHER; +- qca->clkspeed = qcaspi_clkspeed; + qca->burst_len = qcaspi_burst_len; + qca->spi_thread = NULL; + qca->buffer_size = (dev->mtu + VLAN_ETH_HLEN + QCAFRM_HEADER_LEN + +@@ -918,17 +917,15 @@ qca_spi_probe(struct spi_device *spi) + legacy_mode = of_property_read_bool(spi->dev.of_node, + "qca,legacy-mode"); + +- if (qcaspi_clkspeed == 0) { +- if (spi->max_speed_hz) +- qcaspi_clkspeed = spi->max_speed_hz; +- else +- qcaspi_clkspeed = QCASPI_CLK_SPEED; +- } ++ if (qcaspi_clkspeed) ++ spi->max_speed_hz = qcaspi_clkspeed; ++ else if (!spi->max_speed_hz) ++ spi->max_speed_hz = QCASPI_CLK_SPEED; + +- if ((qcaspi_clkspeed < QCASPI_CLK_SPEED_MIN) || +- (qcaspi_clkspeed > QCASPI_CLK_SPEED_MAX)) { +- dev_err(&spi->dev, "Invalid clkspeed: %d\n", +- qcaspi_clkspeed); ++ if (spi->max_speed_hz < QCASPI_CLK_SPEED_MIN || ++ spi->max_speed_hz > QCASPI_CLK_SPEED_MAX) { ++ dev_err(&spi->dev, "Invalid clkspeed: %u\n", ++ spi->max_speed_hz); + return -EINVAL; + } + +@@ -953,14 +950,13 @@ qca_spi_probe(struct spi_device *spi) + return 
-EINVAL; + } + +- dev_info(&spi->dev, "ver=%s, clkspeed=%d, burst_len=%d, pluggable=%d\n", ++ dev_info(&spi->dev, "ver=%s, clkspeed=%u, burst_len=%d, pluggable=%d\n", + QCASPI_DRV_VERSION, +- qcaspi_clkspeed, ++ spi->max_speed_hz, + qcaspi_burst_len, + qcaspi_pluggable); + + spi->mode = SPI_MODE_3; +- spi->max_speed_hz = qcaspi_clkspeed; + if (spi_setup(spi) < 0) { + dev_err(&spi->dev, "Unable to setup SPI device\n"); + return -EFAULT; +diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h +index 58ad910068d4..b3b17bd46e12 100644 +--- a/drivers/net/ethernet/qualcomm/qca_spi.h ++++ b/drivers/net/ethernet/qualcomm/qca_spi.h +@@ -101,7 +101,6 @@ struct qcaspi { + #endif + + /* user configurable options */ +- u32 clkspeed; + u8 legacy_mode; + u16 burst_len; + }; +-- +2.39.5 + diff --git a/queue-6.1/qca_spi-make-driver-probing-reliable.patch b/queue-6.1/qca_spi-make-driver-probing-reliable.patch new file mode 100644 index 00000000000..84c662e8b13 --- /dev/null +++ b/queue-6.1/qca_spi-make-driver-probing-reliable.patch @@ -0,0 +1,40 @@ +From e8f6daefc2a651dac6d548ace095b5651d254cdd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Dec 2024 19:46:43 +0100 +Subject: qca_spi: Make driver probing reliable + +From: Stefan Wahren + +[ Upstream commit becc6399ce3b724cffe9ccb7ef0bff440bb1b62b ] + +The module parameter qcaspi_pluggable controls if QCA7000 signature +should be checked at driver probe (current default) or not. Unfortunately +this could fail in case the chip is temporarily in reset, which isn't under +total control by the Linux host. So disable this check by default +in order to avoid unexpected probe failures. 
+ +Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") +Signed-off-by: Stefan Wahren +Link: https://patch.msgid.link/20241206184643.123399-3-wahrenst@gmx.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/qualcomm/qca_spi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c +index 95e0a5237359..b7af824116ea 100644 +--- a/drivers/net/ethernet/qualcomm/qca_spi.c ++++ b/drivers/net/ethernet/qualcomm/qca_spi.c +@@ -67,7 +67,7 @@ MODULE_PARM_DESC(qcaspi_burst_len, "Number of data bytes per burst. Use 1-5000." + + #define QCASPI_PLUGGABLE_MIN 0 + #define QCASPI_PLUGGABLE_MAX 1 +-static int qcaspi_pluggable = QCASPI_PLUGGABLE_MIN; ++static int qcaspi_pluggable = QCASPI_PLUGGABLE_MAX; + module_param(qcaspi_pluggable, int, 0); + MODULE_PARM_DESC(qcaspi_pluggable, "Pluggable SPI connection (yes/no)."); + +-- +2.39.5 + diff --git a/queue-6.1/selftests-mlxsw-sharedbuffer-ensure-no-extra-packets.patch b/queue-6.1/selftests-mlxsw-sharedbuffer-ensure-no-extra-packets.patch new file mode 100644 index 00000000000..51af90c2701 --- /dev/null +++ b/queue-6.1/selftests-mlxsw-sharedbuffer-ensure-no-extra-packets.patch @@ -0,0 +1,140 @@ +From ceec01152ae0eecc9f2d62dbd042d1426e592f33 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 17:36:01 +0100 +Subject: selftests: mlxsw: sharedbuffer: Ensure no extra packets are counted + +From: Danielle Ratson + +[ Upstream commit 5f2c7ab15fd806043db1a7d54b5ec36be0bd93b1 ] + +The test assumes that the packet it is sending is the only packet being +passed to the device. + +However, it is not the case and so other packets are filling the buffers +as well. Therefore, the test sometimes fails because it is reading a +maximum occupancy that is larger than expected. + +Add egress filters on $h1 and $h2 that will guarantee the above. 
+ +Fixes: a865ad999603 ("selftests: mlxsw: Add shared buffer traffic test") +Signed-off-by: Danielle Ratson +Reviewed-by: Ido Schimmel +Signed-off-by: Ido Schimmel +Signed-off-by: Petr Machata +Link: https://patch.msgid.link/64c28bc9b1cc1d78c4a73feda7cedbe9526ccf8b.1733414773.git.petrm@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../drivers/net/mlxsw/sharedbuffer.sh | 40 +++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +index 21bebc5726f6..c068e6c2a580 100755 +--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh ++++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +@@ -22,20 +22,34 @@ SB_ITC=0 + h1_create() + { + simple_if_init $h1 192.0.1.1/24 ++ tc qdisc add dev $h1 clsact ++ ++ # Add egress filter on $h1 that will guarantee that the packet sent, ++ # will be the only packet being passed to the device. ++ tc filter add dev $h1 egress pref 2 handle 102 matchall action drop + } + + h1_destroy() + { ++ tc filter del dev $h1 egress pref 2 handle 102 matchall action drop ++ tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.1.1/24 + } + + h2_create() + { + simple_if_init $h2 192.0.1.2/24 ++ tc qdisc add dev $h2 clsact ++ ++ # Add egress filter on $h2 that will guarantee that the packet sent, ++ # will be the only packet being passed to the device. 
++ tc filter add dev $h2 egress pref 1 handle 101 matchall action drop + } + + h2_destroy() + { ++ tc filter del dev $h2 egress pref 1 handle 101 matchall action drop ++ tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.1.2/24 + } + +@@ -101,6 +115,11 @@ port_pool_test() + local exp_max_occ=$(devlink_cell_size_get) + local max_occ + ++ tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \ ++ src_mac $h1mac dst_mac $h2mac \ ++ src_ip 192.0.1.1 dst_ip 192.0.1.2 \ ++ action pass ++ + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ +@@ -117,6 +136,11 @@ port_pool_test() + max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ) + check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress pool" ++ ++ tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \ ++ src_mac $h1mac dst_mac $h2mac \ ++ src_ip 192.0.1.1 dst_ip 192.0.1.2 \ ++ action pass + } + + port_tc_ip_test() +@@ -124,6 +148,11 @@ port_tc_ip_test() + local exp_max_occ=$(devlink_cell_size_get) + local max_occ + ++ tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \ ++ src_mac $h1mac dst_mac $h2mac \ ++ src_ip 192.0.1.1 dst_ip 192.0.1.2 \ ++ action pass ++ + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ +@@ -140,6 +169,11 @@ port_tc_ip_test() + max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ) + check_err $? 
"Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress TC - IP packet" ++ ++ tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \ ++ src_mac $h1mac dst_mac $h2mac \ ++ src_ip 192.0.1.1 dst_ip 192.0.1.2 \ ++ action pass + } + + port_tc_arp_test() +@@ -147,6 +181,9 @@ port_tc_arp_test() + local exp_max_occ=$(devlink_cell_size_get) + local max_occ + ++ tc filter add dev $h1 egress protocol arp pref 1 handle 101 flower \ ++ src_mac $h1mac action pass ++ + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q +@@ -162,6 +199,9 @@ port_tc_arp_test() + max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ) + check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress TC - ARP packet" ++ ++ tc filter del dev $h1 egress protocol arp pref 1 handle 101 flower \ ++ src_mac $h1mac action pass + } + + setup_prepare() +-- +2.39.5 + diff --git a/queue-6.1/selftests-mlxsw-sharedbuffer-remove-duplicate-test-c.patch b/queue-6.1/selftests-mlxsw-sharedbuffer-remove-duplicate-test-c.patch new file mode 100644 index 00000000000..9b3bd75697d --- /dev/null +++ b/queue-6.1/selftests-mlxsw-sharedbuffer-remove-duplicate-test-c.patch @@ -0,0 +1,58 @@ +From c28a4562fd0a01c3dbe595215a5673598c26da56 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 17:36:00 +0100 +Subject: selftests: mlxsw: sharedbuffer: Remove duplicate test cases + +From: Danielle Ratson + +[ Upstream commit 6c46ad4d1bb2e8ec2265296e53765190f6e32f33 ] + +On both port_tc_ip_test() and port_tc_arp_test(), the max occupancy is +checked on $h2 twice, when only the error message is different and does not +match the check itself. + +Remove the two duplicated test cases from the test. 
+ +Fixes: a865ad999603 ("selftests: mlxsw: Add shared buffer traffic test") +Signed-off-by: Danielle Ratson +Reviewed-by: Ido Schimmel +Signed-off-by: Ido Schimmel +Signed-off-by: Petr Machata +Link: https://patch.msgid.link/d9eb26f6fc16a06a30b5c2c16ad80caf502bc561.1733414773.git.petrm@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../selftests/drivers/net/mlxsw/sharedbuffer.sh | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +index a7b3d6cf3185..21bebc5726f6 100755 +--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh ++++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +@@ -131,11 +131,6 @@ port_tc_ip_test() + + devlink sb occupancy snapshot $DEVLINK_DEV + +- RET=0 +- max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) +- check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" +- log_test "physical port's($h1) ingress TC - IP packet" +- + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" +@@ -158,11 +153,6 @@ port_tc_arp_test() + + devlink sb occupancy snapshot $DEVLINK_DEV + +- RET=0 +- max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) +- check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" +- log_test "physical port's($h1) ingress TC - ARP packet" +- + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? 
"Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" +-- +2.39.5 + diff --git a/queue-6.1/selftests-mlxsw-sharedbuffer-remove-h1-ingress-test-.patch b/queue-6.1/selftests-mlxsw-sharedbuffer-remove-h1-ingress-test-.patch new file mode 100644 index 00000000000..f5f66134a44 --- /dev/null +++ b/queue-6.1/selftests-mlxsw-sharedbuffer-remove-h1-ingress-test-.patch @@ -0,0 +1,48 @@ +From f81e58b7e2babba109a29067a675ec4609f15244 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 17:35:59 +0100 +Subject: selftests: mlxsw: sharedbuffer: Remove h1 ingress test case + +From: Danielle Ratson + +[ Upstream commit cf3515c556907b4da290967a2a6cbbd9ee0ee723 ] + +The test is sending only one packet generated with mausezahn from $h1 to +$h2. However, for some reason, it is testing for non-zero maximum occupancy +in both the ingress pool of $h1 and $h2. The former only passes when $h2 +happens to send a packet. + +Avoid intermittent failures by removing unintentional test case +regarding the ingress pool of $h1. 
+ +Fixes: a865ad999603 ("selftests: mlxsw: Add shared buffer traffic test") +Signed-off-by: Danielle Ratson +Reviewed-by: Ido Schimmel +Signed-off-by: Ido Schimmel +Signed-off-by: Petr Machata +Link: https://patch.msgid.link/5b7344608d5e06f38209e48d8af8c92fa11b6742.1733414773.git.petrm@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +index 0c47faff9274..a7b3d6cf3185 100755 +--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh ++++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +@@ -108,11 +108,6 @@ port_pool_test() + + devlink sb occupancy snapshot $DEVLINK_DEV + +- RET=0 +- max_occ=$(sb_occ_pool_check $dl_port1 $SB_POOL_ING $exp_max_occ) +- check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" +- log_test "physical port's($h1) ingress pool" +- + RET=0 + max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ) + check_err $? 
"Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" +-- +2.39.5 + diff --git a/queue-6.1/series b/queue-6.1/series index b917bb80d5d..1a65513e3b9 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -25,3 +25,43 @@ bpf-sockmap-fix-update-element-with-same.patch smb-client-fix-uaf-in-smb2_reconnect_server.patch exfat-support-dynamic-allocate-bh-for-exfat_entry_set_cache.patch exfat-fix-potential-deadlock-on-__exfat_get_dentry_set.patch +wifi-nl80211-fix-nl80211_attr_mlo_link_id-off-by-one.patch +wifi-mac80211-clean-up-ret-in-sta_link_apply_paramet.patch +wifi-mac80211-fix-station-nss-capability-initializat.patch +acpi-nfit-vmalloc-out-of-bounds-read-in-acpi_nfit_ct.patch +amdgpu-uvd-get-ring-reference-from-rq-scheduler.patch +batman-adv-do-not-send-uninitialized-tt-changes.patch +batman-adv-remove-uninitialized-data-in-full-table-t.patch +batman-adv-do-not-let-tt-changes-list-grows-indefini.patch +tipc-fix-null-deref-in-cleanup_bearer.patch +net-mlx5-dr-prevent-potential-error-pointer-derefere.patch +selftests-mlxsw-sharedbuffer-remove-h1-ingress-test-.patch +selftests-mlxsw-sharedbuffer-remove-duplicate-test-c.patch +selftests-mlxsw-sharedbuffer-ensure-no-extra-packets.patch +ptp-kvm-use-decrypted-memory-in-confidential-guest-o.patch +ptp-kvm-x86-return-eopnotsupp-instead-of-enodev-from.patch +net-lapb-increase-lapb_header_len.patch +net-add-a-refcount-tracker-for-kernel-sockets.patch +net-defer-final-struct-net-free-in-netns-dismantle.patch +net-mscc-ocelot-fix-memory-leak-on-ocelot_port_add_t.patch +net-mscc-ocelot-improve-handling-of-tx-timestamp-for.patch +net-mscc-ocelot-ocelot-ts_id_lock-and-ocelot_port-tx.patch +net-mscc-ocelot-be-resilient-to-loss-of-ptp-packets-.patch +net-mscc-ocelot-perform-error-cleanup-in-ocelot_hwst.patch +spi-aspeed-fix-an-error-handling-path-in-aspeed_spi_.patch +net-sparx5-fix-fdma-performance-issue.patch +net-sparx5-fix-the-maximum-frame-length-register.patch 
+acpi-resource-fix-memory-resource-type-union-access.patch +cxgb4-use-port-number-to-set-mac-addr.patch +qca_spi-fix-clock-speed-for-multiple-qca7000.patch +qca_spi-make-driver-probing-reliable.patch +asoc-amd-yc-fix-the-wrong-return-value.patch +documentation-pm-clarify-pm_runtime_resume_and_get-r.patch +net-dsa-felix-fix-stuck-cpu-injected-packets-with-sh.patch +net-sched-netem-account-for-backlog-updates-from-chi.patch +bonding-fix-feature-propagation-of-netif_f_gso_encap.patch +team-fix-feature-propagation-of-netif_f_gso_encap_al.patch +acpica-events-evxfregn-don-t-release-the-contextmute.patch +bluetooth-iso-fix-recursive-locking-warning.patch +bluetooth-sco-add-support-for-16-bits-transparent-vo.patch +blk-iocost-avoid-using-clamp-on-inuse-in-__propagate.patch diff --git a/queue-6.1/spi-aspeed-fix-an-error-handling-path-in-aspeed_spi_.patch b/queue-6.1/spi-aspeed-fix-an-error-handling-path-in-aspeed_spi_.patch new file mode 100644 index 00000000000..00d805c1d66 --- /dev/null +++ b/queue-6.1/spi-aspeed-fix-an-error-handling-path-in-aspeed_spi_.patch @@ -0,0 +1,64 @@ +From 7f744e3b30b804aa26dd14ac97a69eb0be265aef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Nov 2024 22:30:29 +0100 +Subject: spi: aspeed: Fix an error handling path in + aspeed_spi_[read|write]_user() + +From: Christophe JAILLET + +[ Upstream commit c84dda3751e945a67d71cbe3af4474aad24a5794 ] + +A aspeed_spi_start_user() is not balanced by a corresponding +aspeed_spi_stop_user(). +Add the missing call. 
+ +Fixes: e3228ed92893 ("spi: spi-mem: Convert Aspeed SMC driver to spi-mem") +Signed-off-by: Christophe JAILLET +Link: https://patch.msgid.link/4052aa2f9a9ea342fa6af83fa991b55ce5d5819e.1732051814.git.christophe.jaillet@wanadoo.fr +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/spi/spi-aspeed-smc.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/drivers/spi/spi-aspeed-smc.c b/drivers/spi/spi-aspeed-smc.c +index b90571396a60..5015c2f6fd9f 100644 +--- a/drivers/spi/spi-aspeed-smc.c ++++ b/drivers/spi/spi-aspeed-smc.c +@@ -239,7 +239,7 @@ static ssize_t aspeed_spi_read_user(struct aspeed_spi_chip *chip, + + ret = aspeed_spi_send_cmd_addr(chip, op->addr.nbytes, offset, op->cmd.opcode); + if (ret < 0) +- return ret; ++ goto stop_user; + + if (op->dummy.buswidth && op->dummy.nbytes) { + for (i = 0; i < op->dummy.nbytes / op->dummy.buswidth; i++) +@@ -249,8 +249,9 @@ static ssize_t aspeed_spi_read_user(struct aspeed_spi_chip *chip, + aspeed_spi_set_io_mode(chip, io_mode); + + aspeed_spi_read_from_ahb(buf, chip->ahb_base, len); ++stop_user: + aspeed_spi_stop_user(chip); +- return 0; ++ return ret; + } + + static ssize_t aspeed_spi_write_user(struct aspeed_spi_chip *chip, +@@ -261,10 +262,11 @@ static ssize_t aspeed_spi_write_user(struct aspeed_spi_chip *chip, + aspeed_spi_start_user(chip); + ret = aspeed_spi_send_cmd_addr(chip, op->addr.nbytes, op->addr.val, op->cmd.opcode); + if (ret < 0) +- return ret; ++ goto stop_user; + aspeed_spi_write_to_ahb(chip->ahb_base, op->data.buf.out, op->data.nbytes); ++stop_user: + aspeed_spi_stop_user(chip); +- return 0; ++ return ret; + } + + /* support for 1-1-1, 1-1-2 or 1-1-4 */ +-- +2.39.5 + diff --git a/queue-6.1/team-fix-feature-propagation-of-netif_f_gso_encap_al.patch b/queue-6.1/team-fix-feature-propagation-of-netif_f_gso_encap_al.patch new file mode 100644 index 00000000000..b68c7287c9c --- /dev/null +++ b/queue-6.1/team-fix-feature-propagation-of-netif_f_gso_encap_al.patch 
@@ -0,0 +1,44 @@ +From 14e91fc205108d160022edb956e6a0a465c4c585 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 15:12:45 +0100 +Subject: team: Fix feature propagation of NETIF_F_GSO_ENCAP_ALL + +From: Daniel Borkmann + +[ Upstream commit 98712844589e06d9aa305b5077169942139fd75c ] + +Similar to bonding driver, add NETIF_F_GSO_ENCAP_ALL to TEAM_VLAN_FEATURES +in order to support slave devices which propagate NETIF_F_GSO_UDP_TUNNEL & +NETIF_F_GSO_UDP_TUNNEL_CSUM as vlan_features. + +Fixes: 3625920b62c3 ("teaming: fix vlan_features computing") +Signed-off-by: Daniel Borkmann +Cc: Nikolay Aleksandrov +Cc: Ido Schimmel +Cc: Jiri Pirko +Reviewed-by: Nikolay Aleksandrov +Reviewed-by: Hangbin Liu +Link: https://patch.msgid.link/20241210141245.327886-5-daniel@iogearbox.net +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/team/team.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c +index 293eaf6b3ec9..872640a9e73a 100644 +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -984,7 +984,8 @@ static void team_port_disable(struct team *team, + + #define TEAM_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \ +- NETIF_F_HIGHDMA | NETIF_F_LRO) ++ NETIF_F_HIGHDMA | NETIF_F_LRO | \ ++ NETIF_F_GSO_ENCAP_ALL) + + #define TEAM_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE) +-- +2.39.5 + diff --git a/queue-6.1/tipc-fix-null-deref-in-cleanup_bearer.patch b/queue-6.1/tipc-fix-null-deref-in-cleanup_bearer.patch new file mode 100644 index 00000000000..041babd8d70 --- /dev/null +++ b/queue-6.1/tipc-fix-null-deref-in-cleanup_bearer.patch @@ -0,0 +1,82 @@ +From df06e808eb1fe529946a46030ecb752970cbb85a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 17:05:48 +0000 +Subject: tipc: fix NULL deref in cleanup_bearer() + +From: Eric Dumazet + +[ Upstream commit 
b04d86fff66b15c07505d226431f808c15b1703c ] + +syzbot found [1] that after blamed commit, ub->ubsock->sk +was NULL when attempting the atomic_dec() : + +atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count); + +Fix this by caching the tipc_net pointer. + +[1] + +Oops: general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] PREEMPT SMP KASAN PTI +KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] +CPU: 0 UID: 0 PID: 5896 Comm: kworker/0:3 Not tainted 6.13.0-rc1-next-20241203-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 +Workqueue: events cleanup_bearer + RIP: 0010:read_pnet include/net/net_namespace.h:387 [inline] + RIP: 0010:sock_net include/net/sock.h:655 [inline] + RIP: 0010:cleanup_bearer+0x1f7/0x280 net/tipc/udp_media.c:820 +Code: 18 48 89 d8 48 c1 e8 03 42 80 3c 28 00 74 08 48 89 df e8 3c f7 99 f6 48 8b 1b 48 83 c3 30 e8 f0 e4 60 00 48 89 d8 48 c1 e8 03 <42> 80 3c 28 00 74 08 48 89 df e8 1a f7 99 f6 49 83 c7 e8 48 8b 1b +RSP: 0018:ffffc9000410fb70 EFLAGS: 00010206 +RAX: 0000000000000006 RBX: 0000000000000030 RCX: ffff88802fe45a00 +RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffffc9000410f900 +RBP: ffff88807e1f0908 R08: ffffc9000410f907 R09: 1ffff92000821f20 +R10: dffffc0000000000 R11: fffff52000821f21 R12: ffff888031d19980 +R13: dffffc0000000000 R14: dffffc0000000000 R15: ffff88807e1f0918 +FS: 0000000000000000(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000556ca050b000 CR3: 0000000031c0c000 CR4: 00000000003526f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + +Fixes: 6a2fa13312e5 ("tipc: Fix use-after-free of kernel socket in cleanup_bearer().") +Reported-by: syzbot+46aa5474f179dacd1a3b@syzkaller.appspotmail.com +Closes: 
https://lore.kernel.org/netdev/67508b5f.050a0220.17bd51.0070.GAE@google.com/T/#u +Signed-off-by: Eric Dumazet +Reviewed-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20241204170548.4152658-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/tipc/udp_media.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c +index 3f5a12b85b2d..f5bd75d931c1 100644 +--- a/net/tipc/udp_media.c ++++ b/net/tipc/udp_media.c +@@ -811,6 +811,7 @@ static void cleanup_bearer(struct work_struct *work) + { + struct udp_bearer *ub = container_of(work, struct udp_bearer, work); + struct udp_replicast *rcast, *tmp; ++ struct tipc_net *tn; + + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + dst_cache_destroy(&rcast->dst_cache); +@@ -818,10 +819,14 @@ static void cleanup_bearer(struct work_struct *work) + kfree_rcu(rcast, rcu); + } + ++ tn = tipc_net(sock_net(ub->ubsock->sk)); ++ + dst_cache_destroy(&ub->rcast.dst_cache); + udp_tunnel_sock_release(ub->ubsock); ++ ++ /* Note: could use a call_rcu() to avoid another synchronize_net() */ + synchronize_net(); +- atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count); ++ atomic_dec(&tn->wq_count); + kfree(ub); + } + +-- +2.39.5 + diff --git a/queue-6.1/wifi-mac80211-clean-up-ret-in-sta_link_apply_paramet.patch b/queue-6.1/wifi-mac80211-clean-up-ret-in-sta_link_apply_paramet.patch new file mode 100644 index 00000000000..7f3a02f643c --- /dev/null +++ b/queue-6.1/wifi-mac80211-clean-up-ret-in-sta_link_apply_paramet.patch @@ -0,0 +1,55 @@ +From bfa42bb92f51e1ac4dcaccefccd505dca9baf510 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jun 2024 13:57:19 +0300 +Subject: wifi: mac80211: clean up 'ret' in sta_link_apply_parameters() + +From: Johannes Berg + +[ Upstream commit 642508a42f74d7467aae7c56dff3016db64a25bd ] + +There's no need to have the always-zero ret variable in +the function scope, move it into the inner 
scope only. + +Signed-off-by: Johannes Berg +Signed-off-by: Miri Korenblit +Link: https://msgid.link/20240605135233.eb7a24632d98.I72d7fe1da89d4b89bcfd0f5fb9057e3e69355cfe@changeid +Signed-off-by: Johannes Berg +Stable-dep-of: 819e0f1e58e0 ("wifi: mac80211: fix station NSS capability initialization order") +Signed-off-by: Sasha Levin +--- + net/mac80211/cfg.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c +index c0dccaceb05e..aa5daa2fad11 100644 +--- a/net/mac80211/cfg.c ++++ b/net/mac80211/cfg.c +@@ -1683,7 +1683,6 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, + struct sta_info *sta, bool new_link, + struct link_station_parameters *params) + { +- int ret = 0; + struct ieee80211_supported_band *sband; + struct ieee80211_sub_if_data *sdata = sta->sdata; + u32 link_id = params->link_id < 0 ? 0 : params->link_id; +@@ -1725,6 +1724,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, + } + + if (params->txpwr_set) { ++ int ret; ++ + link_sta->pub->txpwr.type = params->txpwr.type; + if (params->txpwr.type == NL80211_TX_POWER_LIMITED) + link_sta->pub->txpwr.power = params->txpwr.power; +@@ -1777,7 +1778,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, + + ieee80211_sta_init_nss(link_sta); + +- return ret; ++ return 0; + } + + static int sta_apply_parameters(struct ieee80211_local *local, +-- +2.39.5 + diff --git a/queue-6.1/wifi-mac80211-fix-station-nss-capability-initializat.patch b/queue-6.1/wifi-mac80211-fix-station-nss-capability-initializat.patch new file mode 100644 index 00000000000..f6bb32cd46b --- /dev/null +++ b/queue-6.1/wifi-mac80211-fix-station-nss-capability-initializat.patch @@ -0,0 +1,47 @@ +From b148a9221ba8d4a8747370cb893aa6ca2f283297 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 18 Nov 2024 16:07:22 +0800 +Subject: wifi: mac80211: fix station NSS capability initialization order + +From: Benjamin Lin + +[ 
Upstream commit 819e0f1e58e0ba3800cd9eb96b2a39e44e49df97 ] + +Station's spatial streaming capability should be initialized before +handling VHT OMN, because the handling requires the capability information. + +Fixes: a8bca3e9371d ("wifi: mac80211: track capability/opmode NSS separately") +Signed-off-by: Benjamin Lin +Link: https://patch.msgid.link/20241118080722.9603-1-benjamin-jw.lin@mediatek.com +[rewrite subject] +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/cfg.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c +index aa5daa2fad11..be48d3f7ffcd 100644 +--- a/net/mac80211/cfg.c ++++ b/net/mac80211/cfg.c +@@ -1767,6 +1767,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, + params->eht_capa_len, + link_sta); + ++ ieee80211_sta_init_nss(link_sta); ++ + if (params->opmode_notif_used) { + /* returned value is only needed for rc update, but the + * rc isn't initialized here yet, so ignore it +@@ -1776,8 +1778,6 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, + sband->band); + } + +- ieee80211_sta_init_nss(link_sta); +- + return 0; + } + +-- +2.39.5 + diff --git a/queue-6.1/wifi-nl80211-fix-nl80211_attr_mlo_link_id-off-by-one.patch b/queue-6.1/wifi-nl80211-fix-nl80211_attr_mlo_link_id-off-by-one.patch new file mode 100644 index 00000000000..0ea090c5cb8 --- /dev/null +++ b/queue-6.1/wifi-nl80211-fix-nl80211_attr_mlo_link_id-off-by-one.patch @@ -0,0 +1,77 @@ +From 8cea18269ec04dd829c37d7d8f5297a8bbb2c706 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 1 Dec 2024 01:05:26 +0800 +Subject: wifi: nl80211: fix NL80211_ATTR_MLO_LINK_ID off-by-one + +From: Lin Ma + +[ Upstream commit 2e3dbf938656986cce73ac4083500d0bcfbffe24 ] + +Since the netlink attribute range validation provides inclusive +checking, the *max* of attribute NL80211_ATTR_MLO_LINK_ID should be +IEEE80211_MLD_MAX_NUM_LINKS - 1 otherwise causing an off-by-one. 
+ +One crash stack for demonstration: +================================================================== +BUG: KASAN: wild-memory-access in ieee80211_tx_control_port+0x3b6/0xca0 net/mac80211/tx.c:5939 +Read of size 6 at addr 001102080000000c by task fuzzer.386/9508 + +CPU: 1 PID: 9508 Comm: syz.1.386 Not tainted 6.1.70 #2 +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x177/0x231 lib/dump_stack.c:106 + print_report+0xe0/0x750 mm/kasan/report.c:398 + kasan_report+0x139/0x170 mm/kasan/report.c:495 + kasan_check_range+0x287/0x290 mm/kasan/generic.c:189 + memcpy+0x25/0x60 mm/kasan/shadow.c:65 + ieee80211_tx_control_port+0x3b6/0xca0 net/mac80211/tx.c:5939 + rdev_tx_control_port net/wireless/rdev-ops.h:761 [inline] + nl80211_tx_control_port+0x7b3/0xc40 net/wireless/nl80211.c:15453 + genl_family_rcv_msg_doit+0x22e/0x320 net/netlink/genetlink.c:756 + genl_family_rcv_msg net/netlink/genetlink.c:833 [inline] + genl_rcv_msg+0x539/0x740 net/netlink/genetlink.c:850 + netlink_rcv_skb+0x1de/0x420 net/netlink/af_netlink.c:2508 + genl_rcv+0x24/0x40 net/netlink/genetlink.c:861 + netlink_unicast_kernel net/netlink/af_netlink.c:1326 [inline] + netlink_unicast+0x74b/0x8c0 net/netlink/af_netlink.c:1352 + netlink_sendmsg+0x882/0xb90 net/netlink/af_netlink.c:1874 + sock_sendmsg_nosec net/socket.c:716 [inline] + __sock_sendmsg net/socket.c:728 [inline] + ____sys_sendmsg+0x5cc/0x8f0 net/socket.c:2499 + ___sys_sendmsg+0x21c/0x290 net/socket.c:2553 + __sys_sendmsg net/socket.c:2582 [inline] + __do_sys_sendmsg net/socket.c:2591 [inline] + __se_sys_sendmsg+0x19e/0x270 net/socket.c:2589 + do_syscall_x64 arch/x86/entry/common.c:51 [inline] + do_syscall_64+0x45/0x90 arch/x86/entry/common.c:81 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Update the policy to ensure correct validation. 
+ +Fixes: 7b0a0e3c3a88 ("wifi: cfg80211: do some rework towards MLO link APIs") +Signed-off-by: Lin Ma +Suggested-by: Cengiz Can +Link: https://patch.msgid.link/20241130170526.96698-1-linma@zju.edu.cn +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/wireless/nl80211.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c +index 3e1c4e23484d..0ba824c3fd1b 100644 +--- a/net/wireless/nl80211.c ++++ b/net/wireless/nl80211.c +@@ -806,7 +806,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { + [NL80211_ATTR_MLO_LINKS] = + NLA_POLICY_NESTED_ARRAY(nl80211_policy), + [NL80211_ATTR_MLO_LINK_ID] = +- NLA_POLICY_RANGE(NLA_U8, 0, IEEE80211_MLD_MAX_NUM_LINKS), ++ NLA_POLICY_RANGE(NLA_U8, 0, IEEE80211_MLD_MAX_NUM_LINKS - 1), + [NL80211_ATTR_MLD_ADDR] = NLA_POLICY_EXACT_LEN(ETH_ALEN), + [NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG }, + [NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT }, +-- +2.39.5 +