From: Sasha Levin Date: Sun, 15 Dec 2024 16:52:00 +0000 (-0500) Subject: Fixes for 6.6 X-Git-Tag: v5.4.288~41 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5739ee747cbd2310f7f2b1fea93aa4f73baa94fa;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.6 Signed-off-by: Sasha Levin --- diff --git a/queue-6.6/acpi-nfit-vmalloc-out-of-bounds-read-in-acpi_nfit_ct.patch b/queue-6.6/acpi-nfit-vmalloc-out-of-bounds-read-in-acpi_nfit_ct.patch new file mode 100644 index 00000000000..a59eef9e4b5 --- /dev/null +++ b/queue-6.6/acpi-nfit-vmalloc-out-of-bounds-read-in-acpi_nfit_ct.patch @@ -0,0 +1,63 @@ +From 5695b645beee2ab5b033984b2bb46f647c7daf01 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 18 Nov 2024 21:56:09 +0530 +Subject: acpi: nfit: vmalloc-out-of-bounds Read in acpi_nfit_ctl + +From: Suraj Sonawane + +[ Upstream commit 265e98f72bac6c41a4492d3e30a8e5fd22fe0779 ] + +Fix an issue detected by syzbot with KASAN: + +BUG: KASAN: vmalloc-out-of-bounds in cmd_to_func drivers/acpi/nfit/ +core.c:416 [inline] +BUG: KASAN: vmalloc-out-of-bounds in acpi_nfit_ctl+0x20e8/0x24a0 +drivers/acpi/nfit/core.c:459 + +The issue occurs in cmd_to_func when the call_pkg->nd_reserved2 +array is accessed without verifying that call_pkg points to a buffer +that is appropriately sized as a struct nd_cmd_pkg. This can lead +to out-of-bounds access and undefined behavior if the buffer does not +have sufficient space. + +To address this, a check was added in acpi_nfit_ctl() to ensure that +buf is not NULL and that buf_len is not less than sizeof(*call_pkg) +before accessing it. This ensures safe access to the members of +call_pkg, including the nd_reserved2 array. 
+ +Reported-by: syzbot+7534f060ebda6b8b51b3@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=7534f060ebda6b8b51b3 +Tested-by: syzbot+7534f060ebda6b8b51b3@syzkaller.appspotmail.com +Fixes: ebe9f6f19d80 ("acpi/nfit: Fix bus command validation") +Signed-off-by: Suraj Sonawane +Reviewed-by: Alison Schofield +Reviewed-by: Dave Jiang +Link: https://patch.msgid.link/20241118162609.29063-1-surajsonawane0215@gmail.com +Signed-off-by: Ira Weiny +Signed-off-by: Sasha Levin +--- + drivers/acpi/nfit/core.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c +index 7d88db451cfb..7918923e3b74 100644 +--- a/drivers/acpi/nfit/core.c ++++ b/drivers/acpi/nfit/core.c +@@ -454,8 +454,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, + if (cmd_rc) + *cmd_rc = -EINVAL; + +- if (cmd == ND_CMD_CALL) ++ if (cmd == ND_CMD_CALL) { ++ if (!buf || buf_len < sizeof(*call_pkg)) ++ return -EINVAL; ++ + call_pkg = buf; ++ } ++ + func = cmd_to_func(nfit_mem, cmd, call_pkg, &family); + if (func < 0) + return func; +-- +2.39.5 + diff --git a/queue-6.6/acpi-resource-fix-memory-resource-type-union-access.patch b/queue-6.6/acpi-resource-fix-memory-resource-type-union-access.patch new file mode 100644 index 00000000000..1e73e617845 --- /dev/null +++ b/queue-6.6/acpi-resource-fix-memory-resource-type-union-access.patch @@ -0,0 +1,55 @@ +From 2ab36fefdbdff74b64583b5c20b658d9c3c27497 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Dec 2024 12:06:13 +0200 +Subject: ACPI: resource: Fix memory resource type union access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ilpo Järvinen + +[ Upstream commit 7899ca9f3bd2b008e9a7c41f2a9f1986052d7e96 ] + +In acpi_decode_space() addr->info.mem.caching is checked on main level +for any resource type but addr->info.mem is part of union and thus +valid only if the resource 
type is memory range. + +Move the check inside the preceding switch/case to only execute it +when the union is of correct type. + +Fixes: fcb29bbcd540 ("ACPI: Add prefetch decoding to the address space parser") +Signed-off-by: Ilpo Järvinen +Link: https://patch.msgid.link/20241202100614.20731-1-ilpo.jarvinen@linux.intel.com +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Sasha Levin +--- + drivers/acpi/resource.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c +index d3d776d4fb5a..df598de0cb18 100644 +--- a/drivers/acpi/resource.c ++++ b/drivers/acpi/resource.c +@@ -250,6 +250,9 @@ static bool acpi_decode_space(struct resource_win *win, + switch (addr->resource_type) { + case ACPI_MEMORY_RANGE: + acpi_dev_memresource_flags(res, len, wp); ++ ++ if (addr->info.mem.caching == ACPI_PREFETCHABLE_MEMORY) ++ res->flags |= IORESOURCE_PREFETCH; + break; + case ACPI_IO_RANGE: + acpi_dev_ioresource_flags(res, len, iodec, +@@ -265,9 +268,6 @@ static bool acpi_decode_space(struct resource_win *win, + if (addr->producer_consumer == ACPI_PRODUCER) + res->flags |= IORESOURCE_WINDOW; + +- if (addr->info.mem.caching == ACPI_PREFETCHABLE_MEMORY) +- res->flags |= IORESOURCE_PREFETCH; +- + return !(res->flags & IORESOURCE_DISABLED); + } + +-- +2.39.5 + diff --git a/queue-6.6/acpica-events-evxfregn-don-t-release-the-contextmute.patch b/queue-6.6/acpica-events-evxfregn-don-t-release-the-contextmute.patch new file mode 100644 index 00000000000..d2e90c87f86 --- /dev/null +++ b/queue-6.6/acpica-events-evxfregn-don-t-release-the-contextmute.patch @@ -0,0 +1,41 @@ +From 6dd3103e4907b580a14d56c20e901b72fdc87ca6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Nov 2024 11:29:54 +0300 +Subject: ACPICA: events/evxfregn: don't release the ContextMutex that was + never acquired + +From: Daniil Tatianin + +[ Upstream commit c53d96a4481f42a1635b96d2c1acbb0a126bfd54 ] + +This bug was first introduced in 
c27f3d011b08, where the author of the +patch probably meant to do DeleteMutex instead of ReleaseMutex. The +mutex leak was noticed later on and fixed in e4dfe108371, but the bogus +MutexRelease line was never removed, so do it now. + +Link: https://github.com/acpica/acpica/pull/982 +Fixes: c27f3d011b08 ("ACPICA: Fix race in generic_serial_bus (I2C) and GPIO op_region parameter handling") +Signed-off-by: Daniil Tatianin +Link: https://patch.msgid.link/20241122082954.658356-1-d-tatianin@yandex-team.ru +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Sasha Levin +--- + drivers/acpi/acpica/evxfregn.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c +index 95f78383bbdb..bff2d099f469 100644 +--- a/drivers/acpi/acpica/evxfregn.c ++++ b/drivers/acpi/acpica/evxfregn.c +@@ -232,8 +232,6 @@ acpi_remove_address_space_handler(acpi_handle device, + + /* Now we can delete the handler object */ + +- acpi_os_release_mutex(handler_obj->address_space. +- context_mutex); + acpi_ut_remove_reference(handler_obj); + goto unlock_and_exit; + } +-- +2.39.5 + diff --git a/queue-6.6/alsa-control-avoid-warn-for-symlink-errors.patch b/queue-6.6/alsa-control-avoid-warn-for-symlink-errors.patch new file mode 100644 index 00000000000..9c95e33de30 --- /dev/null +++ b/queue-6.6/alsa-control-avoid-warn-for-symlink-errors.patch @@ -0,0 +1,57 @@ +From 01d8bb57d22bc6d55afbaebf1ee23b5b68395711 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Dec 2024 10:56:12 +0100 +Subject: ALSA: control: Avoid WARN() for symlink errors + +From: Takashi Iwai + +[ Upstream commit b2e538a9827dd04ab5273bf4be8eb2edb84357b0 ] + +Using WARN() for showing the error of symlink creations don't give +more information than telling that something goes wrong, since the +usual code path is a lregister callback from each control element +creation. More badly, the use of WARN() rather confuses fuzzer as if +it were serious issues. 
+ +This patch downgrades the warning messages to use the normal dev_err() +instead of WARN(). For making it clearer, add the function name to +the prefix, too. + +Fixes: a135dfb5de15 ("ALSA: led control - add sysfs kcontrol LED marking layer") +Reported-by: syzbot+4e7919b09c67ffd198ae@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/675664c7.050a0220.a30f1.018c.GAE@google.com +Link: https://patch.msgid.link/20241209095614.4273-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/core/control_led.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/sound/core/control_led.c b/sound/core/control_led.c +index a78eb48927c7..ba984ed00972 100644 +--- a/sound/core/control_led.c ++++ b/sound/core/control_led.c +@@ -688,10 +688,16 @@ static void snd_ctl_led_sysfs_add(struct snd_card *card) + goto cerr; + led->cards[card->number] = led_card; + snprintf(link_name, sizeof(link_name), "led-%s", led->name); +- WARN(sysfs_create_link(&card->ctl_dev->kobj, &led_card->dev.kobj, link_name), +- "can't create symlink to controlC%i device\n", card->number); +- WARN(sysfs_create_link(&led_card->dev.kobj, &card->card_dev.kobj, "card"), +- "can't create symlink to card%i\n", card->number); ++ if (sysfs_create_link(&card->ctl_dev->kobj, &led_card->dev.kobj, ++ link_name)) ++ dev_err(card->dev, ++ "%s: can't create symlink to controlC%i device\n", ++ __func__, card->number); ++ if (sysfs_create_link(&led_card->dev.kobj, &card->card_dev.kobj, ++ "card")) ++ dev_err(card->dev, ++ "%s: can't create symlink to card%i\n", ++ __func__, card->number); + + continue; + cerr: +-- +2.39.5 + diff --git a/queue-6.6/amdgpu-uvd-get-ring-reference-from-rq-scheduler.patch b/queue-6.6/amdgpu-uvd-get-ring-reference-from-rq-scheduler.patch new file mode 100644 index 00000000000..bc18203b133 --- /dev/null +++ b/queue-6.6/amdgpu-uvd-get-ring-reference-from-rq-scheduler.patch @@ -0,0 +1,40 @@ +From 
dbe33a4146c885c1884a36ba78361f0763cc3134 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 11:30:01 -0500 +Subject: amdgpu/uvd: get ring reference from rq scheduler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: David (Ming Qiang) Wu + +[ Upstream commit 47f402a3e08113e0f5d8e1e6fcc197667a16022f ] + +base.sched may not be set for each instance and should not +be used for cases such as non-IB tests. + +Fixes: 2320c9e6a768 ("drm/sched: memset() 'job' in drm_sched_job_init()") +Signed-off-by: David (Ming Qiang) Wu +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +index 86d1d46e1e5e..4fba0b3d10f1 100644 +--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +@@ -1286,7 +1286,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) + { +- struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); ++ struct amdgpu_ring *ring = amdgpu_job_ring(job); + unsigned i; + + /* No patching necessary for the first instance */ +-- +2.39.5 + diff --git a/queue-6.6/asoc-amd-yc-fix-the-wrong-return-value.patch b/queue-6.6/asoc-amd-yc-fix-the-wrong-return-value.patch new file mode 100644 index 00000000000..fa434124a58 --- /dev/null +++ b/queue-6.6/asoc-amd-yc-fix-the-wrong-return-value.patch @@ -0,0 +1,57 @@ +From 1bde94d67a39f3b1324f7cb82bbe29049775cb01 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 14:40:25 +0530 +Subject: ASoC: amd: yc: Fix the wrong return value + +From: Venkata Prasad Potturu + +[ Upstream commit 984795e76def5c903724b8d6a8228e356bbdf2af ] + +With the current implementation, when ACP driver fails to read +ACPI _WOV entry then the DMI overrides 
code won't invoke, +may cause regressions for some BIOS versions. + +Add a condition check to jump to check the DMI entries in case the +ACP driver fails to read the ACPI _WOV method. + +Fixes: 4095cf872084 (ASoC: amd: yc: Fix for enabling DMIC on acp6x via _DSD entry) + +Signed-off-by: Venkata Prasad Potturu +Link: https://patch.msgid.link/20241210091026.996860-1-venkataprasad.potturu@amd.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/amd/yc/acp6x-mach.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c +index 39f151d073a6..f7fbde1bc2ed 100644 +--- a/sound/soc/amd/yc/acp6x-mach.c ++++ b/sound/soc/amd/yc/acp6x-mach.c +@@ -578,14 +578,19 @@ static int acp6x_probe(struct platform_device *pdev) + + handle = ACPI_HANDLE(pdev->dev.parent); + ret = acpi_evaluate_integer(handle, "_WOV", NULL, &dmic_status); +- if (!ACPI_FAILURE(ret)) ++ if (!ACPI_FAILURE(ret)) { + wov_en = dmic_status; ++ if (!wov_en) ++ return -ENODEV; ++ } else { ++ /* Incase of ACPI method read failure then jump to check_dmi_entry */ ++ goto check_dmi_entry; ++ } + +- if (is_dmic_enable && wov_en) ++ if (is_dmic_enable) + platform_set_drvdata(pdev, &acp6x_card); +- else +- return 0; + ++check_dmi_entry: + /* check for any DMI overrides */ + dmi_id = dmi_first_match(yc_acp_quirk_table); + if (dmi_id) +-- +2.39.5 + diff --git a/queue-6.6/batman-adv-do-not-let-tt-changes-list-grows-indefini.patch b/queue-6.6/batman-adv-do-not-let-tt-changes-list-grows-indefini.patch new file mode 100644 index 00000000000..451598a4987 --- /dev/null +++ b/queue-6.6/batman-adv-do-not-let-tt-changes-list-grows-indefini.patch @@ -0,0 +1,77 @@ +From 57e05c8c1a316c427d760e70933a88c6bdaa7e41 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Nov 2024 16:52:50 +0100 +Subject: batman-adv: Do not let TT changes list grows indefinitely + +From: Remi Pommarel + +[ Upstream commit 
fff8f17c1a6fc802ca23bbd3a276abfde8cc58e6 ] + +When TT changes list is too big to fit in packet due to MTU size, an +empty OGM is sent expected other node to send TT request to get the +changes. The issue is that tt.last_changeset was not built thus the +originator was responding with previous changes to those TT requests +(see batadv_send_my_tt_response). Also the changes list was never +cleaned up effectively never ending growing from this point onwards, +repeatedly sending the same TT response changes over and over, and +creating a new empty OGM every OGM interval expecting for the local +changes to be purged. + +When there is more TT changes that can fit in packet, drop all changes, +send empty OGM and wait for TT request so we can respond with a full +table instead. + +Fixes: e1bf0c14096f ("batman-adv: tvlv - convert tt data sent within OGMs") +Signed-off-by: Remi Pommarel +Acked-by: Antonio Quartulli +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Sasha Levin +--- + net/batman-adv/translation-table.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c +index bbab7491c83f..53dea8ae96e4 100644 +--- a/net/batman-adv/translation-table.c ++++ b/net/batman-adv/translation-table.c +@@ -990,6 +990,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + int tt_diff_len, tt_change_len = 0; + int tt_diff_entries_num = 0; + int tt_diff_entries_count = 0; ++ bool drop_changes = false; + size_t tt_extra_len = 0; + u16 tvlv_len; + +@@ -997,10 +998,17 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + tt_diff_len = batadv_tt_len(tt_diff_entries_num); + + /* if we have too many changes for one packet don't send any +- * and wait for the tt table request which will be fragmented ++ * and wait for the tt table request so we can reply with the full ++ * (fragmented) table. 
++ * ++ * The local change history should still be cleaned up so the next ++ * TT round can start again with a clean state. + */ +- if (tt_diff_len > bat_priv->soft_iface->mtu) ++ if (tt_diff_len > bat_priv->soft_iface->mtu) { + tt_diff_len = 0; ++ tt_diff_entries_num = 0; ++ drop_changes = true; ++ } + + tvlv_len = batadv_tt_prepare_tvlv_local_data(bat_priv, &tt_data, + &tt_change, &tt_diff_len); +@@ -1009,7 +1017,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + + tt_data->flags = BATADV_TT_OGM_DIFF; + +- if (tt_diff_len == 0) ++ if (!drop_changes && tt_diff_len == 0) + goto container_register; + + spin_lock_bh(&bat_priv->tt.changes_list_lock); +-- +2.39.5 + diff --git a/queue-6.6/batman-adv-do-not-send-uninitialized-tt-changes.patch b/queue-6.6/batman-adv-do-not-send-uninitialized-tt-changes.patch new file mode 100644 index 00000000000..1df25b8383b --- /dev/null +++ b/queue-6.6/batman-adv-do-not-send-uninitialized-tt-changes.patch @@ -0,0 +1,78 @@ +From 72b9f325e61f20088b793d4eb70336ca0af5ae55 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Nov 2024 16:52:48 +0100 +Subject: batman-adv: Do not send uninitialized TT changes + +From: Remi Pommarel + +[ Upstream commit f2f7358c3890e7366cbcb7512b4bc8b4394b2d61 ] + +The number of TT changes can be less than initially expected in +batadv_tt_tvlv_container_update() (changes can be removed by +batadv_tt_local_event() in ADD+DEL sequence between reading +tt_diff_entries_num and actually iterating the change list under lock). + +Thus tt_diff_len could be bigger than the actual changes size that need +to be sent. Because batadv_send_my_tt_response sends the whole +packet, uninitialized data can be interpreted as TT changes on other +nodes leading to weird TT global entries on those nodes such as: + + * 00:00:00:00:00:00 -1 [....] ( 0) 88:12:4e:ad:7e:ba (179) (0x45845380) + * 00:00:00:00:78:79 4092 [.W..] 
( 0) 88:12:4e:ad:7e:3c (145) (0x8ebadb8b) + +All of the above also applies to OGM tvlv container buffer's tvlv_len. + +Remove the extra allocated space to avoid sending uninitialized TT +changes in batadv_send_my_tt_response() and batadv_v_ogm_send_softif(). + +Fixes: e1bf0c14096f ("batman-adv: tvlv - convert tt data sent within OGMs") +Signed-off-by: Remi Pommarel +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Sasha Levin +--- + net/batman-adv/translation-table.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c +index 2243cec18ecc..f0590f9bc2b1 100644 +--- a/net/batman-adv/translation-table.c ++++ b/net/batman-adv/translation-table.c +@@ -990,6 +990,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + int tt_diff_len, tt_change_len = 0; + int tt_diff_entries_num = 0; + int tt_diff_entries_count = 0; ++ size_t tt_extra_len = 0; + u16 tvlv_len; + + tt_diff_entries_num = atomic_read(&bat_priv->tt.local_changes); +@@ -1027,6 +1028,9 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + } + spin_unlock_bh(&bat_priv->tt.changes_list_lock); + ++ tt_extra_len = batadv_tt_len(tt_diff_entries_num - ++ tt_diff_entries_count); ++ + /* Keep the buffer for possible tt_request */ + spin_lock_bh(&bat_priv->tt.last_changeset_lock); + kfree(bat_priv->tt.last_changeset); +@@ -1035,6 +1039,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + tt_change_len = batadv_tt_len(tt_diff_entries_count); + /* check whether this new OGM has no changes due to size problems */ + if (tt_diff_entries_count > 0) { ++ tt_diff_len -= tt_extra_len; + /* if kmalloc() fails we will reply with the full table + * instead of providing the diff + */ +@@ -1047,6 +1052,8 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) + } + spin_unlock_bh(&bat_priv->tt.last_changeset_lock); + ++ /* 
Remove extra packet space for OGM */ ++ tvlv_len -= tt_extra_len; + container_register: + batadv_tvlv_container_register(bat_priv, BATADV_TVLV_TT, 1, tt_data, + tvlv_len); +-- +2.39.5 + diff --git a/queue-6.6/batman-adv-remove-uninitialized-data-in-full-table-t.patch b/queue-6.6/batman-adv-remove-uninitialized-data-in-full-table-t.patch new file mode 100644 index 00000000000..dac48d81fe4 --- /dev/null +++ b/queue-6.6/batman-adv-remove-uninitialized-data-in-full-table-t.patch @@ -0,0 +1,115 @@ +From bf3b9d0f2be997676ccf531aa14544fda15d6405 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Nov 2024 16:52:49 +0100 +Subject: batman-adv: Remove uninitialized data in full table TT response + +From: Remi Pommarel + +[ Upstream commit 8038806db64da15721775d6b834990cacbfcf0b2 ] + +The number of entries filled by batadv_tt_tvlv_generate() can be less +than initially expected in batadv_tt_prepare_tvlv_{global,local}_data() +(changes can be removed by batadv_tt_local_event() in ADD+DEL sequence +in the meantime as the lock held during the whole tvlv global/local data +generation). + +Thus tvlv_len could be bigger than the actual TT entry size that need +to be sent so full table TT_RESPONSE could hold invalid TT entries such +as below. + + * 00:00:00:00:00:00 -1 [....] ( 0) 88:12:4e:ad:7e:ba (179) (0x45845380) + * 00:00:00:00:78:79 4092 [.W..] ( 0) 88:12:4e:ad:7e:3c (145) (0x8ebadb8b) + +Remove the extra allocated space to avoid sending uninitialized entries +for full table TT_RESPONSE in both batadv_send_other_tt_response() and +batadv_send_my_tt_response(). 
+ +Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific") +Signed-off-by: Remi Pommarel +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Sasha Levin +--- + net/batman-adv/translation-table.c | 37 ++++++++++++++++++------------ + 1 file changed, 22 insertions(+), 15 deletions(-) + +diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c +index f0590f9bc2b1..bbab7491c83f 100644 +--- a/net/batman-adv/translation-table.c ++++ b/net/batman-adv/translation-table.c +@@ -2754,14 +2754,16 @@ static bool batadv_tt_global_valid(const void *entry_ptr, + * + * Fills the tvlv buff with the tt entries from the specified hash. If valid_cb + * is not provided then this becomes a no-op. ++ * ++ * Return: Remaining unused length in tvlv_buff. + */ +-static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, +- struct batadv_hashtable *hash, +- void *tvlv_buff, u16 tt_len, +- bool (*valid_cb)(const void *, +- const void *, +- u8 *flags), +- void *cb_data) ++static u16 batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, ++ struct batadv_hashtable *hash, ++ void *tvlv_buff, u16 tt_len, ++ bool (*valid_cb)(const void *, ++ const void *, ++ u8 *flags), ++ void *cb_data) + { + struct batadv_tt_common_entry *tt_common_entry; + struct batadv_tvlv_tt_change *tt_change; +@@ -2775,7 +2777,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, + tt_change = tvlv_buff; + + if (!valid_cb) +- return; ++ return tt_len; + + rcu_read_lock(); + for (i = 0; i < hash->size; i++) { +@@ -2801,6 +2803,8 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, + } + } + rcu_read_unlock(); ++ ++ return batadv_tt_len(tt_tot - tt_num_entries); + } + + /** +@@ -3076,10 +3080,11 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, + goto out; + + /* fill the rest of the tvlv with the real TT entries */ +- batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.global_hash, +- tt_change, 
tt_len, +- batadv_tt_global_valid, +- req_dst_orig_node); ++ tvlv_len -= batadv_tt_tvlv_generate(bat_priv, ++ bat_priv->tt.global_hash, ++ tt_change, tt_len, ++ batadv_tt_global_valid, ++ req_dst_orig_node); + } + + /* Don't send the response, if larger than fragmented packet. */ +@@ -3203,9 +3208,11 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, + goto out; + + /* fill the rest of the tvlv with the real TT entries */ +- batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.local_hash, +- tt_change, tt_len, +- batadv_tt_local_valid, NULL); ++ tvlv_len -= batadv_tt_tvlv_generate(bat_priv, ++ bat_priv->tt.local_hash, ++ tt_change, tt_len, ++ batadv_tt_local_valid, ++ NULL); + } + + tvlv_tt_data->flags = BATADV_TT_RESPONSE; +-- +2.39.5 + diff --git a/queue-6.6/blk-iocost-avoid-using-clamp-on-inuse-in-__propagate.patch b/queue-6.6/blk-iocost-avoid-using-clamp-on-inuse-in-__propagate.patch new file mode 100644 index 00000000000..2daf19f3ebd --- /dev/null +++ b/queue-6.6/blk-iocost-avoid-using-clamp-on-inuse-in-__propagate.patch @@ -0,0 +1,77 @@ +From 362f731d55649454cbc2a5b21f06e0ae788b383b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Dec 2024 10:13:29 -0700 +Subject: blk-iocost: Avoid using clamp() on inuse in __propagate_weights() + +From: Nathan Chancellor + +[ Upstream commit 57e420c84f9ab55ba4c5e2ae9c5f6c8e1ea834d2 ] + +After a recent change to clamp() and its variants [1] that increases the +coverage of the check that high is greater than low because it can be +done through inlining, certain build configurations (such as s390 +defconfig) fail to build with clang with: + + block/blk-iocost.c:1101:11: error: call to '__compiletime_assert_557' declared with 'error' attribute: clamp() low limit 1 greater than high limit active + 1101 | inuse = clamp_t(u32, inuse, 1, active); + | ^ + include/linux/minmax.h:218:36: note: expanded from macro 'clamp_t' + 218 | #define clamp_t(type, val, lo, hi) __careful_clamp(type, val, lo, hi) + | ^ + 
include/linux/minmax.h:195:2: note: expanded from macro '__careful_clamp' + 195 | __clamp_once(type, val, lo, hi, __UNIQUE_ID(v_), __UNIQUE_ID(l_), __UNIQUE_ID(h_)) + | ^ + include/linux/minmax.h:188:2: note: expanded from macro '__clamp_once' + 188 | BUILD_BUG_ON_MSG(statically_true(ulo > uhi), \ + | ^ + +__propagate_weights() is called with an active value of zero in +ioc_check_iocgs(), which results in the high value being less than the +low value, which is undefined because the value returned depends on the +order of the comparisons. + +The purpose of this expression is to ensure inuse is not more than +active and at least 1. This could be written more simply with a ternary +expression that uses min(inuse, active) as the condition so that the +value of that condition can be used if it is not zero and one if it is. +Do this conversion to resolve the error and add a comment to deter +people from turning this back into clamp(). + +Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost") +Link: https://lore.kernel.org/r/34d53778977747f19cce2abb287bb3e6@AcuMS.aculab.com/ [1] +Suggested-by: David Laight +Reported-by: Linux Kernel Functional Testing +Closes: https://lore.kernel.org/llvm/CA+G9fYsD7mw13wredcZn0L-KBA3yeoVSTuxnss-AEWMN3ha0cA@mail.gmail.com/ +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202412120322.3GfVe3vF-lkp@intel.com/ +Signed-off-by: Nathan Chancellor +Acked-by: Tejun Heo +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-iocost.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/block/blk-iocost.c b/block/blk-iocost.c +index c3cb9c20b306..129732a8d0dd 100644 +--- a/block/blk-iocost.c ++++ b/block/blk-iocost.c +@@ -1098,7 +1098,14 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, + inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum, + iocg->child_active_sum); + } else { +- inuse = clamp_t(u32, inuse, 1, active); ++ /* ++ * It may be 
tempting to turn this into a clamp expression with ++ * a lower limit of 1 but active may be 0, which cannot be used ++ * as an upper limit in that situation. This expression allows ++ * active to clamp inuse unless it is 0, in which case inuse ++ * becomes 1. ++ */ ++ inuse = min(inuse, active) ?: 1; + } + + iocg->last_inuse = iocg->inuse; +-- +2.39.5 + diff --git a/queue-6.6/bluetooth-btmtk-avoid-uaf-in-btmtk_process_coredump.patch b/queue-6.6/bluetooth-btmtk-avoid-uaf-in-btmtk_process_coredump.patch new file mode 100644 index 00000000000..a728d340dc1 --- /dev/null +++ b/queue-6.6/bluetooth-btmtk-avoid-uaf-in-btmtk_process_coredump.patch @@ -0,0 +1,141 @@ +From 71187ac4570518727609d6967829dc31501b03a5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 16:36:10 -0300 +Subject: Bluetooth: btmtk: avoid UAF in btmtk_process_coredump + +From: Thadeu Lima de Souza Cascardo + +[ Upstream commit b548f5e9456c568155499d9ebac675c0d7a296e8 ] + +hci_devcd_append may lead to the release of the skb, so it cannot be +accessed once it is called. 
+ +================================================================== +BUG: KASAN: slab-use-after-free in btmtk_process_coredump+0x2a7/0x2d0 [btmtk] +Read of size 4 at addr ffff888033cfabb0 by task kworker/0:3/82 + +CPU: 0 PID: 82 Comm: kworker/0:3 Tainted: G U 6.6.40-lockdep-03464-g1d8b4eb3060e #1 b0b3c1cc0c842735643fb411799d97921d1f688c +Hardware name: Google Yaviks_Ufs/Yaviks_Ufs, BIOS Google_Yaviks_Ufs.15217.552.0 05/07/2024 +Workqueue: events btusb_rx_work [btusb] +Call Trace: + + dump_stack_lvl+0xfd/0x150 + print_report+0x131/0x780 + kasan_report+0x177/0x1c0 + btmtk_process_coredump+0x2a7/0x2d0 [btmtk 03edd567dd71a65958807c95a65db31d433e1d01] + btusb_recv_acl_mtk+0x11c/0x1a0 [btusb 675430d1e87c4f24d0c1f80efe600757a0f32bec] + btusb_rx_work+0x9e/0xe0 [btusb 675430d1e87c4f24d0c1f80efe600757a0f32bec] + worker_thread+0xe44/0x2cc0 + kthread+0x2ff/0x3a0 + ret_from_fork+0x51/0x80 + ret_from_fork_asm+0x1b/0x30 + + +Allocated by task 82: + stack_trace_save+0xdc/0x190 + kasan_set_track+0x4e/0x80 + __kasan_slab_alloc+0x4e/0x60 + kmem_cache_alloc+0x19f/0x360 + skb_clone+0x132/0xf70 + btusb_recv_acl_mtk+0x104/0x1a0 [btusb] + btusb_rx_work+0x9e/0xe0 [btusb] + worker_thread+0xe44/0x2cc0 + kthread+0x2ff/0x3a0 + ret_from_fork+0x51/0x80 + ret_from_fork_asm+0x1b/0x30 + +Freed by task 1733: + stack_trace_save+0xdc/0x190 + kasan_set_track+0x4e/0x80 + kasan_save_free_info+0x28/0xb0 + ____kasan_slab_free+0xfd/0x170 + kmem_cache_free+0x183/0x3f0 + hci_devcd_rx+0x91a/0x2060 [bluetooth] + worker_thread+0xe44/0x2cc0 + kthread+0x2ff/0x3a0 + ret_from_fork+0x51/0x80 + ret_from_fork_asm+0x1b/0x30 + +The buggy address belongs to the object at ffff888033cfab40 + which belongs to the cache skbuff_head_cache of size 232 +The buggy address is located 112 bytes inside of + freed 232-byte region [ffff888033cfab40, ffff888033cfac28) + +The buggy address belongs to the physical page: +page:00000000a174ba93 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x33cfa +head:00000000a174ba93 
order:1 entire_mapcount:0 nr_pages_mapped:0 pincount:0 +anon flags: 0x4000000000000840(slab|head|zone=1) +page_type: 0xffffffff() +raw: 4000000000000840 ffff888100848a00 0000000000000000 0000000000000001 +raw: 0000000000000000 0000000080190019 00000001ffffffff 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff888033cfaa80: fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc + ffff888033cfab00: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb +>ffff888033cfab80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff888033cfac00: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc + ffff888033cfac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +================================================================== + +Check if we need to call hci_devcd_complete before calling +hci_devcd_append. That requires that we check data->cd_info.cnt >= +MTK_COREDUMP_NUM instead of data->cd_info.cnt > MTK_COREDUMP_NUM, as we +increment data->cd_info.cnt only once the call to hci_devcd_append +succeeds. 
+ +Fixes: 0b7015132878 ("Bluetooth: btusb: mediatek: add MediaTek devcoredump support") +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + drivers/bluetooth/btmtk.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c +index 812fd2a8f853..4c53ab22d09b 100644 +--- a/drivers/bluetooth/btmtk.c ++++ b/drivers/bluetooth/btmtk.c +@@ -371,6 +371,7 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb) + { + struct btmediatek_data *data = hci_get_priv(hdev); + int err; ++ bool complete = false; + + if (!IS_ENABLED(CONFIG_DEV_COREDUMP)) { + kfree_skb(skb); +@@ -392,19 +393,22 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb) + fallthrough; + case HCI_DEVCOREDUMP_ACTIVE: + default: ++ /* Mediatek coredump data would be more than MTK_COREDUMP_NUM */ ++ if (data->cd_info.cnt >= MTK_COREDUMP_NUM && ++ skb->len > MTK_COREDUMP_END_LEN) ++ if (!memcmp((char *)&skb->data[skb->len - MTK_COREDUMP_END_LEN], ++ MTK_COREDUMP_END, MTK_COREDUMP_END_LEN - 1)) ++ complete = true; ++ + err = hci_devcd_append(hdev, skb); + if (err < 0) + break; + data->cd_info.cnt++; + +- /* Mediatek coredump data would be more than MTK_COREDUMP_NUM */ +- if (data->cd_info.cnt > MTK_COREDUMP_NUM && +- skb->len > MTK_COREDUMP_END_LEN) +- if (!memcmp((char *)&skb->data[skb->len - MTK_COREDUMP_END_LEN], +- MTK_COREDUMP_END, MTK_COREDUMP_END_LEN - 1)) { +- bt_dev_info(hdev, "Mediatek coredump end"); +- hci_devcd_complete(hdev); +- } ++ if (complete) { ++ bt_dev_info(hdev, "Mediatek coredump end"); ++ hci_devcd_complete(hdev); ++ } + + break; + } +-- +2.39.5 + diff --git a/queue-6.6/bluetooth-hci_event-fix-using-rcu_read_-un-lock-whil.patch b/queue-6.6/bluetooth-hci_event-fix-using-rcu_read_-un-lock-whil.patch new file mode 100644 index 00000000000..f7913d146d9 --- /dev/null +++ 
b/queue-6.6/bluetooth-hci_event-fix-using-rcu_read_-un-lock-whil.patch @@ -0,0 +1,89 @@ +From e7276e1b9bf1cebb74670c1dfe47286e33147e82 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 11:40:59 -0500 +Subject: Bluetooth: hci_event: Fix using rcu_read_(un)lock while iterating + +From: Luiz Augusto von Dentz + +[ Upstream commit 581dd2dc168fe0ed2a7a5534a724f0d3751c93ae ] + +The usage of rcu_read_(un)lock while inside list_for_each_entry_rcu is +not safe since for the most part entries fetched this way shall be +treated as rcu_dereference: + + Note that the value returned by rcu_dereference() is valid + only within the enclosing RCU read-side critical section [1]_. + For example, the following is **not** legal:: + + rcu_read_lock(); + p = rcu_dereference(head.next); + rcu_read_unlock(); + x = p->address; /* BUG!!! */ + rcu_read_lock(); + y = p->data; /* BUG!!! */ + rcu_read_unlock(); + +Fixes: a0bfde167b50 ("Bluetooth: ISO: Add support for connecting multiple BISes") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/hci_event.c | 33 +++++++++++---------------------- + 1 file changed, 11 insertions(+), 22 deletions(-) + +diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c +index 141b4fce55e3..01e51e1dc9b3 100644 +--- a/net/bluetooth/hci_event.c ++++ b/net/bluetooth/hci_event.c +@@ -6821,38 +6821,27 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, + return; + + hci_dev_lock(hdev); +- rcu_read_lock(); + + /* Connect all BISes that are bound to the BIG */ +- list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { +- if (bacmp(&conn->dst, BDADDR_ANY) || +- conn->type != ISO_LINK || +- conn->iso_qos.bcast.big != ev->handle) ++ while ((conn = hci_conn_hash_lookup_big_state(hdev, ev->handle, ++ BT_BOUND))) { ++ if (ev->status) { ++ hci_connect_cfm(conn, ev->status); ++ hci_conn_del(conn); + continue; ++ } + + if (hci_conn_set_handle(conn, + 
__le16_to_cpu(ev->bis_handle[i++]))) + continue; + +- if (!ev->status) { +- conn->state = BT_CONNECTED; +- set_bit(HCI_CONN_BIG_CREATED, &conn->flags); +- rcu_read_unlock(); +- hci_debugfs_create_conn(conn); +- hci_conn_add_sysfs(conn); +- hci_iso_setup_path(conn); +- rcu_read_lock(); +- continue; +- } +- +- hci_connect_cfm(conn, ev->status); +- rcu_read_unlock(); +- hci_conn_del(conn); +- rcu_read_lock(); ++ conn->state = BT_CONNECTED; ++ set_bit(HCI_CONN_BIG_CREATED, &conn->flags); ++ hci_debugfs_create_conn(conn); ++ hci_conn_add_sysfs(conn); ++ hci_iso_setup_path(conn); + } + +- rcu_read_unlock(); +- + if (!ev->status && !i) + /* If no BISes have been connected for the BIG, + * terminate. This is in case all bound connections +-- +2.39.5 + diff --git a/queue-6.6/bluetooth-iso-fix-recursive-locking-warning.patch b/queue-6.6/bluetooth-iso-fix-recursive-locking-warning.patch new file mode 100644 index 00000000000..d4913847c36 --- /dev/null +++ b/queue-6.6/bluetooth-iso-fix-recursive-locking-warning.patch @@ -0,0 +1,78 @@ +From e4c0e40dd359dd818d8485fbe57c8536a2473f88 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 14:28:49 +0200 +Subject: Bluetooth: iso: Fix recursive locking warning + +From: Iulia Tanasescu + +[ Upstream commit 9bde7c3b3ad0e1f39d6df93dd1c9caf63e19e50f ] + +This updates iso_sock_accept to use nested locking for the parent +socket, to avoid lockdep warnings caused because the parent and +child sockets are locked by the same thread: + +[ 41.585683] ============================================ +[ 41.585688] WARNING: possible recursive locking detected +[ 41.585694] 6.12.0-rc6+ #22 Not tainted +[ 41.585701] -------------------------------------------- +[ 41.585705] iso-tester/3139 is trying to acquire lock: +[ 41.585711] ffff988b29530a58 (sk_lock-AF_BLUETOOTH) + at: bt_accept_dequeue+0xe3/0x280 [bluetooth] +[ 41.585905] + but task is already holding lock: +[ 41.585909] ffff988b29533a58 (sk_lock-AF_BLUETOOTH) + at: 
iso_sock_accept+0x61/0x2d0 [bluetooth] +[ 41.586064] + other info that might help us debug this: +[ 41.586069] Possible unsafe locking scenario: + +[ 41.586072] CPU0 +[ 41.586076] ---- +[ 41.586079] lock(sk_lock-AF_BLUETOOTH); +[ 41.586086] lock(sk_lock-AF_BLUETOOTH); +[ 41.586093] + *** DEADLOCK *** + +[ 41.586097] May be due to missing lock nesting notation + +[ 41.586101] 1 lock held by iso-tester/3139: +[ 41.586107] #0: ffff988b29533a58 (sk_lock-AF_BLUETOOTH) + at: iso_sock_accept+0x61/0x2d0 [bluetooth] + +Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type") +Signed-off-by: Iulia Tanasescu +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/iso.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c +index 83597b3c0a8d..b94d202bf374 100644 +--- a/net/bluetooth/iso.c ++++ b/net/bluetooth/iso.c +@@ -1120,7 +1120,11 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock, + long timeo; + int err = 0; + +- lock_sock(sk); ++ /* Use explicit nested locking to avoid lockdep warnings generated ++ * because the parent socket and the child socket are locked on the ++ * same thread. 
++ */ ++ lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + +@@ -1151,7 +1155,7 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock, + release_sock(sk); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); +- lock_sock(sk); ++ lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + } + remove_wait_queue(sk_sleep(sk), &wait); + +-- +2.39.5 + diff --git a/queue-6.6/bluetooth-iso-reassociate-a-socket-with-an-active-bi.patch b/queue-6.6/bluetooth-iso-reassociate-a-socket-with-an-active-bi.patch new file mode 100644 index 00000000000..0729f9d75eb --- /dev/null +++ b/queue-6.6/bluetooth-iso-reassociate-a-socket-with-an-active-bi.patch @@ -0,0 +1,265 @@ +From 8d08bf944b36ea33aa56705f4b9c1125da00be8b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Nov 2023 17:38:00 +0200 +Subject: Bluetooth: ISO: Reassociate a socket with an active BIS + +From: Iulia Tanasescu + +[ Upstream commit fa224d0c094a458e9ebf5ea9b1c696136b7af427 ] + +For ISO Broadcast, all BISes from a BIG have the same lifespan - they +cannot be created or terminated independently from each other. + +This links together all BIS hcons that are part of the same BIG, so all +hcons are kept alive as long as the BIG is active. + +If multiple BIS sockets are opened for a BIG handle, and only part of +them are closed at some point, the associated hcons will be marked as +open. If new sockets will later be opened for the same BIG, they will +be reassociated with the open BIS hcons. + +All BIS hcons will be cleaned up and the BIG will be terminated when +the last BIS socket is closed from userspace. 
+ +Signed-off-by: Iulia Tanasescu +Signed-off-by: Luiz Augusto von Dentz +Stable-dep-of: 581dd2dc168f ("Bluetooth: hci_event: Fix using rcu_read_(un)lock while iterating") +Signed-off-by: Sasha Levin +--- + include/net/bluetooth/hci_core.h | 24 ++++++++++ + net/bluetooth/hci_conn.c | 32 ++++++++++++- + net/bluetooth/iso.c | 79 +++++++++++++++++++++++++++++++- + 3 files changed, 131 insertions(+), 4 deletions(-) + +diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h +index 4185eb679180..e9214ccfde2d 100644 +--- a/include/net/bluetooth/hci_core.h ++++ b/include/net/bluetooth/hci_core.h +@@ -1294,6 +1294,30 @@ static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev * + return NULL; + } + ++static inline struct hci_conn * ++hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) ++{ ++ struct hci_conn_hash *h = &hdev->conn_hash; ++ struct hci_conn *c; ++ ++ rcu_read_lock(); ++ ++ list_for_each_entry_rcu(c, &h->list, list) { ++ if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK || ++ c->state != state) ++ continue; ++ ++ if (handle == c->iso_qos.bcast.big) { ++ rcu_read_unlock(); ++ return c; ++ } ++ } ++ ++ rcu_read_unlock(); ++ ++ return NULL; ++} ++ + static inline struct hci_conn * + hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big) + { +diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c +index 35d739988ce3..6178ae8feafc 100644 +--- a/net/bluetooth/hci_conn.c ++++ b/net/bluetooth/hci_conn.c +@@ -1054,8 +1054,9 @@ static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason) + hci_conn_failed(conn, reason); + break; + case ISO_LINK: +- if (conn->state != BT_CONNECTED && +- !test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) ++ if ((conn->state != BT_CONNECTED && ++ !test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) || ++ test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) + hci_conn_failed(conn, reason); + break; + } +@@ -2134,7 +2135,17 @@ struct hci_conn 
*hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, + __u8 base_len, __u8 *base) + { + struct hci_conn *conn; ++ struct hci_conn *parent; + __u8 eir[HCI_MAX_PER_AD_LENGTH]; ++ struct hci_link *link; ++ ++ /* Look for any BIS that is open for rebinding */ ++ conn = hci_conn_hash_lookup_big_state(hdev, qos->bcast.big, BT_OPEN); ++ if (conn) { ++ memcpy(qos, &conn->iso_qos, sizeof(*qos)); ++ conn->state = BT_CONNECTED; ++ return conn; ++ } + + if (base_len && base) + base_len = eir_append_service_data(eir, 0, 0x1851, +@@ -2162,6 +2173,20 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, + conn->iso_qos = *qos; + conn->state = BT_BOUND; + ++ /* Link BISes together */ ++ parent = hci_conn_hash_lookup_big(hdev, ++ conn->iso_qos.bcast.big); ++ if (parent && parent != conn) { ++ link = hci_conn_link(parent, conn); ++ if (!link) { ++ hci_conn_drop(conn); ++ return ERR_PTR(-ENOLINK); ++ } ++ ++ /* Link takes the refcount */ ++ hci_conn_drop(conn); ++ } ++ + return conn; + } + +@@ -2193,6 +2218,9 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, + if (IS_ERR(conn)) + return conn; + ++ if (conn->state == BT_CONNECTED) ++ return conn; ++ + data.big = qos->bcast.big; + data.bis = qos->bcast.bis; + +diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c +index c2c80d600083..83597b3c0a8d 100644 +--- a/net/bluetooth/iso.c ++++ b/net/bluetooth/iso.c +@@ -612,19 +612,68 @@ static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst, + continue; + + /* Exact match. */ +- if (!bacmp(&iso_pi(sk)->src, src)) ++ if (!bacmp(&iso_pi(sk)->src, src)) { ++ sock_hold(sk); + break; ++ } + + /* Closest match */ +- if (!bacmp(&iso_pi(sk)->src, BDADDR_ANY)) ++ if (!bacmp(&iso_pi(sk)->src, BDADDR_ANY)) { ++ if (sk1) ++ sock_put(sk1); ++ + sk1 = sk; ++ sock_hold(sk1); ++ } + } + ++ if (sk && sk1) ++ sock_put(sk1); ++ + read_unlock(&iso_sk_list.lock); + + return sk ? 
sk : sk1; + } + ++static struct sock *iso_get_sock_big(struct sock *match_sk, bdaddr_t *src, ++ bdaddr_t *dst, uint8_t big) ++{ ++ struct sock *sk = NULL; ++ ++ read_lock(&iso_sk_list.lock); ++ ++ sk_for_each(sk, &iso_sk_list.head) { ++ if (match_sk == sk) ++ continue; ++ ++ /* Look for sockets that have already been ++ * connected to the BIG ++ */ ++ if (sk->sk_state != BT_CONNECTED && ++ sk->sk_state != BT_CONNECT) ++ continue; ++ ++ /* Match Broadcast destination */ ++ if (bacmp(&iso_pi(sk)->dst, dst)) ++ continue; ++ ++ /* Match BIG handle */ ++ if (iso_pi(sk)->qos.bcast.big != big) ++ continue; ++ ++ /* Match source address */ ++ if (bacmp(&iso_pi(sk)->src, src)) ++ continue; ++ ++ sock_hold(sk); ++ break; ++ } ++ ++ read_unlock(&iso_sk_list.lock); ++ ++ return sk; ++} ++ + static void iso_sock_destruct(struct sock *sk) + { + BT_DBG("sk %p", sk); +@@ -677,6 +726,28 @@ static void iso_sock_kill(struct sock *sk) + + static void iso_sock_disconn(struct sock *sk) + { ++ struct sock *bis_sk; ++ struct hci_conn *hcon = iso_pi(sk)->conn->hcon; ++ ++ if (test_bit(HCI_CONN_BIG_CREATED, &hcon->flags)) { ++ bis_sk = iso_get_sock_big(sk, &iso_pi(sk)->src, ++ &iso_pi(sk)->dst, ++ iso_pi(sk)->qos.bcast.big); ++ ++ /* If there are any other connected sockets for the ++ * same BIG, just delete the sk and leave the bis ++ * hcon active, in case later rebinding is needed. 
++ */ ++ if (bis_sk) { ++ hcon->state = BT_OPEN; ++ iso_pi(sk)->conn->hcon = NULL; ++ iso_sock_clear_timer(sk); ++ iso_chan_del(sk, bt_to_errno(hcon->abort_reason)); ++ sock_put(bis_sk); ++ return; ++ } ++ } ++ + sk->sk_state = BT_DISCONN; + iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT); + iso_conn_lock(iso_pi(sk)->conn); +@@ -1724,6 +1795,7 @@ static void iso_conn_ready(struct iso_conn *conn) + parent->sk_data_ready(parent); + + release_sock(parent); ++ sock_put(parent); + } + } + +@@ -1819,6 +1891,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) + if (err) { + bt_dev_err(hdev, "hci_le_big_create_sync: %d", + err); ++ sock_put(sk); + sk = NULL; + } + } +@@ -1847,6 +1920,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) + if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) + *flags |= HCI_PROTO_DEFER; + ++ sock_put(sk); ++ + return lm; + } + +-- +2.39.5 + diff --git a/queue-6.6/bluetooth-sco-add-support-for-16-bits-transparent-vo.patch b/queue-6.6/bluetooth-sco-add-support-for-16-bits-transparent-vo.patch new file mode 100644 index 00000000000..5490be1e0c4 --- /dev/null +++ b/queue-6.6/bluetooth-sco-add-support-for-16-bits-transparent-vo.patch @@ -0,0 +1,104 @@ +From 707c4c55f82cefdfa977694c0bfd5d1f37eec905 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 16:51:59 +0100 +Subject: Bluetooth: SCO: Add support for 16 bits transparent voice setting +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Frédéric Danis + +[ Upstream commit 29a651451e6c264f58cd9d9a26088e579d17b242 ] + +The voice setting is used by sco_connect() or sco_conn_defer_accept() +after being set by sco_sock_setsockopt(). + +The PCM part of the voice setting is used for offload mode through PCM +chipset port. +This commit adds support for mSBC 16 bits offloading, i.e. audio data +not transported over HCI. + +The BCM4349B1 supports 16 bits transparent data on its I2S port. 
+If BT_VOICE_TRANSPARENT is used when accepting a SCO connection, this +gives only garbage audio while using BT_VOICE_TRANSPARENT_16BIT gives +correct audio. +This has been tested with connection to iPhone 14 and Samsung S24. + +Fixes: ad10b1a48754 ("Bluetooth: Add Bluetooth socket voice option") +Signed-off-by: Frédéric Danis +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + include/net/bluetooth/bluetooth.h | 1 + + net/bluetooth/sco.c | 29 +++++++++++++++-------------- + 2 files changed, 16 insertions(+), 14 deletions(-) + +diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h +index 4763a47bf8c8..c25f9f4cac80 100644 +--- a/include/net/bluetooth/bluetooth.h ++++ b/include/net/bluetooth/bluetooth.h +@@ -123,6 +123,7 @@ struct bt_voice { + + #define BT_VOICE_TRANSPARENT 0x0003 + #define BT_VOICE_CVSD_16BIT 0x0060 ++#define BT_VOICE_TRANSPARENT_16BIT 0x0063 + + #define BT_SNDMTU 12 + #define BT_RCVMTU 13 +diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c +index fb368540139a..64d4d57c7033 100644 +--- a/net/bluetooth/sco.c ++++ b/net/bluetooth/sco.c +@@ -267,10 +267,13 @@ static int sco_connect(struct sock *sk) + else + type = SCO_LINK; + +- if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT && +- (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) { +- err = -EOPNOTSUPP; +- goto unlock; ++ switch (sco_pi(sk)->setting & SCO_AIRMODE_MASK) { ++ case SCO_AIRMODE_TRANSP: ++ if (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev)) { ++ err = -EOPNOTSUPP; ++ goto unlock; ++ } ++ break; + } + + hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst, +@@ -876,13 +879,6 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, + if (err) + break; + +- /* Explicitly check for these values */ +- if (voice.setting != BT_VOICE_TRANSPARENT && +- voice.setting != BT_VOICE_CVSD_16BIT) { +- err = -EINVAL; +- break; +- } +- + sco_pi(sk)->setting = voice.setting; + hdev = hci_get_route(&sco_pi(sk)->dst, 
&sco_pi(sk)->src, + BDADDR_BREDR); +@@ -890,9 +886,14 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, + err = -EBADFD; + break; + } +- if (enhanced_sync_conn_capable(hdev) && +- voice.setting == BT_VOICE_TRANSPARENT) +- sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT; ++ ++ switch (sco_pi(sk)->setting & SCO_AIRMODE_MASK) { ++ case SCO_AIRMODE_TRANSP: ++ if (enhanced_sync_conn_capable(hdev)) ++ sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT; ++ break; ++ } ++ + hci_dev_put(hdev); + break; + +-- +2.39.5 + diff --git a/queue-6.6/bonding-fix-feature-propagation-of-netif_f_gso_encap.patch b/queue-6.6/bonding-fix-feature-propagation-of-netif_f_gso_encap.patch new file mode 100644 index 00000000000..7eb30a4e21b --- /dev/null +++ b/queue-6.6/bonding-fix-feature-propagation-of-netif_f_gso_encap.patch @@ -0,0 +1,101 @@ +From a90a5a97b5235a319219e9445e25014dfb2a0ace Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 15:12:43 +0100 +Subject: bonding: Fix feature propagation of NETIF_F_GSO_ENCAP_ALL + +From: Daniel Borkmann + +[ Upstream commit 77b11c8bf3a228d1c63464534c2dcc8d9c8bf7ff ] + +Drivers like mlx5 expose NIC's vlan_features such as +NETIF_F_GSO_UDP_TUNNEL & NETIF_F_GSO_UDP_TUNNEL_CSUM which are +later not propagated when the underlying devices are bonded and +a vlan device created on top of the bond. + +Right now, the more cumbersome workaround for this is to create +the vlan on top of the mlx5 and then enslave the vlan devices +to a bond. + +To fix this, add NETIF_F_GSO_ENCAP_ALL to BOND_VLAN_FEATURES +such that bond_compute_features() can probe and propagate the +vlan_features from the slave devices up to the vlan device. + +Given the following bond: + + # ethtool -i enp2s0f{0,1}np{0,1} + driver: mlx5_core + [...] 
+ + # ethtool -k enp2s0f0np0 | grep udp + tx-udp_tnl-segmentation: on + tx-udp_tnl-csum-segmentation: on + tx-udp-segmentation: on + rx-udp_tunnel-port-offload: on + rx-udp-gro-forwarding: off + + # ethtool -k enp2s0f1np1 | grep udp + tx-udp_tnl-segmentation: on + tx-udp_tnl-csum-segmentation: on + tx-udp-segmentation: on + rx-udp_tunnel-port-offload: on + rx-udp-gro-forwarding: off + + # ethtool -k bond0 | grep udp + tx-udp_tnl-segmentation: on + tx-udp_tnl-csum-segmentation: on + tx-udp-segmentation: on + rx-udp_tunnel-port-offload: off [fixed] + rx-udp-gro-forwarding: off + +Before: + + # ethtool -k bond0.100 | grep udp + tx-udp_tnl-segmentation: off [requested on] + tx-udp_tnl-csum-segmentation: off [requested on] + tx-udp-segmentation: on + rx-udp_tunnel-port-offload: off [fixed] + rx-udp-gro-forwarding: off + +After: + + # ethtool -k bond0.100 | grep udp + tx-udp_tnl-segmentation: on + tx-udp_tnl-csum-segmentation: on + tx-udp-segmentation: on + rx-udp_tunnel-port-offload: off [fixed] + rx-udp-gro-forwarding: off + +Various users have run into this reporting performance issues when +configuring Cilium in vxlan tunneling mode and having the combination +of bond & vlan for the core devices connecting the Kubernetes cluster +to the outside world. 
+ +Fixes: a9b3ace44c7d ("bonding: fix vlan_features computing") +Signed-off-by: Daniel Borkmann +Cc: Nikolay Aleksandrov +Cc: Ido Schimmel +Cc: Jiri Pirko +Reviewed-by: Nikolay Aleksandrov +Reviewed-by: Hangbin Liu +Link: https://patch.msgid.link/20241210141245.327886-3-daniel@iogearbox.net +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_main.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index bee93a437f99..7eb62fe55947 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1462,6 +1462,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev, + + #define BOND_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \ ++ NETIF_F_GSO_ENCAP_ALL | \ + NETIF_F_HIGHDMA | NETIF_F_LRO) + + #define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ +-- +2.39.5 + diff --git a/queue-6.6/cxgb4-use-port-number-to-set-mac-addr.patch b/queue-6.6/cxgb4-use-port-number-to-set-mac-addr.patch new file mode 100644 index 00000000000..1b142b7094b --- /dev/null +++ b/queue-6.6/cxgb4-use-port-number-to-set-mac-addr.patch @@ -0,0 +1,83 @@ +From bb1095039492b6cd9867569d2d1d2d0ec47eb715 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Dec 2024 11:50:14 +0530 +Subject: cxgb4: use port number to set mac addr + +From: Anumula Murali Mohan Reddy + +[ Upstream commit 356983f569c1f5991661fc0050aa263792f50616 ] + +t4_set_vf_mac_acl() uses pf to set mac addr, but t4vf_get_vf_mac_acl() +uses port number to get mac addr; this leads to an error when attempting +to set the MAC address on VFs of PF2 and PF3. +This patch fixes the issue by using port number to set mac address. 
+ +Fixes: e0cdac65ba26 ("cxgb4vf: configure ports accessible by the VF") +Signed-off-by: Anumula Murali Mohan Reddy +Signed-off-by: Potnuri Bharat Teja +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20241206062014.49414-1-anumula@chelsio.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 2 +- + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- + drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 5 +++-- + 3 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +index fca9533bc011..2ed72c3fab42 100644 +--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h ++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +@@ -2082,7 +2082,7 @@ void t4_idma_monitor(struct adapter *adapter, + struct sge_idma_monitor_state *idma, + int hz, int ticks); + int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, +- unsigned int naddr, u8 *addr); ++ u8 start, unsigned int naddr, u8 *addr); + void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok); + void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs, +diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +index 2eb33a727bba..b215ff14da1b 100644 +--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c ++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +@@ -3246,7 +3246,7 @@ static int cxgb4_mgmt_set_vf_mac(struct net_device *dev, int vf, u8 *mac) + + dev_info(pi->adapter->pdev_dev, + "Setting MAC %pM on VF %d\n", mac, vf); +- ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac); ++ ret = t4_set_vf_mac_acl(adap, vf + 1, pi->lport, 1, mac); + if (!ret) + ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac); + return ret; +diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +index 76de55306c4d..175bf9b13058 100644 +--- 
a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c ++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +@@ -10215,11 +10215,12 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size) + * t4_set_vf_mac_acl - Set MAC address for the specified VF + * @adapter: The adapter + * @vf: one of the VFs instantiated by the specified PF ++ * @start: The start port id associated with specified VF + * @naddr: the number of MAC addresses + * @addr: the MAC address(es) to be set to the specified VF + */ + int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, +- unsigned int naddr, u8 *addr) ++ u8 start, unsigned int naddr, u8 *addr) + { + struct fw_acl_mac_cmd cmd; + +@@ -10234,7 +10235,7 @@ int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, + cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd)); + cmd.nmac = naddr; + +- switch (adapter->pf) { ++ switch (start) { + case 3: + memcpy(cmd.macaddr3, addr, sizeof(cmd.macaddr3)); + break; +-- +2.39.5 + diff --git a/queue-6.6/documentation-pm-clarify-pm_runtime_resume_and_get-r.patch b/queue-6.6/documentation-pm-clarify-pm_runtime_resume_and_get-r.patch new file mode 100644 index 00000000000..ca77c9951c5 --- /dev/null +++ b/queue-6.6/documentation-pm-clarify-pm_runtime_resume_and_get-r.patch @@ -0,0 +1,42 @@ +From a2eca798175db9baeb7630ea0362fe9940967601 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Dec 2024 14:37:29 +0000 +Subject: Documentation: PM: Clarify pm_runtime_resume_and_get() return value + +From: Paul Barker + +[ Upstream commit ccb84dc8f4a02e7d30ffd388522996546b4d00e1 ] + +Update the documentation to match the behaviour of the code. + +pm_runtime_resume_and_get() always returns 0 on success, even if +__pm_runtime_resume() returns 1. 
+ +Fixes: 2c412337cfe6 ("PM: runtime: Add documentation for pm_runtime_resume_and_get()") +Signed-off-by: Paul Barker +Link: https://patch.msgid.link/20241203143729.478-1-paul.barker.ct@bp.renesas.com +[ rjw: Subject and changelog edits, adjusted new comment formatting ] +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Sasha Levin +--- + Documentation/power/runtime_pm.rst | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst +index 65b86e487afe..b6d5a3a8febc 100644 +--- a/Documentation/power/runtime_pm.rst ++++ b/Documentation/power/runtime_pm.rst +@@ -347,7 +347,9 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: + + `int pm_runtime_resume_and_get(struct device *dev);` + - run pm_runtime_resume(dev) and if successful, increment the device's +- usage counter; return the result of pm_runtime_resume ++ usage counter; returns 0 on success (whether or not the device's ++ runtime PM status was already 'active') or the error code from ++ pm_runtime_resume() on failure. + + `int pm_request_idle(struct device *dev);` + - submit a request to execute the subsystem-level idle callback for the +-- +2.39.5 + diff --git a/queue-6.6/kselftest-arm64-abi-fix-svcr-detection.patch b/queue-6.6/kselftest-arm64-abi-fix-svcr-detection.patch new file mode 100644 index 00000000000..85283b58371 --- /dev/null +++ b/queue-6.6/kselftest-arm64-abi-fix-svcr-detection.patch @@ -0,0 +1,130 @@ +From b81c78c173d61677e877be8f80a76ae5b0cda37a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Dec 2024 19:16:39 +0800 +Subject: kselftest/arm64: abi: fix SVCR detection + +From: Weizhao Ouyang + +[ Upstream commit ce03573a1917532da06057da9f8e74a2ee9e2ac9 ] + +When using svcr_in to check ZA and Streaming Mode, we should make sure +that the value in x2 is correct, otherwise it may trigger an Illegal +instruction if FEAT_SVE and !FEAT_SME. 
+ +Fixes: 43e3f85523e4 ("kselftest/arm64: Add SME support to syscall ABI test") +Signed-off-by: Weizhao Ouyang +Reviewed-by: Mark Brown +Link: https://lore.kernel.org/r/20241211111639.12344-1-o451686892@gmail.com +Signed-off-by: Catalin Marinas +Signed-off-by: Sasha Levin +--- + .../selftests/arm64/abi/syscall-abi-asm.S | 32 +++++++++---------- + 1 file changed, 15 insertions(+), 17 deletions(-) + +diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S +index df3230fdac39..66ab2e0bae5f 100644 +--- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S ++++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S +@@ -81,32 +81,31 @@ do_syscall: + stp x27, x28, [sp, #96] + + // Set SVCR if we're doing SME +- cbz x1, 1f ++ cbz x1, load_gpr + adrp x2, svcr_in + ldr x2, [x2, :lo12:svcr_in] + msr S3_3_C4_C2_2, x2 +-1: + + // Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR +- tbz x2, #SVCR_ZA_SHIFT, 1f ++ tbz x2, #SVCR_ZA_SHIFT, load_gpr + mov w12, #0 + ldr x2, =za_in +-2: _ldr_za 12, 2 ++1: _ldr_za 12, 2 + add x2, x2, x1 + add x12, x12, #1 + cmp x1, x12 +- bne 2b ++ bne 1b + + // ZT0 + mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 + ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ + #ID_AA64SMFR0_EL1_SMEver_WIDTH +- cbz x2, 1f ++ cbz x2, load_gpr + adrp x2, zt_in + add x2, x2, :lo12:zt_in + _ldr_zt 2 +-1: + ++load_gpr: + // Load GPRs x8-x28, and save our SP/FP for later comparison + ldr x2, =gpr_in + add x2, x2, #64 +@@ -125,9 +124,9 @@ do_syscall: + str x30, [x2], #8 // LR + + // Load FPRs if we're not doing neither SVE nor streaming SVE +- cbnz x0, 1f ++ cbnz x0, check_sve_in + ldr x2, =svcr_in +- tbnz x2, #SVCR_SM_SHIFT, 1f ++ tbnz x2, #SVCR_SM_SHIFT, check_sve_in + + ldr x2, =fpr_in + ldp q0, q1, [x2] +@@ -148,8 +147,8 @@ do_syscall: + ldp q30, q31, [x2, #16 * 30] + + b 2f +-1: + ++check_sve_in: + // Load the SVE registers if we're doing SVE/SME + + ldr x2, =z_in +@@ -256,32 +255,31 @@ do_syscall: 
+ stp q30, q31, [x2, #16 * 30] + + // Save SVCR if we're doing SME +- cbz x1, 1f ++ cbz x1, check_sve_out + mrs x2, S3_3_C4_C2_2 + adrp x3, svcr_out + str x2, [x3, :lo12:svcr_out] +-1: + + // Save ZA if it's enabled - uses x12 as scratch due to SME STR +- tbz x2, #SVCR_ZA_SHIFT, 1f ++ tbz x2, #SVCR_ZA_SHIFT, check_sve_out + mov w12, #0 + ldr x2, =za_out +-2: _str_za 12, 2 ++1: _str_za 12, 2 + add x2, x2, x1 + add x12, x12, #1 + cmp x1, x12 +- bne 2b ++ bne 1b + + // ZT0 + mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 + ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ + #ID_AA64SMFR0_EL1_SMEver_WIDTH +- cbz x2, 1f ++ cbz x2, check_sve_out + adrp x2, zt_out + add x2, x2, :lo12:zt_out + _str_zt 2 +-1: + ++check_sve_out: + // Save the SVE state if we have some + cbz x0, 1f + +-- +2.39.5 + diff --git a/queue-6.6/libperf-evlist-fix-cpu-argument-on-hybrid-platform.patch b/queue-6.6/libperf-evlist-fix-cpu-argument-on-hybrid-platform.patch new file mode 100644 index 00000000000..21071886bf6 --- /dev/null +++ b/queue-6.6/libperf-evlist-fix-cpu-argument-on-hybrid-platform.patch @@ -0,0 +1,93 @@ +From 001ba9206e4fa7b3d737ddded8cf58be98620f97 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 14 Nov 2024 16:04:48 +0000 +Subject: libperf: evlist: Fix --cpu argument on hybrid platform + +From: James Clark + +[ Upstream commit f7e36d02d771ee14acae1482091718460cffb321 ] + +Since the linked fixes: commit, specifying a CPU on hybrid platforms +results in an error because Perf tries to open an extended type event +on "any" CPU which isn't valid. Extended type events can only be opened +on CPUs that match the type. 
+ +Before (working): + + $ perf record --cpu 1 -- true + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 2.385 MB perf.data (7 samples) ] + +After (not working): + + $ perf record -C 1 -- true + WARNING: A requested CPU in '1' is not supported by PMU 'cpu_atom' (CPUs 16-27) for event 'cycles:P' + Error: + The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cpu_atom/cycles:P/). + /bin/dmesg | grep -i perf may provide additional information. + +(Ignore the warning message, that's expected and not particularly +relevant to this issue). + +This is because perf_cpu_map__intersect() of the user specified CPU (1) +and one of the PMU's CPUs (16-27) correctly results in an empty (NULL) +CPU map. However for the purposes of opening an event, libperf converts +empty CPU maps into an any CPU (-1) which the kernel rejects. + +Fix it by deleting evsels with empty CPU maps in the specific case where +user requested CPU maps are evaluated. + +Fixes: 251aa040244a ("perf parse-events: Wildcard most "numeric" events") +Reviewed-by: Ian Rogers +Tested-by: Thomas Falcon +Signed-off-by: James Clark +Tested-by: Arnaldo Carvalho de Melo +Link: https://lore.kernel.org/r/20241114160450.295844-2-james.clark@linaro.org +Signed-off-by: Namhyung Kim +Signed-off-by: Sasha Levin +--- + tools/lib/perf/evlist.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c +index fad607789d1e..00ada8acee61 100644 +--- a/tools/lib/perf/evlist.c ++++ b/tools/lib/perf/evlist.c +@@ -47,6 +47,20 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, + */ + perf_cpu_map__put(evsel->cpus); + evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus); ++ ++ /* ++ * Empty cpu lists would eventually get opened as "any" so remove ++ * genuinely empty ones before they're opened in the wrong place. 
++ */ ++ if (perf_cpu_map__is_empty(evsel->cpus)) { ++ struct perf_evsel *next = perf_evlist__next(evlist, evsel); ++ ++ perf_evlist__remove(evlist, evsel); ++ /* Keep idx contiguous */ ++ if (next) ++ list_for_each_entry_from(next, &evlist->entries, node) ++ next->idx--; ++ } + } else if (!evsel->own_cpus || evlist->has_user_cpus || + (!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) { + /* +@@ -80,11 +94,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, + + static void perf_evlist__propagate_maps(struct perf_evlist *evlist) + { +- struct perf_evsel *evsel; ++ struct perf_evsel *evsel, *n; + + evlist->needs_map_propagation = true; + +- perf_evlist__for_each_evsel(evlist, evsel) ++ list_for_each_entry_safe(evsel, n, &evlist->entries, node) + __perf_evlist__propagate_maps(evlist, evsel); + } + +-- +2.39.5 + diff --git a/queue-6.6/net-defer-final-struct-net-free-in-netns-dismantle.patch b/queue-6.6/net-defer-final-struct-net-free-in-netns-dismantle.patch new file mode 100644 index 00000000000..48f337343b6 --- /dev/null +++ b/queue-6.6/net-defer-final-struct-net-free-in-netns-dismantle.patch @@ -0,0 +1,223 @@ +From e4456ef5d4d03c4d10ebde8f7226e51ee47d5f7e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 12:54:55 +0000 +Subject: net: defer final 'struct net' free in netns dismantle + +From: Eric Dumazet + +[ Upstream commit 0f6ede9fbc747e2553612271bce108f7517e7a45 ] + +Ilya reported a slab-use-after-free in dst_destroy [1] + +Issue is in xfrm6_net_init() and xfrm4_net_init() : + +They copy xfrm[46]_dst_ops_template into net->xfrm.xfrm[46]_dst_ops. + +But net structure might be freed before all the dst callbacks are +called. So when dst_destroy() calls later : + +if (dst->ops->destroy) + dst->ops->destroy(dst); + +dst->ops points to the old net->xfrm.xfrm[46]_dst_ops, which has been freed. 
+ +See a relevant issue fixed in : + +ac888d58869b ("net: do not delay dst_entries_add() in dst_release()") + +A fix is to queue the 'struct net' to be freed after one +more cleanup_net() round (and existing rcu_barrier()) + +[1] + +BUG: KASAN: slab-use-after-free in dst_destroy (net/core/dst.c:112) +Read of size 8 at addr ffff8882137ccab0 by task swapper/37/0 +Dec 03 05:46:18 kernel: +CPU: 37 UID: 0 PID: 0 Comm: swapper/37 Kdump: loaded Not tainted 6.12.0 #67 +Hardware name: Red Hat KVM/RHEL, BIOS 1.16.1-1.el9 04/01/2014 +Call Trace: + +dump_stack_lvl (lib/dump_stack.c:124) +print_address_description.constprop.0 (mm/kasan/report.c:378) +? dst_destroy (net/core/dst.c:112) +print_report (mm/kasan/report.c:489) +? dst_destroy (net/core/dst.c:112) +? kasan_addr_to_slab (mm/kasan/common.c:37) +kasan_report (mm/kasan/report.c:603) +? dst_destroy (net/core/dst.c:112) +? rcu_do_batch (kernel/rcu/tree.c:2567) +dst_destroy (net/core/dst.c:112) +rcu_do_batch (kernel/rcu/tree.c:2567) +? __pfx_rcu_do_batch (kernel/rcu/tree.c:2491) +? 
lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4339 kernel/locking/lockdep.c:4406) +rcu_core (kernel/rcu/tree.c:2825) +handle_softirqs (kernel/softirq.c:554) +__irq_exit_rcu (kernel/softirq.c:589 kernel/softirq.c:428 kernel/softirq.c:637) +irq_exit_rcu (kernel/softirq.c:651) +sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1049 arch/x86/kernel/apic/apic.c:1049) + + +asm_sysvec_apic_timer_interrupt (./arch/x86/include/asm/idtentry.h:702) +RIP: 0010:default_idle (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:92 arch/x86/kernel/process.c:743) +Code: 00 4d 29 c8 4c 01 c7 4c 29 c2 e9 6e ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 0f 00 2d c7 c9 27 00 fb f4 c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 90 +RSP: 0018:ffff888100d2fe00 EFLAGS: 00000246 +RAX: 00000000001870ed RBX: 1ffff110201a5fc2 RCX: ffffffffb61a3e46 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffb3d4d123 +RBP: 0000000000000000 R08: 0000000000000001 R09: ffffed11c7e1835d +R10: ffff888e3f0c1aeb R11: 0000000000000000 R12: 0000000000000000 +R13: ffff888100d20000 R14: dffffc0000000000 R15: 0000000000000000 +? ct_kernel_exit.constprop.0 (kernel/context_tracking.c:148) +? cpuidle_idle_call (kernel/sched/idle.c:186) +default_idle_call (./include/linux/cpuidle.h:143 kernel/sched/idle.c:118) +cpuidle_idle_call (kernel/sched/idle.c:186) +? __pfx_cpuidle_idle_call (kernel/sched/idle.c:168) +? lock_release (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5848) +? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4347 kernel/locking/lockdep.c:4406) +? tsc_verify_tsc_adjust (arch/x86/kernel/tsc_sync.c:59) +do_idle (kernel/sched/idle.c:326) +cpu_startup_entry (kernel/sched/idle.c:423 (discriminator 1)) +start_secondary (arch/x86/kernel/smpboot.c:202 arch/x86/kernel/smpboot.c:282) +? __pfx_start_secondary (arch/x86/kernel/smpboot.c:232) +? 
soft_restart_cpu (arch/x86/kernel/head_64.S:452) +common_startup_64 (arch/x86/kernel/head_64.S:414) + +Dec 03 05:46:18 kernel: +Allocated by task 12184: +kasan_save_stack (mm/kasan/common.c:48) +kasan_save_track (./arch/x86/include/asm/current.h:49 mm/kasan/common.c:60 mm/kasan/common.c:69) +__kasan_slab_alloc (mm/kasan/common.c:319 mm/kasan/common.c:345) +kmem_cache_alloc_noprof (mm/slub.c:4085 mm/slub.c:4134 mm/slub.c:4141) +copy_net_ns (net/core/net_namespace.c:421 net/core/net_namespace.c:480) +create_new_namespaces (kernel/nsproxy.c:110) +unshare_nsproxy_namespaces (kernel/nsproxy.c:228 (discriminator 4)) +ksys_unshare (kernel/fork.c:3313) +__x64_sys_unshare (kernel/fork.c:3382) +do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) +entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) +Dec 03 05:46:18 kernel: +Freed by task 11: +kasan_save_stack (mm/kasan/common.c:48) +kasan_save_track (./arch/x86/include/asm/current.h:49 mm/kasan/common.c:60 mm/kasan/common.c:69) +kasan_save_free_info (mm/kasan/generic.c:582) +__kasan_slab_free (mm/kasan/common.c:271) +kmem_cache_free (mm/slub.c:4579 mm/slub.c:4681) +cleanup_net (net/core/net_namespace.c:456 net/core/net_namespace.c:446 net/core/net_namespace.c:647) +process_one_work (kernel/workqueue.c:3229) +worker_thread (kernel/workqueue.c:3304 kernel/workqueue.c:3391) +kthread (kernel/kthread.c:389) +ret_from_fork (arch/x86/kernel/process.c:147) +ret_from_fork_asm (arch/x86/entry/entry_64.S:257) +Dec 03 05:46:18 kernel: +Last potentially related work creation: +kasan_save_stack (mm/kasan/common.c:48) +__kasan_record_aux_stack (mm/kasan/generic.c:541) +insert_work (./include/linux/instrumented.h:68 ./include/asm-generic/bitops/instrumented-non-atomic.h:141 kernel/workqueue.c:788 kernel/workqueue.c:795 kernel/workqueue.c:2186) +__queue_work (kernel/workqueue.c:2340) +queue_work_on (kernel/workqueue.c:2391) +xfrm_policy_insert (net/xfrm/xfrm_policy.c:1610) +xfrm_add_policy 
(net/xfrm/xfrm_user.c:2116) +xfrm_user_rcv_msg (net/xfrm/xfrm_user.c:3321) +netlink_rcv_skb (net/netlink/af_netlink.c:2536) +xfrm_netlink_rcv (net/xfrm/xfrm_user.c:3344) +netlink_unicast (net/netlink/af_netlink.c:1316 net/netlink/af_netlink.c:1342) +netlink_sendmsg (net/netlink/af_netlink.c:1886) +sock_write_iter (net/socket.c:729 net/socket.c:744 net/socket.c:1165) +vfs_write (fs/read_write.c:590 fs/read_write.c:683) +ksys_write (fs/read_write.c:736) +do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) +entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) +Dec 03 05:46:18 kernel: +Second to last potentially related work creation: +kasan_save_stack (mm/kasan/common.c:48) +__kasan_record_aux_stack (mm/kasan/generic.c:541) +insert_work (./include/linux/instrumented.h:68 ./include/asm-generic/bitops/instrumented-non-atomic.h:141 kernel/workqueue.c:788 kernel/workqueue.c:795 kernel/workqueue.c:2186) +__queue_work (kernel/workqueue.c:2340) +queue_work_on (kernel/workqueue.c:2391) +__xfrm_state_insert (./include/linux/workqueue.h:723 net/xfrm/xfrm_state.c:1150 net/xfrm/xfrm_state.c:1145 net/xfrm/xfrm_state.c:1513) +xfrm_state_update (./include/linux/spinlock.h:396 net/xfrm/xfrm_state.c:1940) +xfrm_add_sa (net/xfrm/xfrm_user.c:912) +xfrm_user_rcv_msg (net/xfrm/xfrm_user.c:3321) +netlink_rcv_skb (net/netlink/af_netlink.c:2536) +xfrm_netlink_rcv (net/xfrm/xfrm_user.c:3344) +netlink_unicast (net/netlink/af_netlink.c:1316 net/netlink/af_netlink.c:1342) +netlink_sendmsg (net/netlink/af_netlink.c:1886) +sock_write_iter (net/socket.c:729 net/socket.c:744 net/socket.c:1165) +vfs_write (fs/read_write.c:590 fs/read_write.c:683) +ksys_write (fs/read_write.c:736) +do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) +entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) + +Fixes: a8a572a6b5f2 ("xfrm: dst_entries_init() per-net dst_ops") +Reported-by: Ilya Maximets +Closes: 
https://lore.kernel.org/netdev/CANn89iKKYDVpB=MtmfH7nyv2p=rJWSLedO5k7wSZgtY_tO8WQg@mail.gmail.com/T/#m02c98c3009fe66382b73cfb4db9cf1df6fab3fbf +Signed-off-by: Eric Dumazet +Acked-by: Paolo Abeni +Reviewed-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20241204125455.3871859-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/net_namespace.h | 1 + + net/core/net_namespace.c | 20 +++++++++++++++++++- + 2 files changed, 20 insertions(+), 1 deletion(-) + +diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h +index eb6cd43b1746..958c805df191 100644 +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -82,6 +82,7 @@ struct net { + * or to unregister pernet ops + * (pernet_ops_rwsem write locked). + */ ++ struct llist_node defer_free_list; + struct llist_node cleanup_list; /* namespaces on death row */ + + #ifdef CONFIG_KEYS +diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c +index 018e213185a1..92b7fea4d495 100644 +--- a/net/core/net_namespace.c ++++ b/net/core/net_namespace.c +@@ -442,6 +442,21 @@ static struct net *net_alloc(void) + goto out; + } + ++static LLIST_HEAD(defer_free_list); ++ ++static void net_complete_free(void) ++{ ++ struct llist_node *kill_list; ++ struct net *net, *next; ++ ++ /* Get the list of namespaces to free from last round. */ ++ kill_list = llist_del_all(&defer_free_list); ++ ++ llist_for_each_entry_safe(net, next, kill_list, defer_free_list) ++ kmem_cache_free(net_cachep, net); ++ ++} ++ + static void net_free(struct net *net) + { + if (refcount_dec_and_test(&net->passive)) { +@@ -450,7 +465,8 @@ static void net_free(struct net *net) + /* There should not be any trackers left there. */ + ref_tracker_dir_exit(&net->notrefcnt_tracker); + +- kmem_cache_free(net_cachep, net); ++ /* Wait for an extra rcu_barrier() before final free. 
*/ ++ llist_add(&net->defer_free_list, &defer_free_list); + } + } + +@@ -627,6 +643,8 @@ static void cleanup_net(struct work_struct *work) + */ + rcu_barrier(); + ++ net_complete_free(); ++ + /* Finally it is safe to free my network namespace structure */ + list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { + list_del_init(&net->exit_list); +-- +2.39.5 + diff --git a/queue-6.6/net-dsa-felix-fix-stuck-cpu-injected-packets-with-sh.patch b/queue-6.6/net-dsa-felix-fix-stuck-cpu-injected-packets-with-sh.patch new file mode 100644 index 00000000000..2ae30474b95 --- /dev/null +++ b/queue-6.6/net-dsa-felix-fix-stuck-cpu-injected-packets-with-sh.patch @@ -0,0 +1,171 @@ +From af98fd987c5a5ef3f313d0b799793c27f71cc2df Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 15:26:40 +0200 +Subject: net: dsa: felix: fix stuck CPU-injected packets with short taprio + windows + +From: Vladimir Oltean + +[ Upstream commit acfcdb78d5d4cdb78e975210c8825b9a112463f6 ] + +With this port schedule: + +tc qdisc replace dev $send_if parent root handle 100 taprio \ + num_tc 8 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + base-time 0 cycle-time 10000 \ + sched-entry S 01 1250 \ + sched-entry S 02 1250 \ + sched-entry S 04 1250 \ + sched-entry S 08 1250 \ + sched-entry S 10 1250 \ + sched-entry S 20 1250 \ + sched-entry S 40 1250 \ + sched-entry S 80 1250 \ + flags 2 + +ptp4l would fail to take TX timestamps of Pdelay_Resp messages like this: + +increasing tx_timestamp_timeout may correct this issue, but it is likely caused by a driver bug +ptp4l[4134.168]: port 2: send peer delay response failed + +It turns out that the driver can't take their TX timestamps because it +can't transmit them in the first place. And there's nothing special +about the Pdelay_Resp packets - they're just regular 68 byte packets. +But with this taprio configuration, the switch would refuse to send even +the ETH_ZLEN minimum packet size. 
+ +This should have definitely not been the case. When applying the taprio +config, the driver prints: + +mscc_felix 0000:00:00.5: port 0 tc 0 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 1 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 2 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 3 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 4 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 5 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 6 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 7 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 132 octets including FCS + +and thus, everything under 132 bytes - ETH_FCS_LEN should have been sent +without problems. Yet it's not. + +For the forwarding path, the configuration is fine, yet packets injected +from Linux get stuck with this schedule no matter what. + +The first hint that the static guard bands are the cause of the problem +is that reverting Michael Walle's commit 297c4de6f780 ("net: dsa: felix: +re-enable TAS guard band mode") made things work. It must be that the +guard bands are calculated incorrectly. 
+ +I remembered that there is a magic constant in the driver, set to 33 ns +for no logical reason other than experimentation, which says "never let +the static guard bands get so large as to leave less than this amount of +remaining space in the time slot, because the queue system will refuse +to schedule packets otherwise, and they will get stuck". I had a hunch +that my previous experimentally-determined value was only good for +packets coming from the forwarding path, and that the CPU injection path +needed more. + +I came to the new value of 35 ns through binary search, after seeing +that with 544 ns (the bit time required to send the Pdelay_Resp packet +at gigabit) it works. Again, this is purely experimental, there's no +logic and the manual doesn't say anything. + +The new driver prints for this schedule look like this: + +mscc_felix 0000:00:00.5: port 0 tc 0 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 1 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 2 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 3 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 4 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 5 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 6 min gate length 1250 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 7 min gate length 1250 
ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 131 octets including FCS + +So yes, the maximum MTU is now even smaller by 1 byte than before. +This is maybe counter-intuitive, but makes more sense with a diagram of +one time slot. + +Before: + + Gate open Gate close + | | + v 1250 ns total time slot duration v + <----------------------------------------------------> + <----><----------------------------------------------> + 33 ns 1217 ns static guard band + useful +After: + Gate open Gate close + | | + v 1250 ns total time slot duration v + <----------------------------------------------------> + <-----><---------------------------------------------> + 35 ns 1215 ns static guard band + useful + +The static guard band implemented by this switch hardware directly +determines the maximum allowable MTU for that traffic class. The larger +it is, the earlier the switch will stop scheduling frames for +transmission, because otherwise they might overrun the gate close time +(and avoiding that is the entire purpose of Michael's patch). +So, we now have guard bands smaller by 2 ns, thus, in this particular +case, we lose a byte of the maximum MTU. 
+ +Fixes: 11afdc6526de ("net: dsa: felix: tc-taprio intervals smaller than MTU should send at least one packet") +Signed-off-by: Vladimir Oltean +Reviewed-by: Michael Walle +Link: https://patch.msgid.link/20241210132640.3426788-1-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/ocelot/felix_vsc9959.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c +index afb5dae4439c..8d27933c3733 100644 +--- a/drivers/net/dsa/ocelot/felix_vsc9959.c ++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c +@@ -24,7 +24,7 @@ + #define VSC9959_NUM_PORTS 6 + + #define VSC9959_TAS_GCL_ENTRY_MAX 63 +-#define VSC9959_TAS_MIN_GATE_LEN_NS 33 ++#define VSC9959_TAS_MIN_GATE_LEN_NS 35 + #define VSC9959_VCAP_POLICER_BASE 63 + #define VSC9959_VCAP_POLICER_MAX 383 + #define VSC9959_SWITCH_PCI_BAR 4 +@@ -1056,11 +1056,15 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot) + mdiobus_free(felix->imdio); + } + +-/* The switch considers any frame (regardless of size) as eligible for +- * transmission if the traffic class gate is open for at least 33 ns. ++/* The switch considers any frame (regardless of size) as eligible ++ * for transmission if the traffic class gate is open for at least ++ * VSC9959_TAS_MIN_GATE_LEN_NS. ++ * + * Overruns are prevented by cropping an interval at the end of the gate time +- * slot for which egress scheduling is blocked, but we need to still keep 33 ns +- * available for one packet to be transmitted, otherwise the port tc will hang. ++ * slot for which egress scheduling is blocked, but we need to still keep ++ * VSC9959_TAS_MIN_GATE_LEN_NS available for one packet to be transmitted, ++ * otherwise the port tc will hang. ++ * + * This function returns the size of a gate interval that remains available for + * setting the guard band, after reserving the space for one egress frame. 
+ */ +@@ -1303,7 +1307,8 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) + * per-tc static guard band lengths, so it reduces the + * useful gate interval length. Therefore, be careful + * to calculate a guard band (and therefore max_sdu) +- * that still leaves 33 ns available in the time slot. ++ * that still leaves VSC9959_TAS_MIN_GATE_LEN_NS ++ * available in the time slot. + */ + max_sdu = div_u64(remaining_gate_len_ps, picos_per_byte); + /* A TC gate may be completely closed, which is a +-- +2.39.5 + diff --git a/queue-6.6/net-dsa-microchip-ksz9896-register-regmap-alignment-.patch b/queue-6.6/net-dsa-microchip-ksz9896-register-regmap-alignment-.patch new file mode 100644 index 00000000000..23613fdfa4a --- /dev/null +++ b/queue-6.6/net-dsa-microchip-ksz9896-register-regmap-alignment-.patch @@ -0,0 +1,140 @@ +From e5fea8fd7d9c1c9f3c329dc2f41de77a6508c524 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Dec 2024 10:29:32 +0100 +Subject: net: dsa: microchip: KSZ9896 register regmap alignment to 32 bit + boundaries + +From: Jesse Van Gavere + +[ Upstream commit 5af53577c64fa84da032d490b701127fe8d1a6aa ] + +Commit 8d7ae22ae9f8 ("net: dsa: microchip: KSZ9477 register regmap +alignment to 32 bit boundaries") fixed an issue whereby regmap_reg_range +did not allow writes as 32 bit words to KSZ9477 PHY registers, this fix +for KSZ9896 is adapted from there as the same errata is present in +KSZ9896C as "Module 5: Certain PHY registers must be written as pairs +instead of singly" the explanation below is likewise taken from this +commit. + +The commit provided code +to apply "Module 6: Certain PHY registers must be written as pairs instead +of singly" errata for KSZ9477 as this chip for certain PHY registers +(0xN120 to 0xN13F, N=1,2,3,4,5) must be accessed as 32 bit words instead +of 16 or 8 bit access. +Otherwise, adjacent registers (no matter if reserved or not) are +overwritten with 0x0. + +Without this patch some registers (e.g. 
0x113c or 0x1134) required for 32 +bit access are out of valid regmap ranges. + +As a result, following error is observed and KSZ9896 is not properly +configured: + +ksz-switch spi1.0: can't rmw 32bit reg 0x113c: -EIO +ksz-switch spi1.0: can't rmw 32bit reg 0x1134: -EIO +ksz-switch spi1.0 lan1 (uninitialized): failed to connect to PHY: -EIO +ksz-switch spi1.0 lan1 (uninitialized): error -5 setting up PHY for tree 0, switch 0, port 0 + +The solution is to modify regmap_reg_range to allow accesses with 4 bytes +boundaries. + +Fixes: 5c844d57aa78 ("net: dsa: microchip: fix writes to phy registers >= 0x10") +Signed-off-by: Jesse Van Gavere +Link: https://patch.msgid.link/20241211092932.26881-1-jesse.vangavere@scioteq.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/microchip/ksz_common.c | 42 +++++++++++--------------- + 1 file changed, 18 insertions(+), 24 deletions(-) + +diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c +index 1c3f18649998..997c225dfba4 100644 +--- a/drivers/net/dsa/microchip/ksz_common.c ++++ b/drivers/net/dsa/microchip/ksz_common.c +@@ -892,10 +892,9 @@ static const struct regmap_range ksz9896_valid_regs[] = { + regmap_reg_range(0x1030, 0x1030), + regmap_reg_range(0x1100, 0x1115), + regmap_reg_range(0x111a, 0x111f), +- regmap_reg_range(0x1122, 0x1127), +- regmap_reg_range(0x112a, 0x112b), +- regmap_reg_range(0x1136, 0x1139), +- regmap_reg_range(0x113e, 0x113f), ++ regmap_reg_range(0x1120, 0x112b), ++ regmap_reg_range(0x1134, 0x113b), ++ regmap_reg_range(0x113c, 0x113f), + regmap_reg_range(0x1400, 0x1401), + regmap_reg_range(0x1403, 0x1403), + regmap_reg_range(0x1410, 0x1417), +@@ -922,10 +921,9 @@ static const struct regmap_range ksz9896_valid_regs[] = { + regmap_reg_range(0x2030, 0x2030), + regmap_reg_range(0x2100, 0x2115), + regmap_reg_range(0x211a, 0x211f), +- regmap_reg_range(0x2122, 0x2127), +- regmap_reg_range(0x212a, 0x212b), +- regmap_reg_range(0x2136, 0x2139), 
+- regmap_reg_range(0x213e, 0x213f), ++ regmap_reg_range(0x2120, 0x212b), ++ regmap_reg_range(0x2134, 0x213b), ++ regmap_reg_range(0x213c, 0x213f), + regmap_reg_range(0x2400, 0x2401), + regmap_reg_range(0x2403, 0x2403), + regmap_reg_range(0x2410, 0x2417), +@@ -952,10 +950,9 @@ static const struct regmap_range ksz9896_valid_regs[] = { + regmap_reg_range(0x3030, 0x3030), + regmap_reg_range(0x3100, 0x3115), + regmap_reg_range(0x311a, 0x311f), +- regmap_reg_range(0x3122, 0x3127), +- regmap_reg_range(0x312a, 0x312b), +- regmap_reg_range(0x3136, 0x3139), +- regmap_reg_range(0x313e, 0x313f), ++ regmap_reg_range(0x3120, 0x312b), ++ regmap_reg_range(0x3134, 0x313b), ++ regmap_reg_range(0x313c, 0x313f), + regmap_reg_range(0x3400, 0x3401), + regmap_reg_range(0x3403, 0x3403), + regmap_reg_range(0x3410, 0x3417), +@@ -982,10 +979,9 @@ static const struct regmap_range ksz9896_valid_regs[] = { + regmap_reg_range(0x4030, 0x4030), + regmap_reg_range(0x4100, 0x4115), + regmap_reg_range(0x411a, 0x411f), +- regmap_reg_range(0x4122, 0x4127), +- regmap_reg_range(0x412a, 0x412b), +- regmap_reg_range(0x4136, 0x4139), +- regmap_reg_range(0x413e, 0x413f), ++ regmap_reg_range(0x4120, 0x412b), ++ regmap_reg_range(0x4134, 0x413b), ++ regmap_reg_range(0x413c, 0x413f), + regmap_reg_range(0x4400, 0x4401), + regmap_reg_range(0x4403, 0x4403), + regmap_reg_range(0x4410, 0x4417), +@@ -1012,10 +1008,9 @@ static const struct regmap_range ksz9896_valid_regs[] = { + regmap_reg_range(0x5030, 0x5030), + regmap_reg_range(0x5100, 0x5115), + regmap_reg_range(0x511a, 0x511f), +- regmap_reg_range(0x5122, 0x5127), +- regmap_reg_range(0x512a, 0x512b), +- regmap_reg_range(0x5136, 0x5139), +- regmap_reg_range(0x513e, 0x513f), ++ regmap_reg_range(0x5120, 0x512b), ++ regmap_reg_range(0x5134, 0x513b), ++ regmap_reg_range(0x513c, 0x513f), + regmap_reg_range(0x5400, 0x5401), + regmap_reg_range(0x5403, 0x5403), + regmap_reg_range(0x5410, 0x5417), +@@ -1042,10 +1037,9 @@ static const struct regmap_range 
ksz9896_valid_regs[] = { + regmap_reg_range(0x6030, 0x6030), + regmap_reg_range(0x6100, 0x6115), + regmap_reg_range(0x611a, 0x611f), +- regmap_reg_range(0x6122, 0x6127), +- regmap_reg_range(0x612a, 0x612b), +- regmap_reg_range(0x6136, 0x6139), +- regmap_reg_range(0x613e, 0x613f), ++ regmap_reg_range(0x6120, 0x612b), ++ regmap_reg_range(0x6134, 0x613b), ++ regmap_reg_range(0x613c, 0x613f), + regmap_reg_range(0x6300, 0x6301), + regmap_reg_range(0x6400, 0x6401), + regmap_reg_range(0x6403, 0x6403), +-- +2.39.5 + diff --git a/queue-6.6/net-lapb-increase-lapb_header_len.patch b/queue-6.6/net-lapb-increase-lapb_header_len.patch new file mode 100644 index 00000000000..3e5eff66843 --- /dev/null +++ b/queue-6.6/net-lapb-increase-lapb_header_len.patch @@ -0,0 +1,86 @@ +From 84b057378447917851b89f89685801d454d03e3d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 14:10:31 +0000 +Subject: net: lapb: increase LAPB_HEADER_LEN + +From: Eric Dumazet + +[ Upstream commit a6d75ecee2bf828ac6a1b52724aba0a977e4eaf4 ] + +It is unclear if net/lapb code is supposed to be ready for 8021q. + +We can at least avoid crashes like the following : + +skbuff: skb_under_panic: text:ffffffff8aabe1f6 len:24 put:20 head:ffff88802824a400 data:ffff88802824a3fe tail:0x16 end:0x140 dev:nr0.2 +------------[ cut here ]------------ + kernel BUG at net/core/skbuff.c:206 ! 
+Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI +CPU: 1 UID: 0 PID: 5508 Comm: dhcpcd Not tainted 6.12.0-rc7-syzkaller-00144-g66418447d27b #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/30/2024 + RIP: 0010:skb_panic net/core/skbuff.c:206 [inline] + RIP: 0010:skb_under_panic+0x14b/0x150 net/core/skbuff.c:216 +Code: 0d 8d 48 c7 c6 2e 9e 29 8e 48 8b 54 24 08 8b 0c 24 44 8b 44 24 04 4d 89 e9 50 41 54 41 57 41 56 e8 1a 6f 37 02 48 83 c4 20 90 <0f> 0b 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 +RSP: 0018:ffffc90002ddf638 EFLAGS: 00010282 +RAX: 0000000000000086 RBX: dffffc0000000000 RCX: 7a24750e538ff600 +RDX: 0000000000000000 RSI: 0000000000000201 RDI: 0000000000000000 +RBP: ffff888034a86650 R08: ffffffff8174b13c R09: 1ffff920005bbe60 +R10: dffffc0000000000 R11: fffff520005bbe61 R12: 0000000000000140 +R13: ffff88802824a400 R14: ffff88802824a3fe R15: 0000000000000016 +FS: 00007f2a5990d740(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 000000110c2631fd CR3: 0000000029504000 CR4: 00000000003526f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + skb_push+0xe5/0x100 net/core/skbuff.c:2636 + nr_header+0x36/0x320 net/netrom/nr_dev.c:69 + dev_hard_header include/linux/netdevice.h:3148 [inline] + vlan_dev_hard_header+0x359/0x480 net/8021q/vlan_dev.c:83 + dev_hard_header include/linux/netdevice.h:3148 [inline] + lapbeth_data_transmit+0x1f6/0x2a0 drivers/net/wan/lapbether.c:257 + lapb_data_transmit+0x91/0xb0 net/lapb/lapb_iface.c:447 + lapb_transmit_buffer+0x168/0x1f0 net/lapb/lapb_out.c:149 + lapb_establish_data_link+0x84/0xd0 + lapb_device_event+0x4e0/0x670 + notifier_call_chain+0x19f/0x3e0 kernel/notifier.c:93 + __dev_notify_flags+0x207/0x400 + dev_change_flags+0xf0/0x1a0 net/core/dev.c:8922 + devinet_ioctl+0xa4e/0x1aa0 net/ipv4/devinet.c:1188 + 
inet_ioctl+0x3d7/0x4f0 net/ipv4/af_inet.c:1003 + sock_do_ioctl+0x158/0x460 net/socket.c:1227 + sock_ioctl+0x626/0x8e0 net/socket.c:1346 + vfs_ioctl fs/ioctl.c:51 [inline] + __do_sys_ioctl fs/ioctl.c:907 [inline] + __se_sys_ioctl+0xf9/0x170 fs/ioctl.c:893 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: syzbot+fb99d1b0c0f81d94a5e2@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/67506220.050a0220.17bd51.006c.GAE@google.com/T/#u +Signed-off-by: Eric Dumazet +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20241204141031.4030267-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/lapb.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/net/lapb.h b/include/net/lapb.h +index 124ee122f2c8..6c07420644e4 100644 +--- a/include/net/lapb.h ++++ b/include/net/lapb.h +@@ -4,7 +4,7 @@ + #include + #include + +-#define LAPB_HEADER_LEN 20 /* LAPB over Ethernet + a bit more */ ++#define LAPB_HEADER_LEN MAX_HEADER /* LAPB over Ethernet + a bit more */ + + #define LAPB_ACK_PENDING_CONDITION 0x01 + #define LAPB_REJECT_CONDITION 0x02 +-- +2.39.5 + diff --git a/queue-6.6/net-mlx5-dr-prevent-potential-error-pointer-derefere.patch b/queue-6.6/net-mlx5-dr-prevent-potential-error-pointer-derefere.patch new file mode 100644 index 00000000000..49c6babd6e6 --- /dev/null +++ b/queue-6.6/net-mlx5-dr-prevent-potential-error-pointer-derefere.patch @@ -0,0 +1,43 @@ +From ea774f956cadc33339b462bff62f3c3bcfa55b5a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 15:06:41 +0300 +Subject: net/mlx5: DR, prevent potential error pointer dereference + +From: Dan Carpenter + +[ Upstream commit 11776cff0b563c8b8a4fa76cab620bfb633a8cb8 ] + +The dr_domain_add_vport_cap() function generally returns NULL on error +but sometimes we want it to return ERR_PTR(-EBUSY) so the caller 
can +retry. The problem here is that "ret" can be either -EBUSY or -ENOMEM +and if it's an -ENOMEM then the error pointer is propagated back and +eventually dereferenced in dr_ste_v0_build_src_gvmi_qpn_tag(). + +Fixes: 11a45def2e19 ("net/mlx5: DR, Add support for SF vports") +Signed-off-by: Dan Carpenter +Reviewed-by: Tariq Toukan +Link: https://patch.msgid.link/07477254-e179-43e2-b1b3-3b9db4674195@stanley.mountain +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +index 3d74109f8230..49f22cad92bf 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +@@ -297,7 +297,9 @@ dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport) + if (ret) { + mlx5dr_dbg(dmn, "Couldn't insert new vport into xarray (%d)\n", ret); + kvfree(vport_caps); +- return ERR_PTR(ret); ++ if (ret == -EBUSY) ++ return ERR_PTR(-EBUSY); ++ return NULL; + } + + return vport_caps; +-- +2.39.5 + diff --git a/queue-6.6/net-mscc-ocelot-be-resilient-to-loss-of-ptp-packets-.patch b/queue-6.6/net-mscc-ocelot-be-resilient-to-loss-of-ptp-packets-.patch new file mode 100644 index 00000000000..3661d7f4a54 --- /dev/null +++ b/queue-6.6/net-mscc-ocelot-be-resilient-to-loss-of-ptp-packets-.patch @@ -0,0 +1,404 @@ +From 924eef7b2dffc01578e82d5c2088009edf2e6b7b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 16:55:18 +0200 +Subject: net: mscc: ocelot: be resilient to loss of PTP packets during + transmission + +From: Vladimir Oltean + +[ Upstream commit b454abfab52543c44b581afc807b9f97fc1e7a3a ] + +The Felix DSA driver presents unique challenges that make the simplistic +ocelot PTP TX timestamping procedure unreliable: any 
transmitted packet +may be lost in hardware before it ever leaves our local system. + +This may happen because there is congestion on the DSA conduit, the +switch CPU port or even user port (Qdiscs like taprio may delay packets +indefinitely by design). + +The technical problem is that the kernel, i.e. ocelot_port_add_txtstamp_skb(), +runs out of timestamp IDs eventually, because it never detects that +packets are lost, and keeps the IDs of the lost packets on hold +indefinitely. The manifestation of the issue once the entire timestamp +ID range becomes busy looks like this in dmesg: + +mscc_felix 0000:00:00.5: port 0 delivering skb without TX timestamp +mscc_felix 0000:00:00.5: port 1 delivering skb without TX timestamp + +At the surface level, we need a timeout timer so that the kernel knows a +timestamp ID is available again. But there is a deeper problem with the +implementation, which is the monotonically increasing ocelot_port->ts_id. +In the presence of packet loss, it will be impossible to detect that and +reuse one of the holes created in the range of free timestamp IDs. + +What we actually need is a bitmap of 63 timestamp IDs tracking which one +is available. That is able to use up holes caused by packet loss, but +also gives us a unique opportunity to not implement an actual timer_list +for the timeout timer (very complicated in terms of locking). + +We could only declare a timestamp ID stale on demand (lazily), aka when +there's no other timestamp ID available. There are pros and cons to this +approach: the implementation is much more simple than per-packet timers +would be, but most of the stale packets would be quasi-leaked - not +really leaked, but blocked in driver memory, since this algorithm sees +no reason to free them. + +An improved technique would be to check for stale timestamp IDs every +time we allocate a new one. 
Assuming a constant flux of PTP packets, +this avoids stale packets being blocked in memory, but of course, +packets lost at the end of the flux are still blocked until the flux +resumes (nobody left to kick them out). + +Since implementing per-packet timers is way too complicated, this should +be good enough. + +Testing procedure: + +Persistently block traffic class 5 and try to run PTP on it: +$ tc qdisc replace dev swp3 parent root taprio num_tc 8 \ + map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + base-time 0 sched-entry S 0xdf 100000 flags 0x2 +[ 126.948141] mscc_felix 0000:00:00.5: port 3 tc 5 min gate length 0 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 1 octets including FCS +$ ptp4l -i swp3 -2 -P -m --socket_priority 5 --fault_reset_interval ASAP --logSyncInterval -3 +ptp4l[70.351]: port 1 (swp3): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[70.354]: port 0 (/var/run/ptp4l): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[70.358]: port 0 (/var/run/ptp4lro): INITIALIZING to LISTENING on INIT_COMPLETE +[ 70.394583] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +ptp4l[70.406]: timed out while polling for tx timestamp +ptp4l[70.406]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it +ptp4l[70.406]: port 1 (swp3): send peer delay response failed +ptp4l[70.407]: port 1 (swp3): clearing fault immediately +ptp4l[70.952]: port 1 (swp3): new foreign master d858d7.fffe.00ca6d-1 +[ 71.394858] mscc_felix 0000:00:00.5: port 3 timestamp id 1 +ptp4l[71.400]: timed out while polling for tx timestamp +ptp4l[71.400]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it +ptp4l[71.401]: port 1 (swp3): send peer delay response failed +ptp4l[71.401]: port 1 (swp3): clearing fault immediately +[ 72.393616] mscc_felix 0000:00:00.5: port 3 timestamp id 2 +ptp4l[72.401]: timed out while polling 
for tx timestamp +ptp4l[72.402]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it +ptp4l[72.402]: port 1 (swp3): send peer delay response failed +ptp4l[72.402]: port 1 (swp3): clearing fault immediately +ptp4l[72.952]: port 1 (swp3): new foreign master d858d7.fffe.00ca6d-1 +[ 73.395291] mscc_felix 0000:00:00.5: port 3 timestamp id 3 +ptp4l[73.400]: timed out while polling for tx timestamp +ptp4l[73.400]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it +ptp4l[73.400]: port 1 (swp3): send peer delay response failed +ptp4l[73.400]: port 1 (swp3): clearing fault immediately +[ 74.394282] mscc_felix 0000:00:00.5: port 3 timestamp id 4 +ptp4l[74.400]: timed out while polling for tx timestamp +ptp4l[74.401]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it +ptp4l[74.401]: port 1 (swp3): send peer delay response failed +ptp4l[74.401]: port 1 (swp3): clearing fault immediately +ptp4l[74.953]: port 1 (swp3): new foreign master d858d7.fffe.00ca6d-1 +[ 75.396830] mscc_felix 0000:00:00.5: port 3 invalidating stale timestamp ID 0 which seems lost +[ 75.405760] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +ptp4l[75.410]: timed out while polling for tx timestamp +ptp4l[75.411]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it +ptp4l[75.411]: port 1 (swp3): send peer delay response failed +ptp4l[75.411]: port 1 (swp3): clearing fault immediately +(...) 
+ +Remove the blocking condition and see that the port recovers: +$ same tc command as above, but use "sched-entry S 0xff" instead +$ same ptp4l command as above +ptp4l[99.489]: port 1 (swp3): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[99.490]: port 0 (/var/run/ptp4l): INITIALIZING to LISTENING on INIT_COMPLETE +ptp4l[99.492]: port 0 (/var/run/ptp4lro): INITIALIZING to LISTENING on INIT_COMPLETE +[ 100.403768] mscc_felix 0000:00:00.5: port 3 invalidating stale timestamp ID 0 which seems lost +[ 100.412545] mscc_felix 0000:00:00.5: port 3 invalidating stale timestamp ID 1 which seems lost +[ 100.421283] mscc_felix 0000:00:00.5: port 3 invalidating stale timestamp ID 2 which seems lost +[ 100.430015] mscc_felix 0000:00:00.5: port 3 invalidating stale timestamp ID 3 which seems lost +[ 100.438744] mscc_felix 0000:00:00.5: port 3 invalidating stale timestamp ID 4 which seems lost +[ 100.447470] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 100.505919] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +ptp4l[100.963]: port 1 (swp3): new foreign master d858d7.fffe.00ca6d-1 +[ 101.405077] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 101.507953] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 102.405405] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 102.509391] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 103.406003] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 103.510011] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 104.405601] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 104.510624] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +ptp4l[104.965]: selected best master clock d858d7.fffe.00ca6d +ptp4l[104.966]: port 1 (swp3): assuming the grand master role +ptp4l[104.967]: port 1 (swp3): LISTENING to GRAND_MASTER on RS_GRAND_MASTER +[ 105.106201] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.232420] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.359001] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.405500] 
mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.485356] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.511220] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.610938] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +[ 105.737237] mscc_felix 0000:00:00.5: port 3 timestamp id 0 +(...) + +Notice that in this new usage pattern, a non-congested port should +basically use timestamp ID 0 all the time, progressing to higher numbers +only if there are unacknowledged timestamps in flight. Compare this to +the old usage, where the timestamp ID used to monotonically increase +modulo OCELOT_MAX_PTP_ID. + +In terms of implementation, this simplifies the bookkeeping of the +ocelot_port :: ts_id and ptp_skbs_in_flight. Since we need to traverse +the list of two-step timestampable skbs for each new packet anyway, the +information can already be computed and does not need to be stored. +Also, ocelot_port->tx_skbs is always accessed under the switch-wide +ocelot->ts_id_lock IRQ-unsafe spinlock, so we don't need the skb queue's +lock and can use the unlocked primitives safely. + +This problem was actually detected using the tc-taprio offload, and is +causing trouble in TSN scenarios, which Felix (NXP LS1028A / VSC9959) +supports but Ocelot (VSC7514) does not. Thus, I've selected the commit +to blame as the one adding initial timestamping support for the Felix +switch. 
+ +Fixes: c0bcf537667c ("net: dsa: ocelot: add hardware timestamping support for Felix") +Signed-off-by: Vladimir Oltean +Link: https://patch.msgid.link/20241205145519.1236778-5-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_ptp.c | 134 +++++++++++++++---------- + include/linux/dsa/ocelot.h | 1 + + include/soc/mscc/ocelot.h | 2 - + 3 files changed, 80 insertions(+), 57 deletions(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c +index c54e96ff3976..bc44aa635d49 100644 +--- a/drivers/net/ethernet/mscc/ocelot_ptp.c ++++ b/drivers/net/ethernet/mscc/ocelot_ptp.c +@@ -14,6 +14,8 @@ + #include + #include "ocelot.h" + ++#define OCELOT_PTP_TX_TSTAMP_TIMEOUT (5 * HZ) ++ + int ocelot_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts) + { + struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info); +@@ -607,34 +609,88 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port, + } + EXPORT_SYMBOL(ocelot_get_ts_info); + +-static int ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, ++static struct sk_buff *ocelot_port_dequeue_ptp_tx_skb(struct ocelot *ocelot, ++ int port, u8 ts_id, ++ u32 seqid) ++{ ++ struct ocelot_port *ocelot_port = ocelot->ports[port]; ++ struct sk_buff *skb, *skb_tmp, *skb_match = NULL; ++ struct ptp_header *hdr; ++ ++ spin_lock(&ocelot->ts_id_lock); ++ ++ skb_queue_walk_safe(&ocelot_port->tx_skbs, skb, skb_tmp) { ++ if (OCELOT_SKB_CB(skb)->ts_id != ts_id) ++ continue; ++ ++ /* Check that the timestamp ID is for the expected PTP ++ * sequenceId. We don't have to test ptp_parse_header() against ++ * NULL, because we've pre-validated the packet's ptp_class. 
++ */ ++ hdr = ptp_parse_header(skb, OCELOT_SKB_CB(skb)->ptp_class); ++ if (seqid != ntohs(hdr->sequence_id)) ++ continue; ++ ++ __skb_unlink(skb, &ocelot_port->tx_skbs); ++ ocelot->ptp_skbs_in_flight--; ++ skb_match = skb; ++ break; ++ } ++ ++ spin_unlock(&ocelot->ts_id_lock); ++ ++ return skb_match; ++} ++ ++static int ocelot_port_queue_ptp_tx_skb(struct ocelot *ocelot, int port, + struct sk_buff *clone) + { + struct ocelot_port *ocelot_port = ocelot->ports[port]; ++ DECLARE_BITMAP(ts_id_in_flight, OCELOT_MAX_PTP_ID); ++ struct sk_buff *skb, *skb_tmp; ++ unsigned long n; + + spin_lock(&ocelot->ts_id_lock); + +- if (ocelot_port->ptp_skbs_in_flight == OCELOT_MAX_PTP_ID || +- ocelot->ptp_skbs_in_flight == OCELOT_PTP_FIFO_SIZE) { ++ /* To get a better chance of acquiring a timestamp ID, first flush the ++ * stale packets still waiting in the TX timestamping queue. They are ++ * probably lost. ++ */ ++ skb_queue_walk_safe(&ocelot_port->tx_skbs, skb, skb_tmp) { ++ if (time_before(OCELOT_SKB_CB(skb)->ptp_tx_time + ++ OCELOT_PTP_TX_TSTAMP_TIMEOUT, jiffies)) { ++ dev_warn_ratelimited(ocelot->dev, ++ "port %d invalidating stale timestamp ID %u which seems lost\n", ++ port, OCELOT_SKB_CB(skb)->ts_id); ++ __skb_unlink(skb, &ocelot_port->tx_skbs); ++ kfree_skb(skb); ++ ocelot->ptp_skbs_in_flight--; ++ } else { ++ __set_bit(OCELOT_SKB_CB(skb)->ts_id, ts_id_in_flight); ++ } ++ } ++ ++ if (ocelot->ptp_skbs_in_flight == OCELOT_PTP_FIFO_SIZE) { + spin_unlock(&ocelot->ts_id_lock); + return -EBUSY; + } + +- skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS; +- /* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */ +- OCELOT_SKB_CB(clone)->ts_id = ocelot_port->ts_id; +- +- ocelot_port->ts_id++; +- if (ocelot_port->ts_id == OCELOT_MAX_PTP_ID) +- ocelot_port->ts_id = 0; ++ n = find_first_zero_bit(ts_id_in_flight, OCELOT_MAX_PTP_ID); ++ if (n == OCELOT_MAX_PTP_ID) { ++ spin_unlock(&ocelot->ts_id_lock); ++ return -EBUSY; ++ } + +- ocelot_port->ptp_skbs_in_flight++; ++ /* Found an 
available timestamp ID, use it */ ++ OCELOT_SKB_CB(clone)->ts_id = n; ++ OCELOT_SKB_CB(clone)->ptp_tx_time = jiffies; + ocelot->ptp_skbs_in_flight++; +- +- skb_queue_tail(&ocelot_port->tx_skbs, clone); ++ __skb_queue_tail(&ocelot_port->tx_skbs, clone); + + spin_unlock(&ocelot->ts_id_lock); + ++ dev_dbg_ratelimited(ocelot->dev, "port %d timestamp id %lu\n", port, n); ++ + return 0; + } + +@@ -690,12 +746,14 @@ int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, + if (!(*clone)) + return -ENOMEM; + +- err = ocelot_port_add_txtstamp_skb(ocelot, port, *clone); ++ /* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */ ++ err = ocelot_port_queue_ptp_tx_skb(ocelot, port, *clone); + if (err) { + kfree_skb(*clone); + return err; + } + ++ skb_shinfo(*clone)->tx_flags |= SKBTX_IN_PROGRESS; + OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd; + OCELOT_SKB_CB(*clone)->ptp_class = ptp_class; + } +@@ -731,26 +789,14 @@ static void ocelot_get_hwtimestamp(struct ocelot *ocelot, + spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags); + } + +-static bool ocelot_validate_ptp_skb(struct sk_buff *clone, u16 seqid) +-{ +- struct ptp_header *hdr; +- +- hdr = ptp_parse_header(clone, OCELOT_SKB_CB(clone)->ptp_class); +- if (WARN_ON(!hdr)) +- return false; +- +- return seqid == ntohs(hdr->sequence_id); +-} +- + void ocelot_get_txtstamp(struct ocelot *ocelot) + { + int budget = OCELOT_PTP_QUEUE_SZ; + + while (budget--) { +- struct sk_buff *skb, *skb_tmp, *skb_match = NULL; + struct skb_shared_hwtstamps shhwtstamps; + u32 val, id, seqid, txport; +- struct ocelot_port *port; ++ struct sk_buff *skb_match; + struct timespec64 ts; + + val = ocelot_read(ocelot, SYS_PTP_STATUS); +@@ -766,36 +812,14 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) + txport = SYS_PTP_STATUS_PTP_MESS_TXPORT_X(val); + seqid = SYS_PTP_STATUS_PTP_MESS_SEQ_ID(val); + +- port = ocelot->ports[txport]; +- +- spin_lock(&ocelot->ts_id_lock); +- port->ptp_skbs_in_flight--; +- ocelot->ptp_skbs_in_flight--; +- 
spin_unlock(&ocelot->ts_id_lock); +- + /* Retrieve its associated skb */ +-try_again: +- spin_lock(&port->tx_skbs.lock); +- +- skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { +- if (OCELOT_SKB_CB(skb)->ts_id != id) +- continue; +- __skb_unlink(skb, &port->tx_skbs); +- skb_match = skb; +- break; +- } +- +- spin_unlock(&port->tx_skbs.lock); +- +- if (WARN_ON(!skb_match)) ++ skb_match = ocelot_port_dequeue_ptp_tx_skb(ocelot, txport, id, ++ seqid); ++ if (!skb_match) { ++ dev_warn_ratelimited(ocelot->dev, ++ "port %d received TX timestamp (seqid %d, ts id %u) for packet previously declared stale\n", ++ txport, seqid, id); + goto next_ts; +- +- if (!ocelot_validate_ptp_skb(skb_match, seqid)) { +- dev_err_ratelimited(ocelot->dev, +- "port %d received stale TX timestamp for seqid %d, discarding\n", +- txport, seqid); +- kfree_skb(skb); +- goto try_again; + } + + /* Get the h/w timestamp */ +diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h +index 6fbfbde68a37..620a3260fc08 100644 +--- a/include/linux/dsa/ocelot.h ++++ b/include/linux/dsa/ocelot.h +@@ -15,6 +15,7 @@ + struct ocelot_skb_cb { + struct sk_buff *clone; + unsigned int ptp_class; /* valid only for clones */ ++ unsigned long ptp_tx_time; /* valid only for clones */ + u32 tstamp_lo; + u8 ptp_cmd; + u8 ts_id; +diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h +index 846132ca5503..51d3e9ec5aa3 100644 +--- a/include/soc/mscc/ocelot.h ++++ b/include/soc/mscc/ocelot.h +@@ -778,7 +778,6 @@ struct ocelot_port { + + phy_interface_t phy_mode; + +- unsigned int ptp_skbs_in_flight; + struct sk_buff_head tx_skbs; + + unsigned int trap_proto; +@@ -786,7 +785,6 @@ struct ocelot_port { + u16 mrp_ring_id; + + u8 ptp_cmd; +- u8 ts_id; + + u8 index; + +-- +2.39.5 + diff --git a/queue-6.6/net-mscc-ocelot-fix-memory-leak-on-ocelot_port_add_t.patch b/queue-6.6/net-mscc-ocelot-fix-memory-leak-on-ocelot_port_add_t.patch new file mode 100644 index 00000000000..10f5c9c840a --- /dev/null +++ 
b/queue-6.6/net-mscc-ocelot-fix-memory-leak-on-ocelot_port_add_t.patch @@ -0,0 +1,41 @@ +From 8509d3605af428389a02ff83bc4338641fdcfb2c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 16:55:15 +0200 +Subject: net: mscc: ocelot: fix memory leak on ocelot_port_add_txtstamp_skb() + +From: Vladimir Oltean + +[ Upstream commit 4b01bec25bef62544228bce06db6a3afa5d3d6bb ] + +If ocelot_port_add_txtstamp_skb() fails, for example due to a full PTP +timestamp FIFO, we must undo the skb_clone_sk() call with kfree_skb(). +Otherwise, the reference to the skb clone is lost. + +Fixes: 52849bcf0029 ("net: mscc: ocelot: avoid overflowing the PTP timestamp FIFO") +Signed-off-by: Vladimir Oltean +Link: https://patch.msgid.link/20241205145519.1236778-2-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_ptp.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c +index cb32234a5bf1..3c22652879ac 100644 +--- a/drivers/net/ethernet/mscc/ocelot_ptp.c ++++ b/drivers/net/ethernet/mscc/ocelot_ptp.c +@@ -692,8 +692,10 @@ int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, + return -ENOMEM; + + err = ocelot_port_add_txtstamp_skb(ocelot, port, *clone); +- if (err) ++ if (err) { ++ kfree_skb(*clone); + return err; ++ } + + OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd; + OCELOT_SKB_CB(*clone)->ptp_class = ptp_class; +-- +2.39.5 + diff --git a/queue-6.6/net-mscc-ocelot-improve-handling-of-tx-timestamp-for.patch b/queue-6.6/net-mscc-ocelot-improve-handling-of-tx-timestamp-for.patch new file mode 100644 index 00000000000..50e28860e80 --- /dev/null +++ b/queue-6.6/net-mscc-ocelot-improve-handling-of-tx-timestamp-for.patch @@ -0,0 +1,54 @@ +From c0e7078c3ca089eaf996554c89fbc46d683298d0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 16:55:16 +0200 +Subject: net: mscc: ocelot: improve 
handling of TX timestamp for unknown skb + +From: Vladimir Oltean + +[ Upstream commit b6fba4b3f0becb794e274430f3a0839d8ba31262 ] + +This condition, theoretically impossible to trigger, is not really +handled well. By "continuing", we are skipping the write to SYS_PTP_NXT +which advances the timestamp FIFO to the next entry. So we are reading +the same FIFO entry all over again, printing stack traces and eventually +killing the kernel. + +No real problem has been observed here. This is part of a larger rework +of the timestamp IRQ procedure, with this logical change split out into +a patch of its own. We will need to "goto next_ts" for other conditions +as well. + +Fixes: 9fde506e0c53 ("net: mscc: ocelot: warn when a PTP IRQ is raised for an unknown skb") +Signed-off-by: Vladimir Oltean +Link: https://patch.msgid.link/20241205145519.1236778-3-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_ptp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c +index 3c22652879ac..1386fb2ff4a9 100644 +--- a/drivers/net/ethernet/mscc/ocelot_ptp.c ++++ b/drivers/net/ethernet/mscc/ocelot_ptp.c +@@ -790,7 +790,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) + spin_unlock_irqrestore(&port->tx_skbs.lock, flags); + + if (WARN_ON(!skb_match)) +- continue; ++ goto next_ts; + + if (!ocelot_validate_ptp_skb(skb_match, seqid)) { + dev_err_ratelimited(ocelot->dev, +@@ -808,7 +808,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) + shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); + skb_complete_tx_timestamp(skb_match, &shhwtstamps); + +- /* Next ts */ ++next_ts: + ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT); + } + } +-- +2.39.5 + diff --git a/queue-6.6/net-mscc-ocelot-ocelot-ts_id_lock-and-ocelot_port-tx.patch b/queue-6.6/net-mscc-ocelot-ocelot-ts_id_lock-and-ocelot_port-tx.patch new file 
mode 100644 index 00000000000..a5a6608eddf --- /dev/null +++ b/queue-6.6/net-mscc-ocelot-ocelot-ts_id_lock-and-ocelot_port-tx.patch @@ -0,0 +1,104 @@ +From 976d718b186835557e242963e90aa25f5536f205 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 16:55:17 +0200 +Subject: net: mscc: ocelot: ocelot->ts_id_lock and ocelot_port->tx_skbs.lock + are IRQ-safe + +From: Vladimir Oltean + +[ Upstream commit 0c53cdb95eb4a604062e326636971d96dd9b1b26 ] + +ocelot_get_txtstamp() is a threaded IRQ handler, requested explicitly as +such by both ocelot_ptp_rdy_irq_handler() and vsc9959_irq_handler(). + +As such, it runs with IRQs enabled, and not in hardirq context. Thus, +ocelot_port_add_txtstamp_skb() has no reason to turn off IRQs, it cannot +be preempted by ocelot_get_txtstamp(). For the same reason, +dev_kfree_skb_any_reason() will always evaluate as kfree_skb_reason() in +this calling context, so just simplify the dev_kfree_skb_any() call to +kfree_skb(). + +Also, ocelot_port_txtstamp_request() runs from NET_TX softirq context, +not with hardirqs enabled. Thus, ocelot_get_txtstamp() which shares the +ocelot_port->tx_skbs.lock lock with it, has no reason to disable hardirqs. + +This is part of a larger rework of the TX timestamping procedure. +A logical subportion of the rework has been split into a separate +change. 
+ +Signed-off-by: Vladimir Oltean +Link: https://patch.msgid.link/20241205145519.1236778-4-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: b454abfab525 ("net: mscc: ocelot: be resilient to loss of PTP packets during transmission") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_ptp.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c +index 1386fb2ff4a9..c54e96ff3976 100644 +--- a/drivers/net/ethernet/mscc/ocelot_ptp.c ++++ b/drivers/net/ethernet/mscc/ocelot_ptp.c +@@ -611,13 +611,12 @@ static int ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, + struct sk_buff *clone) + { + struct ocelot_port *ocelot_port = ocelot->ports[port]; +- unsigned long flags; + +- spin_lock_irqsave(&ocelot->ts_id_lock, flags); ++ spin_lock(&ocelot->ts_id_lock); + + if (ocelot_port->ptp_skbs_in_flight == OCELOT_MAX_PTP_ID || + ocelot->ptp_skbs_in_flight == OCELOT_PTP_FIFO_SIZE) { +- spin_unlock_irqrestore(&ocelot->ts_id_lock, flags); ++ spin_unlock(&ocelot->ts_id_lock); + return -EBUSY; + } + +@@ -634,7 +633,7 @@ static int ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, + + skb_queue_tail(&ocelot_port->tx_skbs, clone); + +- spin_unlock_irqrestore(&ocelot->ts_id_lock, flags); ++ spin_unlock(&ocelot->ts_id_lock); + + return 0; + } +@@ -753,7 +752,6 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) + u32 val, id, seqid, txport; + struct ocelot_port *port; + struct timespec64 ts; +- unsigned long flags; + + val = ocelot_read(ocelot, SYS_PTP_STATUS); + +@@ -777,7 +775,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) + + /* Retrieve its associated skb */ + try_again: +- spin_lock_irqsave(&port->tx_skbs.lock, flags); ++ spin_lock(&port->tx_skbs.lock); + + skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { + if (OCELOT_SKB_CB(skb)->ts_id != id) +@@ -787,7 +785,7 @@ void ocelot_get_txtstamp(struct ocelot 
*ocelot) + break; + } + +- spin_unlock_irqrestore(&port->tx_skbs.lock, flags); ++ spin_unlock(&port->tx_skbs.lock); + + if (WARN_ON(!skb_match)) + goto next_ts; +@@ -796,7 +794,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) + dev_err_ratelimited(ocelot->dev, + "port %d received stale TX timestamp for seqid %d, discarding\n", + txport, seqid); +- dev_kfree_skb_any(skb); ++ kfree_skb(skb); + goto try_again; + } + +-- +2.39.5 + diff --git a/queue-6.6/net-mscc-ocelot-perform-error-cleanup-in-ocelot_hwst.patch b/queue-6.6/net-mscc-ocelot-perform-error-cleanup-in-ocelot_hwst.patch new file mode 100644 index 00000000000..bf23dceff52 --- /dev/null +++ b/queue-6.6/net-mscc-ocelot-perform-error-cleanup-in-ocelot_hwst.patch @@ -0,0 +1,128 @@ +From 4a63a9e2814c41f2410314f638a0a35153f41c00 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 16:55:19 +0200 +Subject: net: mscc: ocelot: perform error cleanup in ocelot_hwstamp_set() + +From: Vladimir Oltean + +[ Upstream commit 43a4166349a254446e7a3db65f721c6a30daccf3 ] + +An unsupported RX filter will leave the port with TX timestamping still +applied as per the new request, rather than the old setting. When +parsing the tx_type, don't apply it just yet, but delay that until after +we've parsed the rx_filter as well (and potentially returned -ERANGE for +that). + +Similarly, copy_to_user() may fail, which is a rare occurrence, but +should still be treated by unwinding what was done. 
+ +Fixes: 96ca08c05838 ("net: mscc: ocelot: set up traps for PTP packets") +Signed-off-by: Vladimir Oltean +Link: https://patch.msgid.link/20241205145519.1236778-6-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_ptp.c | 59 ++++++++++++++++++-------- + 1 file changed, 42 insertions(+), 17 deletions(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c +index bc44aa635d49..34a2d8ea3b2d 100644 +--- a/drivers/net/ethernet/mscc/ocelot_ptp.c ++++ b/drivers/net/ethernet/mscc/ocelot_ptp.c +@@ -497,6 +497,28 @@ static int ocelot_traps_to_ptp_rx_filter(unsigned int proto) + return HWTSTAMP_FILTER_NONE; + } + ++static int ocelot_ptp_tx_type_to_cmd(int tx_type, int *ptp_cmd) ++{ ++ switch (tx_type) { ++ case HWTSTAMP_TX_ON: ++ *ptp_cmd = IFH_REW_OP_TWO_STEP_PTP; ++ break; ++ case HWTSTAMP_TX_ONESTEP_SYNC: ++ /* IFH_REW_OP_ONE_STEP_PTP updates the correctionField, ++ * what we need to update is the originTimestamp. 
++ */ ++ *ptp_cmd = IFH_REW_OP_ORIGIN_PTP; ++ break; ++ case HWTSTAMP_TX_OFF: ++ *ptp_cmd = 0; ++ break; ++ default: ++ return -ERANGE; ++ } ++ ++ return 0; ++} ++ + int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr) + { + struct ocelot_port *ocelot_port = ocelot->ports[port]; +@@ -523,30 +545,19 @@ EXPORT_SYMBOL(ocelot_hwstamp_get); + int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) + { + struct ocelot_port *ocelot_port = ocelot->ports[port]; ++ int ptp_cmd, old_ptp_cmd = ocelot_port->ptp_cmd; + bool l2 = false, l4 = false; + struct hwtstamp_config cfg; ++ bool old_l2, old_l4; + int err; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + /* Tx type sanity check */ +- switch (cfg.tx_type) { +- case HWTSTAMP_TX_ON: +- ocelot_port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP; +- break; +- case HWTSTAMP_TX_ONESTEP_SYNC: +- /* IFH_REW_OP_ONE_STEP_PTP updates the correctional field, we +- * need to update the origin time. +- */ +- ocelot_port->ptp_cmd = IFH_REW_OP_ORIGIN_PTP; +- break; +- case HWTSTAMP_TX_OFF: +- ocelot_port->ptp_cmd = 0; +- break; +- default: +- return -ERANGE; +- } ++ err = ocelot_ptp_tx_type_to_cmd(cfg.tx_type, &ptp_cmd); ++ if (err) ++ return err; + + switch (cfg.rx_filter) { + case HWTSTAMP_FILTER_NONE: +@@ -571,13 +582,27 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) + return -ERANGE; + } + ++ old_l2 = ocelot_port->trap_proto & OCELOT_PROTO_PTP_L2; ++ old_l4 = ocelot_port->trap_proto & OCELOT_PROTO_PTP_L4; ++ + err = ocelot_setup_ptp_traps(ocelot, port, l2, l4); + if (err) + return err; + ++ ocelot_port->ptp_cmd = ptp_cmd; ++ + cfg.rx_filter = ocelot_traps_to_ptp_rx_filter(ocelot_port->trap_proto); + +- return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? 
-EFAULT : 0; ++ if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg))) { ++ err = -EFAULT; ++ goto out_restore_ptp_traps; ++ } ++ ++ return 0; ++out_restore_ptp_traps: ++ ocelot_setup_ptp_traps(ocelot, port, old_l2, old_l4); ++ ocelot_port->ptp_cmd = old_ptp_cmd; ++ return err; + } + EXPORT_SYMBOL(ocelot_hwstamp_set); + +-- +2.39.5 + diff --git a/queue-6.6/net-renesas-rswitch-avoid-use-after-put-for-a-device.patch b/queue-6.6/net-renesas-rswitch-avoid-use-after-put-for-a-device.patch new file mode 100644 index 00000000000..efebc00df47 --- /dev/null +++ b/queue-6.6/net-renesas-rswitch-avoid-use-after-put-for-a-device.patch @@ -0,0 +1,56 @@ +From f3555a863c3fbaaceb036652189fbe1d75498196 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 8 Dec 2024 14:50:04 +0500 +Subject: net: renesas: rswitch: avoid use-after-put for a device tree node + +From: Nikita Yushchenko + +[ Upstream commit 66b7e9f85b8459c823b11e9af69dbf4be5eb6be8 ] + +The device tree node saved in the rswitch_device structure is used at +several driver locations. So passing this node to of_node_put() after +the first use is wrong. + +Move of_node_put() for this node to exit paths. 
+ +Fixes: b46f1e579329 ("net: renesas: rswitch: Simplify struct phy * handling") +Signed-off-by: Nikita Yushchenko +Reviewed-by: Yoshihiro Shimoda +Link: https://patch.msgid.link/20241208095004.69468-5-nikita.yoush@cogentembedded.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/renesas/rswitch.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c +index b1432ca79f1e..d04a79ece698 100644 +--- a/drivers/net/ethernet/renesas/rswitch.c ++++ b/drivers/net/ethernet/renesas/rswitch.c +@@ -1819,7 +1819,6 @@ static int rswitch_device_alloc(struct rswitch_private *priv, unsigned int index + rdev->np_port = rswitch_get_port_node(rdev); + rdev->disabled = !rdev->np_port; + err = of_get_ethdev_address(rdev->np_port, ndev); +- of_node_put(rdev->np_port); + if (err) { + if (is_valid_ether_addr(rdev->etha->mac_addr)) + eth_hw_addr_set(ndev, rdev->etha->mac_addr); +@@ -1849,6 +1848,7 @@ static int rswitch_device_alloc(struct rswitch_private *priv, unsigned int index + + out_rxdmac: + out_get_params: ++ of_node_put(rdev->np_port); + netif_napi_del(&rdev->napi); + free_netdev(ndev); + +@@ -1862,6 +1862,7 @@ static void rswitch_device_free(struct rswitch_private *priv, unsigned int index + + rswitch_txdmac_free(ndev); + rswitch_rxdmac_free(ndev); ++ of_node_put(rdev->np_port); + netif_napi_del(&rdev->napi); + free_netdev(ndev); + } +-- +2.39.5 + diff --git a/queue-6.6/net-renesas-rswitch-fix-initial-mpic-register-settin.patch b/queue-6.6/net-renesas-rswitch-fix-initial-mpic-register-settin.patch new file mode 100644 index 00000000000..98e4a03fd0f --- /dev/null +++ b/queue-6.6/net-renesas-rswitch-fix-initial-mpic-register-settin.patch @@ -0,0 +1,104 @@ +From e4a608454cb3284a7f7a3cc65d7f01e318ab2173 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Dec 2024 10:30:12 +0500 +Subject: net: renesas: rswitch: fix initial MPIC register setting 
+ +From: Nikita Yushchenko + +[ Upstream commit fb9e6039c325cc205a368046dc03c56c87df2310 ] + +MPIC.PIS must be set per phy interface type. +MPIC.LSC must be set per speed. + +Do that strictly per datasheet, instead of hardcoding MPIC.PIS to GMII. + +Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"") +Signed-off-by: Nikita Yushchenko +Reviewed-by: Michal Swiatkowski +Link: https://patch.msgid.link/20241211053012.368914-1-nikita.yoush@cogentembedded.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/renesas/rswitch.c | 27 ++++++++++++++++++++------ + drivers/net/ethernet/renesas/rswitch.h | 14 ++++++------- + 2 files changed, 28 insertions(+), 13 deletions(-) + +diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c +index 4dd218b6f308..3665258cec31 100644 +--- a/drivers/net/ethernet/renesas/rswitch.c ++++ b/drivers/net/ethernet/renesas/rswitch.c +@@ -1047,25 +1047,40 @@ static int rswitch_etha_wait_link_verification(struct rswitch_etha *etha) + + static void rswitch_rmac_setting(struct rswitch_etha *etha, const u8 *mac) + { +- u32 val; ++ u32 pis, lsc; + + rswitch_etha_write_mac_address(etha, mac); + ++ switch (etha->phy_interface) { ++ case PHY_INTERFACE_MODE_SGMII: ++ pis = MPIC_PIS_GMII; ++ break; ++ case PHY_INTERFACE_MODE_USXGMII: ++ case PHY_INTERFACE_MODE_5GBASER: ++ pis = MPIC_PIS_XGMII; ++ break; ++ default: ++ pis = FIELD_GET(MPIC_PIS, ioread32(etha->addr + MPIC)); ++ break; ++ } ++ + switch (etha->speed) { + case 100: +- val = MPIC_LSC_100M; ++ lsc = MPIC_LSC_100M; + break; + case 1000: +- val = MPIC_LSC_1G; ++ lsc = MPIC_LSC_1G; + break; + case 2500: +- val = MPIC_LSC_2_5G; ++ lsc = MPIC_LSC_2_5G; + break; + default: +- return; ++ lsc = FIELD_GET(MPIC_LSC, ioread32(etha->addr + MPIC)); ++ break; + } + +- iowrite32(MPIC_PIS_GMII | val, etha->addr + MPIC); ++ rswitch_modify(etha->addr, MPIC, MPIC_PIS | MPIC_LSC, ++ FIELD_PREP(MPIC_PIS, pis) | 
FIELD_PREP(MPIC_LSC, lsc)); + } + + static void rswitch_etha_enable_mii(struct rswitch_etha *etha) +diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h +index 327873b637d7..f2d1cd47187d 100644 +--- a/drivers/net/ethernet/renesas/rswitch.h ++++ b/drivers/net/ethernet/renesas/rswitch.h +@@ -723,13 +723,13 @@ enum rswitch_etha_mode { + + #define EAVCC_VEM_SC_TAG (0x3 << 16) + +-#define MPIC_PIS_MII 0x00 +-#define MPIC_PIS_GMII 0x02 +-#define MPIC_PIS_XGMII 0x04 +-#define MPIC_LSC_SHIFT 3 +-#define MPIC_LSC_100M (1 << MPIC_LSC_SHIFT) +-#define MPIC_LSC_1G (2 << MPIC_LSC_SHIFT) +-#define MPIC_LSC_2_5G (3 << MPIC_LSC_SHIFT) ++#define MPIC_PIS GENMASK(2, 0) ++#define MPIC_PIS_GMII 2 ++#define MPIC_PIS_XGMII 4 ++#define MPIC_LSC GENMASK(5, 3) ++#define MPIC_LSC_100M 1 ++#define MPIC_LSC_1G 2 ++#define MPIC_LSC_2_5G 3 + + #define MDIO_READ_C45 0x03 + #define MDIO_WRITE_C45 0x01 +-- +2.39.5 + diff --git a/queue-6.6/net-renesas-rswitch-fix-leaked-pointer-on-error-path.patch b/queue-6.6/net-renesas-rswitch-fix-leaked-pointer-on-error-path.patch new file mode 100644 index 00000000000..4a2cd60a3c0 --- /dev/null +++ b/queue-6.6/net-renesas-rswitch-fix-leaked-pointer-on-error-path.patch @@ -0,0 +1,44 @@ +From 59d8683b570ffc9ce74f275cd7bd824bb2cd16de Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 8 Dec 2024 14:50:03 +0500 +Subject: net: renesas: rswitch: fix leaked pointer on error path + +From: Nikita Yushchenko + +[ Upstream commit bb617328bafa1023d8e9c25a25345a564c66c14f ] + +If error path is taken while filling descriptor for a frame, skb +pointer is left in the entry. Later, on the ring entry reuse, the +same entry could be used as a part of a multi-descriptor frame, +and skb for that new frame could be stored in a different entry. + +Then, the stale pointer will reach the completion routine, and passed +to the release operation. + +Fix that by clearing the saved skb pointer at the error path. 
+ +Fixes: d2c96b9d5f83 ("net: rswitch: Add jumbo frames handling for TX") +Signed-off-by: Nikita Yushchenko +Reviewed-by: Yoshihiro Shimoda +Link: https://patch.msgid.link/20241208095004.69468-4-nikita.yoush@cogentembedded.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/renesas/rswitch.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c +index 13577fe2c7ec..b1432ca79f1e 100644 +--- a/drivers/net/ethernet/renesas/rswitch.c ++++ b/drivers/net/ethernet/renesas/rswitch.c +@@ -1631,6 +1631,7 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd + return ret; + + err_unmap: ++ gq->skbs[(gq->cur + nr_desc - 1) % gq->ring_size] = NULL; + dma_unmap_single(ndev->dev.parent, dma_addr_orig, skb->len, DMA_TO_DEVICE); + + err_kfree: +-- +2.39.5 + diff --git a/queue-6.6/net-renesas-rswitch-fix-race-window-between-tx-start.patch b/queue-6.6/net-renesas-rswitch-fix-race-window-between-tx-start.patch new file mode 100644 index 00000000000..0252e008711 --- /dev/null +++ b/queue-6.6/net-renesas-rswitch-fix-race-window-between-tx-start.patch @@ -0,0 +1,84 @@ +From 29bd965168a378b251abfaf63610edebb5026cd8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 8 Dec 2024 14:50:02 +0500 +Subject: net: renesas: rswitch: fix race window between tx start and complete + +From: Nikita Yushchenko + +[ Upstream commit 0c9547e6ccf40455b0574cf589be3b152a3edf5b ] + +If hardware is already transmitting, it can start handling the +descriptor being written to immediately after it observes updated DT +field, before the queue is kicked by a write to GWTRC. + +If the start_xmit() execution is preempted at unfortunate moment, this +transmission can complete, and interrupt handled, before gq->cur gets +updated. With the current implementation of completion, this will cause +the last entry not completed. 
+ +Fix that by changing completion loop to check DT values directly, instead +of depending on gq->cur. + +Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"") +Signed-off-by: Nikita Yushchenko +Reviewed-by: Yoshihiro Shimoda +Link: https://patch.msgid.link/20241208095004.69468-3-nikita.yoush@cogentembedded.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/renesas/rswitch.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c +index 17be2479654a..13577fe2c7ec 100644 +--- a/drivers/net/ethernet/renesas/rswitch.c ++++ b/drivers/net/ethernet/renesas/rswitch.c +@@ -793,13 +793,10 @@ static void rswitch_tx_free(struct net_device *ndev) + struct rswitch_ext_desc *desc; + struct sk_buff *skb; + +- for (; rswitch_get_num_cur_queues(gq) > 0; +- gq->dirty = rswitch_next_queue_index(gq, false, 1)) { +- desc = &gq->tx_ring[gq->dirty]; +- if ((desc->desc.die_dt & DT_MASK) != DT_FEMPTY) +- break; +- ++ desc = &gq->tx_ring[gq->dirty]; ++ while ((desc->desc.die_dt & DT_MASK) == DT_FEMPTY) { + dma_rmb(); ++ + skb = gq->skbs[gq->dirty]; + if (skb) { + dma_unmap_single(ndev->dev.parent, +@@ -810,7 +807,10 @@ static void rswitch_tx_free(struct net_device *ndev) + rdev->ndev->stats.tx_packets++; + rdev->ndev->stats.tx_bytes += skb->len; + } ++ + desc->desc.die_dt = DT_EEMPTY; ++ gq->dirty = rswitch_next_queue_index(gq, false, 1); ++ desc = &gq->tx_ring[gq->dirty]; + } + } + +@@ -1613,6 +1613,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd + gq->skbs[gq->cur] = skb; + gq->unmap_addrs[gq->cur] = dma_addr_orig; + ++ dma_wmb(); ++ + /* DT_FSTART should be set at last. So, this is reverse order. 
*/ + for (i = nr_desc; i-- > 0; ) { + desc = &gq->tx_ring[rswitch_next_queue_index(gq, true, i)]; +@@ -1623,8 +1625,6 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd + goto err_unmap; + } + +- wmb(); /* gq->cur must be incremented after die_dt was set */ +- + gq->cur = rswitch_next_queue_index(gq, true, nr_desc); + rswitch_modify(rdev->addr, GWTRC(gq->index), 0, BIT(gq->index % 32)); + +-- +2.39.5 + diff --git a/queue-6.6/net-renesas-rswitch-handle-stop-vs-interrupt-race.patch b/queue-6.6/net-renesas-rswitch-handle-stop-vs-interrupt-race.patch new file mode 100644 index 00000000000..2f90fea0e18 --- /dev/null +++ b/queue-6.6/net-renesas-rswitch-handle-stop-vs-interrupt-race.patch @@ -0,0 +1,119 @@ +From c812182cd714aba4780bd9043f04ce9632ef7203 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Dec 2024 16:32:04 +0500 +Subject: net: renesas: rswitch: handle stop vs interrupt race + +From: Nikita Yushchenko + +[ Upstream commit 3dd002f20098b9569f8fd7f8703f364571e2e975 ] + +Currently the stop routine of rswitch driver does not immediately +prevent hardware from continuing to update descriptors and requesting +interrupts. + +It can happen that when rswitch_stop() executes the masking of +interrupts from the queues of the port being closed, napi poll for +that port is already scheduled or running on a different CPU. When +execution of this napi poll completes, it will unmask the interrupts. +And unmasked interrupt can fire after rswitch_stop() returns from +napi_disable() call. Then, the handler won't mask it, because +napi_schedule_prep() will return false, and interrupt storm will +happen. + +This can't be fixed by making rswitch_stop() call napi_disable() before +masking interrupts. In this case, the interrupt storm will happen if +interrupt fires between napi_disable() and masking. + +Fix this by checking for priv->opened_ports bit when unmasking +interrupts after napi poll. 
For that to be consistent, move +priv->opened_ports changes into spinlock-protected areas, and reorder +other operations in rswitch_open() and rswitch_stop() accordingly. + +Signed-off-by: Nikita Yushchenko +Reviewed-by: Yoshihiro Shimoda +Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"") +Link: https://patch.msgid.link/20241209113204.175015-1-nikita.yoush@cogentembedded.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/renesas/rswitch.c | 33 ++++++++++++++------------ + 1 file changed, 18 insertions(+), 15 deletions(-) + +diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c +index d04a79ece698..4dd218b6f308 100644 +--- a/drivers/net/ethernet/renesas/rswitch.c ++++ b/drivers/net/ethernet/renesas/rswitch.c +@@ -839,8 +839,10 @@ static int rswitch_poll(struct napi_struct *napi, int budget) + + if (napi_complete_done(napi, budget - quota)) { + spin_lock_irqsave(&priv->lock, flags); +- rswitch_enadis_data_irq(priv, rdev->tx_queue->index, true); +- rswitch_enadis_data_irq(priv, rdev->rx_queue->index, true); ++ if (test_bit(rdev->port, priv->opened_ports)) { ++ rswitch_enadis_data_irq(priv, rdev->tx_queue->index, true); ++ rswitch_enadis_data_irq(priv, rdev->rx_queue->index, true); ++ } + spin_unlock_irqrestore(&priv->lock, flags); + } + +@@ -1467,20 +1469,20 @@ static int rswitch_open(struct net_device *ndev) + struct rswitch_device *rdev = netdev_priv(ndev); + unsigned long flags; + +- phy_start(ndev->phydev); ++ if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS)) ++ iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDIE); + + napi_enable(&rdev->napi); +- netif_start_queue(ndev); + + spin_lock_irqsave(&rdev->priv->lock, flags); ++ bitmap_set(rdev->priv->opened_ports, rdev->port, 1); + rswitch_enadis_data_irq(rdev->priv, rdev->tx_queue->index, true); + rswitch_enadis_data_irq(rdev->priv, rdev->rx_queue->index, true); + 
spin_unlock_irqrestore(&rdev->priv->lock, flags); + +- if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS)) +- iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDIE); ++ phy_start(ndev->phydev); + +- bitmap_set(rdev->priv->opened_ports, rdev->port, 1); ++ netif_start_queue(ndev); + + return 0; + }; +@@ -1492,7 +1494,16 @@ static int rswitch_stop(struct net_device *ndev) + unsigned long flags; + + netif_tx_stop_all_queues(ndev); ++ ++ phy_stop(ndev->phydev); ++ ++ spin_lock_irqsave(&rdev->priv->lock, flags); ++ rswitch_enadis_data_irq(rdev->priv, rdev->tx_queue->index, false); ++ rswitch_enadis_data_irq(rdev->priv, rdev->rx_queue->index, false); + bitmap_clear(rdev->priv->opened_ports, rdev->port, 1); ++ spin_unlock_irqrestore(&rdev->priv->lock, flags); ++ ++ napi_disable(&rdev->napi); + + if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS)) + iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDID); +@@ -1505,14 +1516,6 @@ static int rswitch_stop(struct net_device *ndev) + kfree(ts_info); + } + +- spin_lock_irqsave(&rdev->priv->lock, flags); +- rswitch_enadis_data_irq(rdev->priv, rdev->tx_queue->index, false); +- rswitch_enadis_data_irq(rdev->priv, rdev->rx_queue->index, false); +- spin_unlock_irqrestore(&rdev->priv->lock, flags); +- +- phy_stop(ndev->phydev); +- napi_disable(&rdev->napi); +- + return 0; + }; + +-- +2.39.5 + diff --git a/queue-6.6/net-rswitch-add-a-setting-ext-descriptor-function.patch b/queue-6.6/net-rswitch-add-a-setting-ext-descriptor-function.patch new file mode 100644 index 00000000000..ce92a7e57fe --- /dev/null +++ b/queue-6.6/net-rswitch-add-a-setting-ext-descriptor-function.patch @@ -0,0 +1,116 @@ +From 9cff1ad3476558d3baad92a01ff35c1af191de8a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Dec 2023 13:10:26 +0900 +Subject: net: rswitch: Add a setting ext descriptor function + +From: Yoshihiro Shimoda + +[ Upstream commit fcff581ee43078cf23216aa7079012e935a6a078 ] + +If the driver would like to transmit a jumbo 
frame like 2KiB or more, +it should be split into multiple queues. In the near future, to support +this, add a setting ext descriptor function to improve code readability. + +Signed-off-by: Yoshihiro Shimoda +Signed-off-by: David S. Miller +Stable-dep-of: 0c9547e6ccf4 ("net: renesas: rswitch: fix race window between tx start and complete") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/renesas/rswitch.c | 73 +++++++++++++++++--------- + 1 file changed, 47 insertions(+), 26 deletions(-) + +diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c +index b008a44ea6ac..c01b4bd3f812 100644 +--- a/drivers/net/ethernet/renesas/rswitch.c ++++ b/drivers/net/ethernet/renesas/rswitch.c +@@ -1516,6 +1516,51 @@ static int rswitch_stop(struct net_device *ndev) + return 0; + }; + ++static bool rswitch_ext_desc_set_info1(struct rswitch_device *rdev, ++ struct sk_buff *skb, ++ struct rswitch_ext_desc *desc) ++{ ++ desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) | ++ INFO1_IPV(GWCA_IPV_NUM) | INFO1_FMT); ++ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { ++ struct rswitch_gwca_ts_info *ts_info; ++ ++ ts_info = kzalloc(sizeof(*ts_info), GFP_ATOMIC); ++ if (!ts_info) ++ return false; ++ ++ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; ++ rdev->ts_tag++; ++ desc->info1 |= cpu_to_le64(INFO1_TSUN(rdev->ts_tag) | INFO1_TXC); ++ ++ ts_info->skb = skb_get(skb); ++ ts_info->port = rdev->port; ++ ts_info->tag = rdev->ts_tag; ++ list_add_tail(&ts_info->list, &rdev->priv->gwca.ts_info_list); ++ ++ skb_tx_timestamp(skb); ++ } ++ ++ return true; ++} ++ ++static bool rswitch_ext_desc_set(struct rswitch_device *rdev, ++ struct sk_buff *skb, ++ struct rswitch_ext_desc *desc, ++ dma_addr_t dma_addr, u16 len, u8 die_dt) ++{ ++ rswitch_desc_set_dptr(&desc->desc, dma_addr); ++ desc->desc.info_ds = cpu_to_le16(len); ++ if (!rswitch_ext_desc_set_info1(rdev, skb, desc)) ++ return false; ++ ++ dma_wmb(); ++ ++ desc->desc.die_dt = die_dt; ++ ++ 
return true; ++} ++ + static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *ndev) + { + struct rswitch_device *rdev = netdev_priv(ndev); +@@ -1539,33 +1584,9 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd + gq->skbs[gq->cur] = skb; + gq->unmap_addrs[gq->cur] = dma_addr; + desc = &gq->tx_ring[gq->cur]; +- rswitch_desc_set_dptr(&desc->desc, dma_addr); +- desc->desc.info_ds = cpu_to_le16(skb->len); +- +- desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) | +- INFO1_IPV(GWCA_IPV_NUM) | INFO1_FMT); +- if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { +- struct rswitch_gwca_ts_info *ts_info; +- +- ts_info = kzalloc(sizeof(*ts_info), GFP_ATOMIC); +- if (!ts_info) +- goto err_unmap; +- +- skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; +- rdev->ts_tag++; +- desc->info1 |= cpu_to_le64(INFO1_TSUN(rdev->ts_tag) | INFO1_TXC); +- +- ts_info->skb = skb_get(skb); +- ts_info->port = rdev->port; +- ts_info->tag = rdev->ts_tag; +- list_add_tail(&ts_info->list, &rdev->priv->gwca.ts_info_list); +- +- skb_tx_timestamp(skb); +- } +- +- dma_wmb(); ++ if (!rswitch_ext_desc_set(rdev, skb, desc, dma_addr, skb->len, DT_FSINGLE | DIE)) ++ goto err_unmap; + +- desc->desc.die_dt = DT_FSINGLE | DIE; + wmb(); /* gq->cur must be incremented after die_dt was set */ + + gq->cur = rswitch_next_queue_index(gq, true, 1); +-- +2.39.5 + diff --git a/queue-6.6/net-rswitch-add-jumbo-frames-handling-for-tx.patch b/queue-6.6/net-rswitch-add-jumbo-frames-handling-for-tx.patch new file mode 100644 index 00000000000..8e48d20da1f --- /dev/null +++ b/queue-6.6/net-rswitch-add-jumbo-frames-handling-for-tx.patch @@ -0,0 +1,118 @@ +From 8f3edfe945df59ffab00cc32bdc8210b3ba7b63e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Dec 2023 13:10:29 +0900 +Subject: net: rswitch: Add jumbo frames handling for TX + +From: Yoshihiro Shimoda + +[ Upstream commit d2c96b9d5f83e4327cf044d00d7f713edd7fecfd ] + +If the driver would like to transmit a 
jumbo frame like 2KiB or more, +it should be split into multiple queues. In the near future, to support +this, add handling specific descriptor types F{START,MID,END}. However, +such jumbo frames will not happen yet because the maximum MTU size is +still default for now. + +Signed-off-by: Yoshihiro Shimoda +Signed-off-by: David S. Miller +Stable-dep-of: 0c9547e6ccf4 ("net: renesas: rswitch: fix race window between tx start and complete") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/renesas/rswitch.c | 56 +++++++++++++++++++++----- + 1 file changed, 46 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c +index c01b4bd3f812..17be2479654a 100644 +--- a/drivers/net/ethernet/renesas/rswitch.c ++++ b/drivers/net/ethernet/renesas/rswitch.c +@@ -1561,15 +1561,44 @@ static bool rswitch_ext_desc_set(struct rswitch_device *rdev, + return true; + } + ++static u8 rswitch_ext_desc_get_die_dt(unsigned int nr_desc, unsigned int index) ++{ ++ if (nr_desc == 1) ++ return DT_FSINGLE | DIE; ++ if (index == 0) ++ return DT_FSTART; ++ if (nr_desc - 1 == index) ++ return DT_FEND | DIE; ++ return DT_FMID; ++} ++ ++static u16 rswitch_ext_desc_get_len(u8 die_dt, unsigned int orig_len) ++{ ++ switch (die_dt & DT_MASK) { ++ case DT_FSINGLE: ++ case DT_FEND: ++ return (orig_len % RSWITCH_DESC_BUF_SIZE) ?: RSWITCH_DESC_BUF_SIZE; ++ case DT_FSTART: ++ case DT_FMID: ++ return RSWITCH_DESC_BUF_SIZE; ++ default: ++ return 0; ++ } ++} ++ + static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *ndev) + { + struct rswitch_device *rdev = netdev_priv(ndev); + struct rswitch_gwca_queue *gq = rdev->tx_queue; ++ dma_addr_t dma_addr, dma_addr_orig; + netdev_tx_t ret = NETDEV_TX_OK; + struct rswitch_ext_desc *desc; +- dma_addr_t dma_addr; ++ unsigned int i, nr_desc; ++ u8 die_dt; ++ u16 len; + +- if (rswitch_get_num_cur_queues(gq) >= gq->ring_size - 1) { ++ nr_desc = (skb->len - 1) / 
RSWITCH_DESC_BUF_SIZE + 1; ++ if (rswitch_get_num_cur_queues(gq) >= gq->ring_size - nr_desc) { + netif_stop_subqueue(ndev, 0); + return NETDEV_TX_BUSY; + } +@@ -1577,25 +1606,32 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd + if (skb_put_padto(skb, ETH_ZLEN)) + return ret; + +- dma_addr = dma_map_single(ndev->dev.parent, skb->data, skb->len, DMA_TO_DEVICE); +- if (dma_mapping_error(ndev->dev.parent, dma_addr)) ++ dma_addr_orig = dma_map_single(ndev->dev.parent, skb->data, skb->len, DMA_TO_DEVICE); ++ if (dma_mapping_error(ndev->dev.parent, dma_addr_orig)) + goto err_kfree; + + gq->skbs[gq->cur] = skb; +- gq->unmap_addrs[gq->cur] = dma_addr; +- desc = &gq->tx_ring[gq->cur]; +- if (!rswitch_ext_desc_set(rdev, skb, desc, dma_addr, skb->len, DT_FSINGLE | DIE)) +- goto err_unmap; ++ gq->unmap_addrs[gq->cur] = dma_addr_orig; ++ ++ /* DT_FSTART should be set at last. So, this is reverse order. */ ++ for (i = nr_desc; i-- > 0; ) { ++ desc = &gq->tx_ring[rswitch_next_queue_index(gq, true, i)]; ++ die_dt = rswitch_ext_desc_get_die_dt(nr_desc, i); ++ dma_addr = dma_addr_orig + i * RSWITCH_DESC_BUF_SIZE; ++ len = rswitch_ext_desc_get_len(die_dt, skb->len); ++ if (!rswitch_ext_desc_set(rdev, skb, desc, dma_addr, len, die_dt)) ++ goto err_unmap; ++ } + + wmb(); /* gq->cur must be incremented after die_dt was set */ + +- gq->cur = rswitch_next_queue_index(gq, true, 1); ++ gq->cur = rswitch_next_queue_index(gq, true, nr_desc); + rswitch_modify(rdev->addr, GWTRC(gq->index), 0, BIT(gq->index % 32)); + + return ret; + + err_unmap: +- dma_unmap_single(ndev->dev.parent, dma_addr, skb->len, DMA_TO_DEVICE); ++ dma_unmap_single(ndev->dev.parent, dma_addr_orig, skb->len, DMA_TO_DEVICE); + + err_kfree: + dev_kfree_skb_any(skb); +-- +2.39.5 + diff --git a/queue-6.6/net-rswitch-add-unmap_addrs-instead-of-dma-address-i.patch b/queue-6.6/net-rswitch-add-unmap_addrs-instead-of-dma-address-i.patch new file mode 100644 index 00000000000..6080ca36c7e --- 
/dev/null +++ b/queue-6.6/net-rswitch-add-unmap_addrs-instead-of-dma-address-i.patch @@ -0,0 +1,104 @@ +From 807a0b9430c0d90bb5bc04e7675a8042189ba58c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Dec 2023 13:10:25 +0900 +Subject: net: rswitch: Add unmap_addrs instead of dma address in each desc + +From: Yoshihiro Shimoda + +[ Upstream commit 271e015b91535dd87fd0f5df0cc3b906c2eddef9 ] + +If the driver would like to transmit a jumbo frame like 2KiB or more, +it should be split into multiple queues. In the near future, to support +this, add unmap_addrs array to unmap dma mapping address instead of dma +address in each TX descriptor because the descriptors may not have +the top dma address. + +Signed-off-by: Yoshihiro Shimoda +Signed-off-by: David S. Miller +Stable-dep-of: 0c9547e6ccf4 ("net: renesas: rswitch: fix race window between tx start and complete") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/renesas/rswitch.c | 19 +++++++++++-------- + drivers/net/ethernet/renesas/rswitch.h | 1 + + 2 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c +index 989cfc86098f..b008a44ea6ac 100644 +--- a/drivers/net/ethernet/renesas/rswitch.c ++++ b/drivers/net/ethernet/renesas/rswitch.c +@@ -283,6 +283,8 @@ static void rswitch_gwca_queue_free(struct net_device *ndev, + gq->tx_ring = NULL; + kfree(gq->skbs); + gq->skbs = NULL; ++ kfree(gq->unmap_addrs); ++ gq->unmap_addrs = NULL; + } + } + +@@ -321,6 +323,9 @@ static int rswitch_gwca_queue_alloc(struct net_device *ndev, + gq->skbs = kcalloc(gq->ring_size, sizeof(*gq->skbs), GFP_KERNEL); + if (!gq->skbs) + return -ENOMEM; ++ gq->unmap_addrs = kcalloc(gq->ring_size, sizeof(*gq->unmap_addrs), GFP_KERNEL); ++ if (!gq->unmap_addrs) ++ goto out; + gq->tx_ring = dma_alloc_coherent(ndev->dev.parent, + sizeof(struct rswitch_ext_desc) * + (gq->ring_size + 1), &gq->ring_dma, GFP_KERNEL); +@@ -786,9 +791,7 @@ static void 
rswitch_tx_free(struct net_device *ndev) + struct rswitch_device *rdev = netdev_priv(ndev); + struct rswitch_gwca_queue *gq = rdev->tx_queue; + struct rswitch_ext_desc *desc; +- dma_addr_t dma_addr; + struct sk_buff *skb; +- unsigned int size; + + for (; rswitch_get_num_cur_queues(gq) > 0; + gq->dirty = rswitch_next_queue_index(gq, false, 1)) { +@@ -797,18 +800,17 @@ static void rswitch_tx_free(struct net_device *ndev) + break; + + dma_rmb(); +- size = le16_to_cpu(desc->desc.info_ds) & TX_DS; + skb = gq->skbs[gq->dirty]; + if (skb) { +- dma_addr = rswitch_desc_get_dptr(&desc->desc); +- dma_unmap_single(ndev->dev.parent, dma_addr, +- size, DMA_TO_DEVICE); ++ dma_unmap_single(ndev->dev.parent, ++ gq->unmap_addrs[gq->dirty], ++ skb->len, DMA_TO_DEVICE); + dev_kfree_skb_any(gq->skbs[gq->dirty]); + gq->skbs[gq->dirty] = NULL; ++ rdev->ndev->stats.tx_packets++; ++ rdev->ndev->stats.tx_bytes += skb->len; + } + desc->desc.die_dt = DT_EEMPTY; +- rdev->ndev->stats.tx_packets++; +- rdev->ndev->stats.tx_bytes += size; + } + } + +@@ -1535,6 +1537,7 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd + goto err_kfree; + + gq->skbs[gq->cur] = skb; ++ gq->unmap_addrs[gq->cur] = dma_addr; + desc = &gq->tx_ring[gq->cur]; + rswitch_desc_set_dptr(&desc->desc, dma_addr); + desc->desc.info_ds = cpu_to_le16(skb->len); +diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h +index e62c28a442b9..327873b637d7 100644 +--- a/drivers/net/ethernet/renesas/rswitch.h ++++ b/drivers/net/ethernet/renesas/rswitch.h +@@ -956,6 +956,7 @@ struct rswitch_gwca_queue { + /* For TX */ + struct { + struct sk_buff **skbs; ++ dma_addr_t *unmap_addrs; + }; + /* For RX */ + struct { +-- +2.39.5 + diff --git a/queue-6.6/net-rswitch-drop-unused-argument-return-value.patch b/queue-6.6/net-rswitch-drop-unused-argument-return-value.patch new file mode 100644 index 00000000000..d79c164d246 --- /dev/null +++ 
b/queue-6.6/net-rswitch-drop-unused-argument-return-value.patch @@ -0,0 +1,75 @@ +From d198441b2c25ebce634e8f048ba4ce8c85d94d27 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Dec 2023 13:10:22 +0900 +Subject: net: rswitch: Drop unused argument/return value + +From: Yoshihiro Shimoda + +[ Upstream commit c7e0022390d43788f63c7021ad441c1f8d9acf5f ] + +Drop unused argument and return value of rswitch_tx_free() to +simplify the code. + +Signed-off-by: Yoshihiro Shimoda +Reviewed-by: Geert Uytterhoeven +Signed-off-by: David S. Miller +Stable-dep-of: 0c9547e6ccf4 ("net: renesas: rswitch: fix race window between tx start and complete") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/renesas/rswitch.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c +index ae9d8722b76f..b783516eb9e2 100644 +--- a/drivers/net/ethernet/renesas/rswitch.c ++++ b/drivers/net/ethernet/renesas/rswitch.c +@@ -760,20 +760,19 @@ static bool rswitch_rx(struct net_device *ndev, int *quota) + return 0; + } + +-static int rswitch_tx_free(struct net_device *ndev, bool free_txed_only) ++static void rswitch_tx_free(struct net_device *ndev) + { + struct rswitch_device *rdev = netdev_priv(ndev); + struct rswitch_gwca_queue *gq = rdev->tx_queue; + struct rswitch_ext_desc *desc; + dma_addr_t dma_addr; + struct sk_buff *skb; +- int free_num = 0; + int size; + + for (; rswitch_get_num_cur_queues(gq) > 0; + gq->dirty = rswitch_next_queue_index(gq, false, 1)) { + desc = &gq->tx_ring[gq->dirty]; +- if (free_txed_only && (desc->desc.die_dt & DT_MASK) != DT_FEMPTY) ++ if ((desc->desc.die_dt & DT_MASK) != DT_FEMPTY) + break; + + dma_rmb(); +@@ -785,14 +784,11 @@ static int rswitch_tx_free(struct net_device *ndev, bool free_txed_only) + size, DMA_TO_DEVICE); + dev_kfree_skb_any(gq->skbs[gq->dirty]); + gq->skbs[gq->dirty] = NULL; +- free_num++; + } + desc->desc.die_dt = DT_EEMPTY; + 
rdev->ndev->stats.tx_packets++; + rdev->ndev->stats.tx_bytes += size; + } +- +- return free_num; + } + + static int rswitch_poll(struct napi_struct *napi, int budget) +@@ -807,7 +803,7 @@ static int rswitch_poll(struct napi_struct *napi, int budget) + priv = rdev->priv; + + retry: +- rswitch_tx_free(ndev, true); ++ rswitch_tx_free(ndev); + + if (rswitch_rx(ndev, &quota)) + goto out; +-- +2.39.5 + diff --git a/queue-6.6/net-rswitch-use-build_skb-for-rx.patch b/queue-6.6/net-rswitch-use-build_skb-for-rx.patch new file mode 100644 index 00000000000..322b0de3e88 --- /dev/null +++ b/queue-6.6/net-rswitch-use-build_skb-for-rx.patch @@ -0,0 +1,250 @@ +From 7a959a2790401d484b0421fa2fb3b82834c92ba6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Dec 2023 13:10:24 +0900 +Subject: net: rswitch: Use build_skb() for RX + +From: Yoshihiro Shimoda + +[ Upstream commit 6a203cb5165d2257e8d54193b69afdb480a17f6f ] + +If this hardware receives a jumbo frame like 2KiB or more, it will be +split into multiple queues. In the near future, to support this, use +build_skb() instead of netdev_alloc_skb_ip_align(). + +Signed-off-by: Yoshihiro Shimoda +Signed-off-by: David S. 
Miller +Stable-dep-of: 0c9547e6ccf4 ("net: renesas: rswitch: fix race window between tx start and complete") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/renesas/rswitch.c | 73 +++++++++++++++----------- + drivers/net/ethernet/renesas/rswitch.h | 19 ++++++- + 2 files changed, 59 insertions(+), 33 deletions(-) + +diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c +index d10af779ee89..989cfc86098f 100644 +--- a/drivers/net/ethernet/renesas/rswitch.c ++++ b/drivers/net/ethernet/renesas/rswitch.c +@@ -234,19 +234,18 @@ static bool rswitch_is_queue_rxed(struct rswitch_gwca_queue *gq) + return false; + } + +-static int rswitch_gwca_queue_alloc_skb(struct rswitch_gwca_queue *gq, +- unsigned int start_index, +- unsigned int num) ++static int rswitch_gwca_queue_alloc_rx_buf(struct rswitch_gwca_queue *gq, ++ unsigned int start_index, ++ unsigned int num) + { + unsigned int i, index; + + for (i = 0; i < num; i++) { + index = (i + start_index) % gq->ring_size; +- if (gq->skbs[index]) ++ if (gq->rx_bufs[index]) + continue; +- gq->skbs[index] = netdev_alloc_skb_ip_align(gq->ndev, +- PKT_BUF_SZ + RSWITCH_ALIGN - 1); +- if (!gq->skbs[index]) ++ gq->rx_bufs[index] = netdev_alloc_frag(RSWITCH_BUF_SIZE); ++ if (!gq->rx_bufs[index]) + goto err; + } + +@@ -255,8 +254,8 @@ static int rswitch_gwca_queue_alloc_skb(struct rswitch_gwca_queue *gq, + err: + for (; i-- > 0; ) { + index = (i + start_index) % gq->ring_size; +- dev_kfree_skb(gq->skbs[index]); +- gq->skbs[index] = NULL; ++ skb_free_frag(gq->rx_bufs[index]); ++ gq->rx_bufs[index] = NULL; + } + + return -ENOMEM; +@@ -274,16 +273,17 @@ static void rswitch_gwca_queue_free(struct net_device *ndev, + gq->rx_ring = NULL; + + for (i = 0; i < gq->ring_size; i++) +- dev_kfree_skb(gq->skbs[i]); ++ skb_free_frag(gq->rx_bufs[i]); ++ kfree(gq->rx_bufs); ++ gq->rx_bufs = NULL; + } else { + dma_free_coherent(ndev->dev.parent, + sizeof(struct rswitch_ext_desc) * + (gq->ring_size + 1), 
gq->tx_ring, gq->ring_dma); + gq->tx_ring = NULL; ++ kfree(gq->skbs); ++ gq->skbs = NULL; + } +- +- kfree(gq->skbs); +- gq->skbs = NULL; + } + + static void rswitch_gwca_ts_queue_free(struct rswitch_private *priv) +@@ -307,17 +307,20 @@ static int rswitch_gwca_queue_alloc(struct net_device *ndev, + gq->ring_size = ring_size; + gq->ndev = ndev; + +- gq->skbs = kcalloc(gq->ring_size, sizeof(*gq->skbs), GFP_KERNEL); +- if (!gq->skbs) +- return -ENOMEM; +- + if (!dir_tx) { +- rswitch_gwca_queue_alloc_skb(gq, 0, gq->ring_size); ++ gq->rx_bufs = kcalloc(gq->ring_size, sizeof(*gq->rx_bufs), GFP_KERNEL); ++ if (!gq->rx_bufs) ++ return -ENOMEM; ++ if (rswitch_gwca_queue_alloc_rx_buf(gq, 0, gq->ring_size) < 0) ++ goto out; + + gq->rx_ring = dma_alloc_coherent(ndev->dev.parent, + sizeof(struct rswitch_ext_ts_desc) * + (gq->ring_size + 1), &gq->ring_dma, GFP_KERNEL); + } else { ++ gq->skbs = kcalloc(gq->ring_size, sizeof(*gq->skbs), GFP_KERNEL); ++ if (!gq->skbs) ++ return -ENOMEM; + gq->tx_ring = dma_alloc_coherent(ndev->dev.parent, + sizeof(struct rswitch_ext_desc) * + (gq->ring_size + 1), &gq->ring_dma, GFP_KERNEL); +@@ -366,12 +369,13 @@ static int rswitch_gwca_queue_format(struct net_device *ndev, + for (i = 0, desc = gq->tx_ring; i < gq->ring_size; i++, desc++) { + if (!gq->dir_tx) { + dma_addr = dma_map_single(ndev->dev.parent, +- gq->skbs[i]->data, PKT_BUF_SZ, ++ gq->rx_bufs[i] + RSWITCH_HEADROOM, ++ RSWITCH_MAP_BUF_SIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(ndev->dev.parent, dma_addr)) + goto err; + +- desc->desc.info_ds = cpu_to_le16(PKT_BUF_SZ); ++ desc->desc.info_ds = cpu_to_le16(RSWITCH_DESC_BUF_SIZE); + rswitch_desc_set_dptr(&desc->desc, dma_addr); + desc->desc.die_dt = DT_FEMPTY | DIE; + } else { +@@ -394,8 +398,8 @@ static int rswitch_gwca_queue_format(struct net_device *ndev, + if (!gq->dir_tx) { + for (desc = gq->tx_ring; i-- > 0; desc++) { + dma_addr = rswitch_desc_get_dptr(&desc->desc); +- dma_unmap_single(ndev->dev.parent, dma_addr, PKT_BUF_SZ, +- 
DMA_FROM_DEVICE); ++ dma_unmap_single(ndev->dev.parent, dma_addr, ++ RSWITCH_MAP_BUF_SIZE, DMA_FROM_DEVICE); + } + } + +@@ -432,12 +436,13 @@ static int rswitch_gwca_queue_ext_ts_fill(struct net_device *ndev, + desc = &gq->rx_ring[index]; + if (!gq->dir_tx) { + dma_addr = dma_map_single(ndev->dev.parent, +- gq->skbs[index]->data, PKT_BUF_SZ, ++ gq->rx_bufs[index] + RSWITCH_HEADROOM, ++ RSWITCH_MAP_BUF_SIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(ndev->dev.parent, dma_addr)) + goto err; + +- desc->desc.info_ds = cpu_to_le16(PKT_BUF_SZ); ++ desc->desc.info_ds = cpu_to_le16(RSWITCH_DESC_BUF_SIZE); + rswitch_desc_set_dptr(&desc->desc, dma_addr); + dma_wmb(); + desc->desc.die_dt = DT_FEMPTY | DIE; +@@ -455,8 +460,8 @@ static int rswitch_gwca_queue_ext_ts_fill(struct net_device *ndev, + index = (i + start_index) % gq->ring_size; + desc = &gq->rx_ring[index]; + dma_addr = rswitch_desc_get_dptr(&desc->desc); +- dma_unmap_single(ndev->dev.parent, dma_addr, PKT_BUF_SZ, +- DMA_FROM_DEVICE); ++ dma_unmap_single(ndev->dev.parent, dma_addr, ++ RSWITCH_MAP_BUF_SIZE, DMA_FROM_DEVICE); + } + } + +@@ -723,10 +728,15 @@ static bool rswitch_rx(struct net_device *ndev, int *quota) + while ((desc->desc.die_dt & DT_MASK) != DT_FEMPTY) { + dma_rmb(); + pkt_len = le16_to_cpu(desc->desc.info_ds) & RX_DS; +- skb = gq->skbs[gq->cur]; +- gq->skbs[gq->cur] = NULL; + dma_addr = rswitch_desc_get_dptr(&desc->desc); +- dma_unmap_single(ndev->dev.parent, dma_addr, PKT_BUF_SZ, DMA_FROM_DEVICE); ++ dma_unmap_single(ndev->dev.parent, dma_addr, ++ RSWITCH_MAP_BUF_SIZE, DMA_FROM_DEVICE); ++ skb = build_skb(gq->rx_bufs[gq->cur], RSWITCH_BUF_SIZE); ++ if (!skb) ++ goto out; ++ skb_reserve(skb, RSWITCH_HEADROOM); ++ skb_put(skb, pkt_len); ++ + get_ts = rdev->priv->ptp_priv->tstamp_rx_ctrl & RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT; + if (get_ts) { + struct skb_shared_hwtstamps *shhwtstamps; +@@ -738,12 +748,13 @@ static bool rswitch_rx(struct net_device *ndev, int *quota) + ts.tv_nsec = 
__le32_to_cpu(desc->ts_nsec & cpu_to_le32(0x3fffffff)); + shhwtstamps->hwtstamp = timespec64_to_ktime(ts); + } +- skb_put(skb, pkt_len); + skb->protocol = eth_type_trans(skb, ndev); + napi_gro_receive(&rdev->napi, skb); + rdev->ndev->stats.rx_packets++; + rdev->ndev->stats.rx_bytes += pkt_len; + ++out: ++ gq->rx_bufs[gq->cur] = NULL; + gq->cur = rswitch_next_queue_index(gq, true, 1); + desc = &gq->rx_ring[gq->cur]; + +@@ -752,7 +763,7 @@ static bool rswitch_rx(struct net_device *ndev, int *quota) + } + + num = rswitch_get_num_cur_queues(gq); +- ret = rswitch_gwca_queue_alloc_skb(gq, gq->dirty, num); ++ ret = rswitch_gwca_queue_alloc_rx_buf(gq, gq->dirty, num); + if (ret < 0) + goto err; + ret = rswitch_gwca_queue_ext_ts_fill(ndev, gq, gq->dirty, num); +diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h +index 542328959530..e62c28a442b9 100644 +--- a/drivers/net/ethernet/renesas/rswitch.h ++++ b/drivers/net/ethernet/renesas/rswitch.h +@@ -29,8 +29,13 @@ + #define RX_RING_SIZE 1024 + #define TS_RING_SIZE (TX_RING_SIZE * RSWITCH_NUM_PORTS) + +-#define PKT_BUF_SZ 1584 ++#define RSWITCH_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) ++#define RSWITCH_DESC_BUF_SIZE 2048 ++#define RSWITCH_TAILROOM SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + #define RSWITCH_ALIGN 128 ++#define RSWITCH_BUF_SIZE (RSWITCH_HEADROOM + RSWITCH_DESC_BUF_SIZE + \ ++ RSWITCH_TAILROOM + RSWITCH_ALIGN) ++#define RSWITCH_MAP_BUF_SIZE (RSWITCH_BUF_SIZE - RSWITCH_HEADROOM) + #define RSWITCH_MAX_CTAG_PCP 7 + + #define RSWITCH_TIMEOUT_US 100000 +@@ -945,8 +950,18 @@ struct rswitch_gwca_queue { + /* For [rt]x_ring */ + unsigned int index; + bool dir_tx; +- struct sk_buff **skbs; + struct net_device *ndev; /* queue to ndev for irq */ ++ ++ union { ++ /* For TX */ ++ struct { ++ struct sk_buff **skbs; ++ }; ++ /* For RX */ ++ struct { ++ void **rx_bufs; ++ }; ++ }; + }; + + struct rswitch_gwca_ts_info { +-- +2.39.5 + diff --git 
a/queue-6.6/net-rswitch-use-unsigned-int-for-desc-related-array-.patch b/queue-6.6/net-rswitch-use-unsigned-int-for-desc-related-array-.patch new file mode 100644 index 00000000000..7445443d32e --- /dev/null +++ b/queue-6.6/net-rswitch-use-unsigned-int-for-desc-related-array-.patch @@ -0,0 +1,401 @@ +From 9854b9d62aeb89a7ca58e92fc4c343cabf47b05f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Dec 2023 13:10:23 +0900 +Subject: net: rswitch: Use unsigned int for desc related array index + +From: Yoshihiro Shimoda + +[ Upstream commit 8857034184538ca92b0e029f6f56e5e04f518ad2 ] + +Array index should not be negative, so use unsigned int for +descriptors related array index. + +Signed-off-by: Yoshihiro Shimoda +Signed-off-by: David S. Miller +Stable-dep-of: 0c9547e6ccf4 ("net: renesas: rswitch: fix race window between tx start and complete") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/renesas/rswitch.c | 88 ++++++++++++++------------ + drivers/net/ethernet/renesas/rswitch.h | 14 ++-- + 2 files changed, 56 insertions(+), 46 deletions(-) + +diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c +index b783516eb9e2..d10af779ee89 100644 +--- a/drivers/net/ethernet/renesas/rswitch.c ++++ b/drivers/net/ethernet/renesas/rswitch.c +@@ -55,7 +55,8 @@ static void rswitch_clock_disable(struct rswitch_private *priv) + iowrite32(RCDC_RCD, priv->addr + RCDC); + } + +-static bool rswitch_agent_clock_is_enabled(void __iomem *coma_addr, int port) ++static bool rswitch_agent_clock_is_enabled(void __iomem *coma_addr, ++ unsigned int port) + { + u32 val = ioread32(coma_addr + RCEC); + +@@ -65,7 +66,8 @@ static bool rswitch_agent_clock_is_enabled(void __iomem *coma_addr, int port) + return false; + } + +-static void rswitch_agent_clock_ctrl(void __iomem *coma_addr, int port, int enable) ++static void rswitch_agent_clock_ctrl(void __iomem *coma_addr, unsigned int port, ++ int enable) + { + u32 val; + +@@ -99,7 +101,7 @@ static void 
rswitch_coma_init(struct rswitch_private *priv) + /* R-Switch-2 block (TOP) */ + static void rswitch_top_init(struct rswitch_private *priv) + { +- int i; ++ unsigned int i; + + for (i = 0; i < RSWITCH_MAX_NUM_QUEUES; i++) + iowrite32((i / 16) << (GWCA_INDEX * 8), priv->addr + TPEMIMC7(i)); +@@ -108,7 +110,7 @@ static void rswitch_top_init(struct rswitch_private *priv) + /* Forwarding engine block (MFWD) */ + static void rswitch_fwd_init(struct rswitch_private *priv) + { +- int i; ++ unsigned int i; + + /* For ETHA */ + for (i = 0; i < RSWITCH_NUM_PORTS; i++) { +@@ -165,7 +167,7 @@ static int rswitch_gwca_axi_ram_reset(struct rswitch_private *priv) + static bool rswitch_is_any_data_irq(struct rswitch_private *priv, u32 *dis, bool tx) + { + u32 *mask = tx ? priv->gwca.tx_irq_bits : priv->gwca.rx_irq_bits; +- int i; ++ unsigned int i; + + for (i = 0; i < RSWITCH_NUM_IRQ_REGS; i++) { + if (dis[i] & mask[i]) +@@ -177,7 +179,7 @@ static bool rswitch_is_any_data_irq(struct rswitch_private *priv, u32 *dis, bool + + static void rswitch_get_data_irq_status(struct rswitch_private *priv, u32 *dis) + { +- int i; ++ unsigned int i; + + for (i = 0; i < RSWITCH_NUM_IRQ_REGS; i++) { + dis[i] = ioread32(priv->addr + GWDIS(i)); +@@ -185,23 +187,26 @@ static void rswitch_get_data_irq_status(struct rswitch_private *priv, u32 *dis) + } + } + +-static void rswitch_enadis_data_irq(struct rswitch_private *priv, int index, bool enable) ++static void rswitch_enadis_data_irq(struct rswitch_private *priv, ++ unsigned int index, bool enable) + { + u32 offs = enable ? 
GWDIE(index / 32) : GWDID(index / 32); + + iowrite32(BIT(index % 32), priv->addr + offs); + } + +-static void rswitch_ack_data_irq(struct rswitch_private *priv, int index) ++static void rswitch_ack_data_irq(struct rswitch_private *priv, ++ unsigned int index) + { + u32 offs = GWDIS(index / 32); + + iowrite32(BIT(index % 32), priv->addr + offs); + } + +-static int rswitch_next_queue_index(struct rswitch_gwca_queue *gq, bool cur, int num) ++static unsigned int rswitch_next_queue_index(struct rswitch_gwca_queue *gq, ++ bool cur, unsigned int num) + { +- int index = cur ? gq->cur : gq->dirty; ++ unsigned int index = cur ? gq->cur : gq->dirty; + + if (index + num >= gq->ring_size) + index = (index + num) % gq->ring_size; +@@ -211,7 +216,7 @@ static int rswitch_next_queue_index(struct rswitch_gwca_queue *gq, bool cur, int + return index; + } + +-static int rswitch_get_num_cur_queues(struct rswitch_gwca_queue *gq) ++static unsigned int rswitch_get_num_cur_queues(struct rswitch_gwca_queue *gq) + { + if (gq->cur >= gq->dirty) + return gq->cur - gq->dirty; +@@ -230,9 +235,10 @@ static bool rswitch_is_queue_rxed(struct rswitch_gwca_queue *gq) + } + + static int rswitch_gwca_queue_alloc_skb(struct rswitch_gwca_queue *gq, +- int start_index, int num) ++ unsigned int start_index, ++ unsigned int num) + { +- int i, index; ++ unsigned int i, index; + + for (i = 0; i < num; i++) { + index = (i + start_index) % gq->ring_size; +@@ -247,7 +253,7 @@ static int rswitch_gwca_queue_alloc_skb(struct rswitch_gwca_queue *gq, + return 0; + + err: +- for (i--; i >= 0; i--) { ++ for (; i-- > 0; ) { + index = (i + start_index) % gq->ring_size; + dev_kfree_skb(gq->skbs[index]); + gq->skbs[index] = NULL; +@@ -259,7 +265,7 @@ static int rswitch_gwca_queue_alloc_skb(struct rswitch_gwca_queue *gq, + static void rswitch_gwca_queue_free(struct net_device *ndev, + struct rswitch_gwca_queue *gq) + { +- int i; ++ unsigned int i; + + if (!gq->dir_tx) { + dma_free_coherent(ndev->dev.parent, +@@ -293,9 
+299,9 @@ static void rswitch_gwca_ts_queue_free(struct rswitch_private *priv) + static int rswitch_gwca_queue_alloc(struct net_device *ndev, + struct rswitch_private *priv, + struct rswitch_gwca_queue *gq, +- bool dir_tx, int ring_size) ++ bool dir_tx, unsigned int ring_size) + { +- int i, bit; ++ unsigned int i, bit; + + gq->dir_tx = dir_tx; + gq->ring_size = ring_size; +@@ -350,11 +356,11 @@ static int rswitch_gwca_queue_format(struct net_device *ndev, + struct rswitch_private *priv, + struct rswitch_gwca_queue *gq) + { +- int ring_size = sizeof(struct rswitch_ext_desc) * gq->ring_size; ++ unsigned int ring_size = sizeof(struct rswitch_ext_desc) * gq->ring_size; + struct rswitch_ext_desc *desc; + struct rswitch_desc *linkfix; + dma_addr_t dma_addr; +- int i; ++ unsigned int i; + + memset(gq->tx_ring, 0, ring_size); + for (i = 0, desc = gq->tx_ring; i < gq->ring_size; i++, desc++) { +@@ -386,7 +392,7 @@ static int rswitch_gwca_queue_format(struct net_device *ndev, + + err: + if (!gq->dir_tx) { +- for (i--, desc = gq->tx_ring; i >= 0; i--, desc++) { ++ for (desc = gq->tx_ring; i-- > 0; desc++) { + dma_addr = rswitch_desc_get_dptr(&desc->desc); + dma_unmap_single(ndev->dev.parent, dma_addr, PKT_BUF_SZ, + DMA_FROM_DEVICE); +@@ -397,11 +403,12 @@ static int rswitch_gwca_queue_format(struct net_device *ndev, + } + + static void rswitch_gwca_ts_queue_fill(struct rswitch_private *priv, +- int start_index, int num) ++ unsigned int start_index, ++ unsigned int num) + { + struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue; + struct rswitch_ts_desc *desc; +- int i, index; ++ unsigned int i, index; + + for (i = 0; i < num; i++) { + index = (i + start_index) % gq->ring_size; +@@ -412,12 +419,13 @@ static void rswitch_gwca_ts_queue_fill(struct rswitch_private *priv, + + static int rswitch_gwca_queue_ext_ts_fill(struct net_device *ndev, + struct rswitch_gwca_queue *gq, +- int start_index, int num) ++ unsigned int start_index, ++ unsigned int num) + { + struct rswitch_device 
*rdev = netdev_priv(ndev); + struct rswitch_ext_ts_desc *desc; ++ unsigned int i, index; + dma_addr_t dma_addr; +- int i, index; + + for (i = 0; i < num; i++) { + index = (i + start_index) % gq->ring_size; +@@ -443,7 +451,7 @@ static int rswitch_gwca_queue_ext_ts_fill(struct net_device *ndev, + + err: + if (!gq->dir_tx) { +- for (i--; i >= 0; i--) { ++ for (; i-- > 0; ) { + index = (i + start_index) % gq->ring_size; + desc = &gq->rx_ring[index]; + dma_addr = rswitch_desc_get_dptr(&desc->desc); +@@ -459,7 +467,7 @@ static int rswitch_gwca_queue_ext_ts_format(struct net_device *ndev, + struct rswitch_private *priv, + struct rswitch_gwca_queue *gq) + { +- int ring_size = sizeof(struct rswitch_ext_ts_desc) * gq->ring_size; ++ unsigned int ring_size = sizeof(struct rswitch_ext_ts_desc) * gq->ring_size; + struct rswitch_ext_ts_desc *desc; + struct rswitch_desc *linkfix; + int err; +@@ -486,7 +494,7 @@ static int rswitch_gwca_queue_ext_ts_format(struct net_device *ndev, + + static int rswitch_gwca_linkfix_alloc(struct rswitch_private *priv) + { +- int i, num_queues = priv->gwca.num_queues; ++ unsigned int i, num_queues = priv->gwca.num_queues; + struct rswitch_gwca *gwca = &priv->gwca; + struct device *dev = &priv->pdev->dev; + +@@ -536,7 +544,7 @@ static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv) + static struct rswitch_gwca_queue *rswitch_gwca_get(struct rswitch_private *priv) + { + struct rswitch_gwca_queue *gq; +- int index; ++ unsigned int index; + + index = find_first_zero_bit(priv->gwca.used, priv->gwca.num_queues); + if (index >= priv->gwca.num_queues) +@@ -582,7 +590,7 @@ static void rswitch_txdmac_free(struct net_device *ndev) + rswitch_gwca_put(rdev->priv, rdev->tx_queue); + } + +-static int rswitch_txdmac_init(struct rswitch_private *priv, int index) ++static int rswitch_txdmac_init(struct rswitch_private *priv, unsigned int index) + { + struct rswitch_device *rdev = priv->rdev[index]; + +@@ -616,7 +624,7 @@ static void 
rswitch_rxdmac_free(struct net_device *ndev) + rswitch_gwca_put(rdev->priv, rdev->rx_queue); + } + +-static int rswitch_rxdmac_init(struct rswitch_private *priv, int index) ++static int rswitch_rxdmac_init(struct rswitch_private *priv, unsigned int index) + { + struct rswitch_device *rdev = priv->rdev[index]; + struct net_device *ndev = rdev->ndev; +@@ -626,7 +634,8 @@ static int rswitch_rxdmac_init(struct rswitch_private *priv, int index) + + static int rswitch_gwca_hw_init(struct rswitch_private *priv) + { +- int i, err; ++ unsigned int i; ++ int err; + + err = rswitch_gwca_change_mode(priv, GWMC_OPC_DISABLE); + if (err < 0) +@@ -697,9 +706,10 @@ static bool rswitch_rx(struct net_device *ndev, int *quota) + struct rswitch_device *rdev = netdev_priv(ndev); + struct rswitch_gwca_queue *gq = rdev->rx_queue; + struct rswitch_ext_ts_desc *desc; +- int limit, boguscnt, num, ret; ++ int limit, boguscnt, ret; + struct sk_buff *skb; + dma_addr_t dma_addr; ++ unsigned int num; + u16 pkt_len; + u32 get_ts; + +@@ -767,7 +777,7 @@ static void rswitch_tx_free(struct net_device *ndev) + struct rswitch_ext_desc *desc; + dma_addr_t dma_addr; + struct sk_buff *skb; +- int size; ++ unsigned int size; + + for (; rswitch_get_num_cur_queues(gq) > 0; + gq->dirty = rswitch_next_queue_index(gq, false, 1)) { +@@ -846,7 +856,7 @@ static void rswitch_queue_interrupt(struct net_device *ndev) + static irqreturn_t rswitch_data_irq(struct rswitch_private *priv, u32 *dis) + { + struct rswitch_gwca_queue *gq; +- int i, index, bit; ++ unsigned int i, index, bit; + + for (i = 0; i < priv->gwca.num_queues; i++) { + gq = &priv->gwca.queues[i]; +@@ -913,8 +923,8 @@ static void rswitch_ts(struct rswitch_private *priv) + struct skb_shared_hwtstamps shhwtstamps; + struct rswitch_ts_desc *desc; + struct timespec64 ts; ++ unsigned int num; + u32 tag, port; +- int num; + + desc = &gq->ts_ring[gq->cur]; + while ((desc->desc.die_dt & DT_MASK) != DT_FEMPTY_ND) { +@@ -1431,7 +1441,7 @@ static int 
rswitch_ether_port_init_all(struct rswitch_private *priv) + + static void rswitch_ether_port_deinit_all(struct rswitch_private *priv) + { +- int i; ++ unsigned int i; + + for (i = 0; i < RSWITCH_NUM_PORTS; i++) { + phy_exit(priv->rdev[i]->serdes); +@@ -1686,7 +1696,7 @@ static const struct of_device_id renesas_eth_sw_of_table[] = { + }; + MODULE_DEVICE_TABLE(of, renesas_eth_sw_of_table); + +-static void rswitch_etha_init(struct rswitch_private *priv, int index) ++static void rswitch_etha_init(struct rswitch_private *priv, unsigned int index) + { + struct rswitch_etha *etha = &priv->etha[index]; + +@@ -1702,7 +1712,7 @@ static void rswitch_etha_init(struct rswitch_private *priv, int index) + etha->psmcs = clk_get_rate(priv->clk) / 100000 / (25 * 2) - 1; + } + +-static int rswitch_device_alloc(struct rswitch_private *priv, int index) ++static int rswitch_device_alloc(struct rswitch_private *priv, unsigned int index) + { + struct platform_device *pdev = priv->pdev; + struct rswitch_device *rdev; +@@ -1773,7 +1783,7 @@ static int rswitch_device_alloc(struct rswitch_private *priv, int index) + return err; + } + +-static void rswitch_device_free(struct rswitch_private *priv, int index) ++static void rswitch_device_free(struct rswitch_private *priv, unsigned int index) + { + struct rswitch_device *rdev = priv->rdev[index]; + struct net_device *ndev = rdev->ndev; +diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h +index 04f49a7a5843..542328959530 100644 +--- a/drivers/net/ethernet/renesas/rswitch.h ++++ b/drivers/net/ethernet/renesas/rswitch.h +@@ -909,7 +909,7 @@ struct rswitch_ext_ts_desc { + } __packed; + + struct rswitch_etha { +- int index; ++ unsigned int index; + void __iomem *addr; + void __iomem *coma_addr; + bool external_phy; +@@ -938,12 +938,12 @@ struct rswitch_gwca_queue { + + /* Common */ + dma_addr_t ring_dma; +- int ring_size; +- int cur; +- int dirty; ++ unsigned int ring_size; ++ unsigned int cur; ++ unsigned 
int dirty; + +- /* For [rt]_ring */ +- int index; ++ /* For [rt]x_ring */ ++ unsigned int index; + bool dir_tx; + struct sk_buff **skbs; + struct net_device *ndev; /* queue to ndev for irq */ +@@ -959,7 +959,7 @@ struct rswitch_gwca_ts_info { + + #define RSWITCH_NUM_IRQ_REGS (RSWITCH_MAX_NUM_QUEUES / BITS_PER_TYPE(u32)) + struct rswitch_gwca { +- int index; ++ unsigned int index; + struct rswitch_desc *linkfix_table; + dma_addr_t linkfix_table_dma; + u32 linkfix_table_size; +-- +2.39.5 + diff --git a/queue-6.6/net-sched-netem-account-for-backlog-updates-from-chi.patch b/queue-6.6/net-sched-netem-account-for-backlog-updates-from-chi.patch new file mode 100644 index 00000000000..6eddea570a8 --- /dev/null +++ b/queue-6.6/net-sched-netem-account-for-backlog-updates-from-chi.patch @@ -0,0 +1,171 @@ +From 90462680e49cc7019792e79491abd1b974646d21 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 14:14:11 +0100 +Subject: net/sched: netem: account for backlog updates from child qdisc + +From: Martin Ottens + +[ Upstream commit f8d4bc455047cf3903cd6f85f49978987dbb3027 ] + +In general, 'qlen' of any classful qdisc should keep track of the +number of packets that the qdisc itself and all of its children holds. +In case of netem, 'qlen' only accounts for the packets in its internal +tfifo. When netem is used with a child qdisc, the child qdisc can use +'qdisc_tree_reduce_backlog' to inform its parent, netem, about created +or dropped SKBs. This function updates 'qlen' and the backlog statistics +of netem, but netem does not account for changes made by a child qdisc. +'qlen' then indicates the wrong number of packets in the tfifo. +If a child qdisc creates new SKBs during enqueue and informs its parent +about this, netem's 'qlen' value is increased. When netem dequeues the +newly created SKBs from the child, the 'qlen' in netem is not updated. +If 'qlen' reaches the configured sch->limit, the enqueue function stops +working, even though the tfifo is not full. 
+ +Reproduce the bug: +Ensure that the sender machine has GSO enabled. Configure netem as root +qdisc and tbf as its child on the outgoing interface of the machine +as follows: +$ tc qdisc add dev <oif> root handle 1: netem delay 100ms limit 100 +$ tc qdisc add dev <oif> parent 1:0 tbf rate 50Mbit burst 1542 latency 50ms + +Send bulk TCP traffic out via this interface, e.g., by running an iPerf3 +client on the machine. Check the qdisc statistics: +$ tc -s qdisc show dev <oif> + +Statistics after 10s of iPerf3 TCP test before the fix (note that +netem's backlog > limit, netem stopped accepting packets): +qdisc netem 1: root refcnt 2 limit 1000 delay 100ms + Sent 2767766 bytes 1848 pkt (dropped 652, overlimits 0 requeues 0) + backlog 4294528236b 1155p requeues 0 +qdisc tbf 10: parent 1:1 rate 50Mbit burst 1537b lat 50ms + Sent 2767766 bytes 1848 pkt (dropped 327, overlimits 7601 requeues 0) + backlog 0b 0p requeues 0 + +Statistics after the fix: +qdisc netem 1: root refcnt 2 limit 1000 delay 100ms + Sent 37766372 bytes 24974 pkt (dropped 9, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 +qdisc tbf 10: parent 1:1 rate 50Mbit burst 1537b lat 50ms + Sent 37766372 bytes 24974 pkt (dropped 327, overlimits 96017 requeues 0) + backlog 0b 0p requeues 0 + +tbf segments the GSO SKBs (tbf_segment) and updates the netem's 'qlen'. +The interface fully stops transferring packets and "locks". In this case, +the child qdisc and tfifo are empty, but 'qlen' indicates the tfifo is at +its limit and no more packets are accepted. + +This patch adds a counter for the entries in the tfifo. Netem's 'qlen' is +only decreased when a packet is returned by its dequeue function, and not +during enqueuing into the child qdisc. External updates to 'qlen' are thus +accounted for and only the behavior of the backlog statistics changes. As +in other qdiscs, 'qlen' then keeps track of how many packets are held in +netem and all of its children. 
As before, sch->limit remains as the +maximum number of packets in the tfifo. The same applies to netem's +backlog statistics. + +Fixes: 50612537e9ab ("netem: fix classful handling") +Signed-off-by: Martin Ottens +Acked-by: Jamal Hadi Salim +Link: https://patch.msgid.link/20241210131412.1837202-1-martin.ottens@fau.de +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/sch_netem.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c +index d36eeb7b0502..152dbbe8fd31 100644 +--- a/net/sched/sch_netem.c ++++ b/net/sched/sch_netem.c +@@ -78,6 +78,8 @@ struct netem_sched_data { + struct sk_buff *t_head; + struct sk_buff *t_tail; + ++ u32 t_len; ++ + /* optional qdisc for classful handling (NULL at netem init) */ + struct Qdisc *qdisc; + +@@ -382,6 +384,7 @@ static void tfifo_reset(struct Qdisc *sch) + rtnl_kfree_skbs(q->t_head, q->t_tail); + q->t_head = NULL; + q->t_tail = NULL; ++ q->t_len = 0; + } + + static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) +@@ -411,6 +414,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) + rb_link_node(&nskb->rbnode, parent, p); + rb_insert_color(&nskb->rbnode, &q->t_root); + } ++ q->t_len++; + sch->q.qlen++; + } + +@@ -517,7 +521,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, + 1<<get_random_u32_below(8); + } + +- if (unlikely(sch->q.qlen >= sch->limit)) { ++ if (unlikely(q->t_len >= sch->limit)) { + /* re-link segs, so that qdisc_drop_all() frees them all */ + skb->next = segs; + qdisc_drop_all(skb, sch, to_free); +@@ -701,8 +705,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) + tfifo_dequeue: + skb = __qdisc_dequeue_head(&sch->q); + if (skb) { +- qdisc_qstats_backlog_dec(sch, skb); + deliver: ++ qdisc_qstats_backlog_dec(sch, skb); + qdisc_bstats_update(sch, skb); + return skb; + } +@@ -718,8 +722,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) + + if (time_to_send <= now && q->slot.slot_next 
<= now) { + netem_erase_head(q, skb); +- sch->q.qlen--; +- qdisc_qstats_backlog_dec(sch, skb); ++ q->t_len--; + skb->next = NULL; + skb->prev = NULL; + /* skb->dev shares skb->rbnode area, +@@ -746,16 +749,21 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) + if (net_xmit_drop_count(err)) + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, pkt_len); ++ sch->qstats.backlog -= pkt_len; ++ sch->q.qlen--; + } + goto tfifo_dequeue; + } ++ sch->q.qlen--; + goto deliver; + } + + if (q->qdisc) { + skb = q->qdisc->ops->dequeue(q->qdisc); +- if (skb) ++ if (skb) { ++ sch->q.qlen--; + goto deliver; ++ } + } + + qdisc_watchdog_schedule_ns(&q->watchdog, +@@ -765,8 +773,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) + + if (q->qdisc) { + skb = q->qdisc->ops->dequeue(q->qdisc); +- if (skb) ++ if (skb) { ++ sch->q.qlen--; + goto deliver; ++ } + } + return NULL; + } +-- +2.39.5 + diff --git a/queue-6.6/net-sparx5-fix-fdma-performance-issue.patch b/queue-6.6/net-sparx5-fix-fdma-performance-issue.patch new file mode 100644 index 00000000000..4698a3a1367 --- /dev/null +++ b/queue-6.6/net-sparx5-fix-fdma-performance-issue.patch @@ -0,0 +1,63 @@ +From 9404aa01a3f85d2368fa44a9787448c54d30035f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 14:54:26 +0100 +Subject: net: sparx5: fix FDMA performance issue + +From: Daniel Machon + +[ Upstream commit f004f2e535e2b66ccbf5ac35f8eaadeac70ad7b7 ] + +The FDMA handler is responsible for scheduling a NAPI poll, which will +eventually fetch RX packets from the FDMA queue. Currently, the FDMA +handler is run in a threaded context. For some reason, this kills +performance. Admittedly, I did not do a thorough investigation to see +exactly what causes the issue, however, I noticed that in the other +driver utilizing the same FDMA engine, we run the FDMA handler in hard +IRQ context. + +Fix this performance issue, by running the FDMA handler in hard IRQ +context, not deferring any work to a thread. 
+ +Prior to this change, the RX UDP performance was: + +Interval Transfer Bitrate Jitter +0.00-10.20 sec 44.6 MBytes 36.7 Mbits/sec 0.027 ms + +After this change, the rx UDP performance is: + +Interval Transfer Bitrate Jitter +0.00-9.12 sec 1.01 GBytes 953 Mbits/sec 0.020 ms + +Fixes: 10615907e9b5 ("net: sparx5: switchdev: adding frame DMA functionality") +Signed-off-by: Daniel Machon +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +index 8f116982c08a..98bee953234b 100644 +--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c ++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +@@ -693,12 +693,11 @@ static int sparx5_start(struct sparx5 *sparx5) + err = -ENXIO; + if (sparx5->fdma_irq >= 0) { + if (GCB_CHIP_ID_REV_ID_GET(sparx5->chip_id) > 0) +- err = devm_request_threaded_irq(sparx5->dev, +- sparx5->fdma_irq, +- NULL, +- sparx5_fdma_handler, +- IRQF_ONESHOT, +- "sparx5-fdma", sparx5); ++ err = devm_request_irq(sparx5->dev, ++ sparx5->fdma_irq, ++ sparx5_fdma_handler, ++ 0, ++ "sparx5-fdma", sparx5); + if (!err) + err = sparx5_fdma_start(sparx5); + if (err) +-- +2.39.5 + diff --git a/queue-6.6/net-sparx5-fix-the-maximum-frame-length-register.patch b/queue-6.6/net-sparx5-fix-the-maximum-frame-length-register.patch new file mode 100644 index 00000000000..b0396bc3320 --- /dev/null +++ b/queue-6.6/net-sparx5-fix-the-maximum-frame-length-register.patch @@ -0,0 +1,39 @@ +From c521d2760804127d52de0530f031afeea9197d92 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 14:54:28 +0100 +Subject: net: sparx5: fix the maximum frame length register + +From: Daniel Machon + +[ Upstream commit ddd7ba006078a2bef5971b2dc5f8383d47f96207 ] + +On port initialization, we configure the maximum frame length 
accepted +by the receive module associated with the port. This value is currently +written to the MAX_LEN field of the DEV10G_MAC_ENA_CFG register, when in +fact, it should be written to the DEV10G_MAC_MAXLEN_CFG register. Fix +this. + +Fixes: 946e7fd5053a ("net: sparx5: add port module support") +Signed-off-by: Daniel Machon +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/microchip/sparx5/sparx5_port.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +index 60dd2fd603a8..fcdaa37879f7 100644 +--- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c ++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +@@ -1119,7 +1119,7 @@ int sparx5_port_init(struct sparx5 *sparx5, + spx5_inst_rmw(DEV10G_MAC_MAXLEN_CFG_MAX_LEN_SET(ETH_MAXLEN), + DEV10G_MAC_MAXLEN_CFG_MAX_LEN, + devinst, +- DEV10G_MAC_ENA_CFG(0)); ++ DEV10G_MAC_MAXLEN_CFG(0)); + + /* Handle Signal Detect in 10G PCS */ + spx5_inst_wr(PCS10G_BR_PCS_SD_CFG_SD_POL_SET(sd_pol) | +-- +2.39.5 + diff --git a/queue-6.6/netfilter-idletimer-fix-for-possible-abba-deadlock.patch b/queue-6.6/netfilter-idletimer-fix-for-possible-abba-deadlock.patch new file mode 100644 index 00000000000..134cbc8080a --- /dev/null +++ b/queue-6.6/netfilter-idletimer-fix-for-possible-abba-deadlock.patch @@ -0,0 +1,130 @@ +From 133c9bf580f66593d438700c5fb2a0ddfee48aa9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Dec 2024 19:32:29 +0100 +Subject: netfilter: IDLETIMER: Fix for possible ABBA deadlock + +From: Phil Sutter + +[ Upstream commit f36b01994d68ffc253c8296e2228dfe6e6431c03 ] + +Deletion of the last rule referencing a given idletimer may happen at +the same time as a read of its file in sysfs: + +| ====================================================== +| WARNING: possible circular locking dependency detected +| 6.12.0-rc7-01692-g5e9a28f41134-dirty #594 Not 
tainted +| ------------------------------------------------------ +| iptables/3303 is trying to acquire lock: +| ffff8881057e04b8 (kn->active#48){++++}-{0:0}, at: __kernfs_remove+0x20 +| +| but task is already holding lock: +| ffffffffa0249068 (list_mutex){+.+.}-{3:3}, at: idletimer_tg_destroy_v] +| +| which lock already depends on the new lock. + +A simple reproducer is: + +| #!/bin/bash +| +| while true; do +| iptables -A INPUT -i foo -j IDLETIMER --timeout 10 --label "testme" +| iptables -D INPUT -i foo -j IDLETIMER --timeout 10 --label "testme" +| done & +| while true; do +| cat /sys/class/xt_idletimer/timers/testme >/dev/null +| done + +Avoid this by freeing list_mutex right after deleting the element from +the list, then continuing with the teardown. + +Fixes: 0902b469bd25 ("netfilter: xtables: idletimer target implementation") +Signed-off-by: Phil Sutter +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/xt_IDLETIMER.c | 52 +++++++++++++++++++----------------- + 1 file changed, 28 insertions(+), 24 deletions(-) + +diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c +index f8b25b6f5da7..9869ef3c2ab3 100644 +--- a/net/netfilter/xt_IDLETIMER.c ++++ b/net/netfilter/xt_IDLETIMER.c +@@ -409,21 +409,23 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) + + mutex_lock(&list_mutex); + +- if (--info->timer->refcnt == 0) { +- pr_debug("deleting timer %s\n", info->label); +- +- list_del(&info->timer->entry); +- timer_shutdown_sync(&info->timer->timer); +- cancel_work_sync(&info->timer->work); +- sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); +- kfree(info->timer->attr.attr.name); +- kfree(info->timer); +- } else { ++ if (--info->timer->refcnt > 0) { + pr_debug("decreased refcnt of timer %s to %u\n", + info->label, info->timer->refcnt); ++ mutex_unlock(&list_mutex); ++ return; + } + ++ pr_debug("deleting timer %s\n", info->label); ++ ++ list_del(&info->timer->entry); + 
mutex_unlock(&list_mutex); ++ ++ timer_shutdown_sync(&info->timer->timer); ++ cancel_work_sync(&info->timer->work); ++ sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); ++ kfree(info->timer->attr.attr.name); ++ kfree(info->timer); + } + + static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par) +@@ -434,25 +436,27 @@ static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par) + + mutex_lock(&list_mutex); + +- if (--info->timer->refcnt == 0) { +- pr_debug("deleting timer %s\n", info->label); +- +- list_del(&info->timer->entry); +- if (info->timer->timer_type & XT_IDLETIMER_ALARM) { +- alarm_cancel(&info->timer->alarm); +- } else { +- timer_shutdown_sync(&info->timer->timer); +- } +- cancel_work_sync(&info->timer->work); +- sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); +- kfree(info->timer->attr.attr.name); +- kfree(info->timer); +- } else { ++ if (--info->timer->refcnt > 0) { + pr_debug("decreased refcnt of timer %s to %u\n", + info->label, info->timer->refcnt); ++ mutex_unlock(&list_mutex); ++ return; + } + ++ pr_debug("deleting timer %s\n", info->label); ++ ++ list_del(&info->timer->entry); + mutex_unlock(&list_mutex); ++ ++ if (info->timer->timer_type & XT_IDLETIMER_ALARM) { ++ alarm_cancel(&info->timer->alarm); ++ } else { ++ timer_shutdown_sync(&info->timer->timer); ++ } ++ cancel_work_sync(&info->timer->work); ++ sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); ++ kfree(info->timer->attr.attr.name); ++ kfree(info->timer); + } + + +-- +2.39.5 + diff --git a/queue-6.6/netfilter-nf_tables-do-not-defer-rule-destruction-vi.patch b/queue-6.6/netfilter-nf_tables-do-not-defer-rule-destruction-vi.patch new file mode 100644 index 00000000000..373bc1ccb2f --- /dev/null +++ b/queue-6.6/netfilter-nf_tables-do-not-defer-rule-destruction-vi.patch @@ -0,0 +1,167 @@ +From bc0d378494a06bb6fd081d66017e5aa3164ec718 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 7 Dec 2024 12:14:48 +0100 +Subject: 
netfilter: nf_tables: do not defer rule destruction via call_rcu + +From: Florian Westphal + +[ Upstream commit b04df3da1b5c6f6dc7cdccc37941740c078c4043 ] + +nf_tables_chain_destroy can sleep, it can't be used from call_rcu +callbacks. + +Moreover, nf_tables_rule_release() is only safe for error unwinding, +while transaction mutex is held and the to-be-destroyed rule was not +exposed to either dataplane or dumps, as it deactivates+frees without +the required synchronize_rcu() in-between. + +nft_rule_expr_deactivate() callbacks will change ->use counters +of other chains/sets, see e.g. nft_lookup .deactivate callback, these +must be serialized via transaction mutex. + +Also add a few lockdep asserts to make this more explicit. + +Calling synchronize_rcu() isn't ideal, but fixing this without is hard +and way more intrusive. As-is, we can get: + +WARNING: .. net/netfilter/nf_tables_api.c:5515 nft_set_destroy+0x.. +Workqueue: events nf_tables_trans_destroy_work +RIP: 0010:nft_set_destroy+0x3fe/0x5c0 +Call Trace: + <TASK> + nf_tables_trans_destroy_work+0x6b7/0xad0 + process_one_work+0x64a/0xce0 + worker_thread+0x613/0x10d0 + +In case the synchronize_rcu becomes an issue, we can explore alternatives. + +One way would be to allocate nft_trans_rule objects + one nft_trans_chain +object, deactivate the rules + the chain and then defer the freeing to the +nft destroy workqueue. We'd still need to keep the synchronize_rcu path as +a fallback to handle -ENOMEM corner cases though. 
+ +Reported-by: syzbot+b26935466701e56cfdc2@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/all/67478d92.050a0220.253251.0062.GAE@google.com/T/ +Fixes: c03d278fdf35 ("netfilter: nf_tables: wait for rcu grace period on net_device removal") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_tables.h | 4 ---- + net/netfilter/nf_tables_api.c | 32 +++++++++++++++---------------- + 2 files changed, 15 insertions(+), 21 deletions(-) + +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h +index 804dcd3a7d8f..b5f9ee5810a3 100644 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -1080,7 +1080,6 @@ struct nft_rule_blob { + * @name: name of the chain + * @udlen: user data length + * @udata: user data in the chain +- * @rcu_head: rcu head for deferred release + * @blob_next: rule blob pointer to the next in the chain + */ + struct nft_chain { +@@ -1098,7 +1097,6 @@ struct nft_chain { + char *name; + u16 udlen; + u8 *udata; +- struct rcu_head rcu_head; + + /* Only used during control plane commit phase: */ + struct nft_rule_blob *blob_next; +@@ -1242,7 +1240,6 @@ static inline void nft_use_inc_restore(u32 *use) + * @sets: sets in the table + * @objects: stateful objects in the table + * @flowtables: flow tables in the table +- * @net: netnamespace this table belongs to + * @hgenerator: handle generator state + * @handle: table handle + * @use: number of chain references to this table +@@ -1259,7 +1256,6 @@ struct nft_table { + struct list_head sets; + struct list_head objects; + struct list_head flowtables; +- possible_net_t net; + u64 hgenerator; + u64 handle; + u32 use; +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index eee7997048fb..a110aad45fe4 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -1431,7 +1431,6 @@ static int nf_tables_newtable(struct sk_buff 
*skb, const struct nfnl_info *info, + INIT_LIST_HEAD(&table->sets); + INIT_LIST_HEAD(&table->objects); + INIT_LIST_HEAD(&table->flowtables); +- write_pnet(&table->net, net); + table->family = family; + table->flags = flags; + table->handle = ++nft_net->table_handle; +@@ -3784,8 +3783,11 @@ void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) + kfree(rule); + } + ++/* can only be used if rule is no longer visible to dumps */ + static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) + { ++ lockdep_commit_lock_is_held(ctx->net); ++ + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); + nf_tables_rule_destroy(ctx, rule); + } +@@ -5561,6 +5563,8 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, + enum nft_trans_phase phase) + { ++ lockdep_commit_lock_is_held(ctx->net); ++ + switch (phase) { + case NFT_TRANS_PREPARE_ERROR: + nft_set_trans_unbind(ctx, set); +@@ -11182,19 +11186,6 @@ static void __nft_release_basechain_now(struct nft_ctx *ctx) + nf_tables_chain_destroy(ctx->chain); + } + +-static void nft_release_basechain_rcu(struct rcu_head *head) +-{ +- struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head); +- struct nft_ctx ctx = { +- .family = chain->table->family, +- .chain = chain, +- .net = read_pnet(&chain->table->net), +- }; +- +- __nft_release_basechain_now(&ctx); +- put_net(ctx.net); +-} +- + int __nft_release_basechain(struct nft_ctx *ctx) + { + struct nft_rule *rule; +@@ -11209,11 +11200,18 @@ int __nft_release_basechain(struct nft_ctx *ctx) + nft_chain_del(ctx->chain); + nft_use_dec(&ctx->table->use); + +- if (maybe_get_net(ctx->net)) +- call_rcu(&ctx->chain->rcu_head, nft_release_basechain_rcu); +- else ++ if (!maybe_get_net(ctx->net)) { + __nft_release_basechain_now(ctx); ++ return 0; ++ } ++ ++ /* wait for ruleset dumps to complete. 
Owning chain is no longer in ++ * lists, so new dumps can't find any of these rules anymore. ++ */ ++ synchronize_rcu(); + ++ __nft_release_basechain_now(ctx); ++ put_net(ctx->net); + return 0; + } + EXPORT_SYMBOL_GPL(__nft_release_basechain); +-- +2.39.5 + diff --git a/queue-6.6/ptp-kvm-x86-return-eopnotsupp-instead-of-enodev-from.patch b/queue-6.6/ptp-kvm-x86-return-eopnotsupp-instead-of-enodev-from.patch new file mode 100644 index 00000000000..30f75498022 --- /dev/null +++ b/queue-6.6/ptp-kvm-x86-return-eopnotsupp-instead-of-enodev-from.patch @@ -0,0 +1,63 @@ +From de27ef06af262db761144b8c0a297e20af4a3064 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Dec 2024 18:09:55 +0100 +Subject: ptp: kvm: x86: Return EOPNOTSUPP instead of ENODEV from + kvm_arch_ptp_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Thomas Weißschuh + +[ Upstream commit 5e7aa97c7acf171275ac02a8bb018c31b8918d13 ] + +The caller, ptp_kvm_init(), emits a warning if kvm_arch_ptp_init() exits +with any error which is not EOPNOTSUPP: + + "fail to initialize ptp_kvm" + +Replace ENODEV with EOPNOTSUPP to avoid this spurious warning, +aligning with the ARM implementation. 
+ +Fixes: a86ed2cfa13c ("ptp: Don't print an error if ptp_kvm is not supported") +Signed-off-by: Thomas Weißschuh +Link: https://patch.msgid.link/20241203-kvm_ptp-eopnotsuppp-v2-1-d1d060f27aa6@weissschuh.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/ptp/ptp_kvm_x86.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c +index 902844cc1a17..5e5b2ef78547 100644 +--- a/drivers/ptp/ptp_kvm_x86.c ++++ b/drivers/ptp/ptp_kvm_x86.c +@@ -26,7 +26,7 @@ int kvm_arch_ptp_init(void) + long ret; + + if (!kvm_para_available()) +- return -ENODEV; ++ return -EOPNOTSUPP; + + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { + p = alloc_page(GFP_KERNEL | __GFP_ZERO); +@@ -46,14 +46,14 @@ int kvm_arch_ptp_init(void) + + clock_pair_gpa = slow_virt_to_phys(clock_pair); + if (!pvclock_get_pvti_cpu0_va()) { +- ret = -ENODEV; ++ ret = -EOPNOTSUPP; + goto err; + } + + ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa, + KVM_CLOCK_PAIRING_WALLCLOCK); + if (ret == -KVM_ENOSYS) { +- ret = -ENODEV; ++ ret = -EOPNOTSUPP; + goto err; + } + +-- +2.39.5 + diff --git a/queue-6.6/qca_spi-fix-clock-speed-for-multiple-qca7000.patch b/queue-6.6/qca_spi-fix-clock-speed-for-multiple-qca7000.patch new file mode 100644 index 00000000000..4dab6ca7225 --- /dev/null +++ b/queue-6.6/qca_spi-fix-clock-speed-for-multiple-qca7000.patch @@ -0,0 +1,98 @@ +From 8848e49e87c9daf94ec905447cb7b57c977da7e0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Dec 2024 19:46:42 +0100 +Subject: qca_spi: Fix clock speed for multiple QCA7000 + +From: Stefan Wahren + +[ Upstream commit 4dba406fac06b009873fe7a28231b9b7e4288b09 ] + +Storing the maximum clock speed in module parameter qcaspi_clkspeed +has the unintended side effect that the first probed instance +defines the value for all other instances. Fix this issue by storing +it in max_speed_hz of the relevant SPI device. 
+ +This fix keeps the priority of the speed parameter (module parameter, +device tree property, driver default). Btw this uses the opportunity +to get rid of the unused member clkspeed. + +Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") +Signed-off-by: Stefan Wahren +Link: https://patch.msgid.link/20241206184643.123399-2-wahrenst@gmx.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/qualcomm/qca_spi.c | 24 ++++++++++-------------- + drivers/net/ethernet/qualcomm/qca_spi.h | 1 - + 2 files changed, 10 insertions(+), 15 deletions(-) + +diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c +index b697a9e6face..78200c1b5ba9 100644 +--- a/drivers/net/ethernet/qualcomm/qca_spi.c ++++ b/drivers/net/ethernet/qualcomm/qca_spi.c +@@ -828,7 +828,6 @@ qcaspi_netdev_init(struct net_device *dev) + + dev->mtu = QCAFRM_MAX_MTU; + dev->type = ARPHRD_ETHER; +- qca->clkspeed = qcaspi_clkspeed; + qca->burst_len = qcaspi_burst_len; + qca->spi_thread = NULL; + qca->buffer_size = (dev->mtu + VLAN_ETH_HLEN + QCAFRM_HEADER_LEN + +@@ -917,17 +916,15 @@ qca_spi_probe(struct spi_device *spi) + legacy_mode = of_property_read_bool(spi->dev.of_node, + "qca,legacy-mode"); + +- if (qcaspi_clkspeed == 0) { +- if (spi->max_speed_hz) +- qcaspi_clkspeed = spi->max_speed_hz; +- else +- qcaspi_clkspeed = QCASPI_CLK_SPEED; +- } ++ if (qcaspi_clkspeed) ++ spi->max_speed_hz = qcaspi_clkspeed; ++ else if (!spi->max_speed_hz) ++ spi->max_speed_hz = QCASPI_CLK_SPEED; + +- if ((qcaspi_clkspeed < QCASPI_CLK_SPEED_MIN) || +- (qcaspi_clkspeed > QCASPI_CLK_SPEED_MAX)) { +- dev_err(&spi->dev, "Invalid clkspeed: %d\n", +- qcaspi_clkspeed); ++ if (spi->max_speed_hz < QCASPI_CLK_SPEED_MIN || ++ spi->max_speed_hz > QCASPI_CLK_SPEED_MAX) { ++ dev_err(&spi->dev, "Invalid clkspeed: %u\n", ++ spi->max_speed_hz); + return -EINVAL; + } + +@@ -952,14 +949,13 @@ qca_spi_probe(struct spi_device *spi) + return 
-EINVAL; + } + +- dev_info(&spi->dev, "ver=%s, clkspeed=%d, burst_len=%d, pluggable=%d\n", ++ dev_info(&spi->dev, "ver=%s, clkspeed=%u, burst_len=%d, pluggable=%d\n", + QCASPI_DRV_VERSION, +- qcaspi_clkspeed, ++ spi->max_speed_hz, + qcaspi_burst_len, + qcaspi_pluggable); + + spi->mode = SPI_MODE_3; +- spi->max_speed_hz = qcaspi_clkspeed; + if (spi_setup(spi) < 0) { + dev_err(&spi->dev, "Unable to setup SPI device\n"); + return -EFAULT; +diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h +index 58ad910068d4..b3b17bd46e12 100644 +--- a/drivers/net/ethernet/qualcomm/qca_spi.h ++++ b/drivers/net/ethernet/qualcomm/qca_spi.h +@@ -101,7 +101,6 @@ struct qcaspi { + #endif + + /* user configurable options */ +- u32 clkspeed; + u8 legacy_mode; + u16 burst_len; + }; +-- +2.39.5 + diff --git a/queue-6.6/qca_spi-make-driver-probing-reliable.patch b/queue-6.6/qca_spi-make-driver-probing-reliable.patch new file mode 100644 index 00000000000..a273123ec3c --- /dev/null +++ b/queue-6.6/qca_spi-make-driver-probing-reliable.patch @@ -0,0 +1,40 @@ +From a29c83fe03f8e0816e4fa4ef32edceb4b6a97c21 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Dec 2024 19:46:43 +0100 +Subject: qca_spi: Make driver probing reliable + +From: Stefan Wahren + +[ Upstream commit becc6399ce3b724cffe9ccb7ef0bff440bb1b62b ] + +The module parameter qcaspi_pluggable controls if QCA7000 signature +should be checked at driver probe (current default) or not. Unfortunately +this could fail in case the chip is temporarily in reset, which isn't under +total control by the Linux host. So disable this check by default +in order to avoid unexpected probe failures. 
+ +Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") +Signed-off-by: Stefan Wahren +Link: https://patch.msgid.link/20241206184643.123399-3-wahrenst@gmx.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/qualcomm/qca_spi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c +index 78200c1b5ba9..c24235d3b9f3 100644 +--- a/drivers/net/ethernet/qualcomm/qca_spi.c ++++ b/drivers/net/ethernet/qualcomm/qca_spi.c +@@ -66,7 +66,7 @@ MODULE_PARM_DESC(qcaspi_burst_len, "Number of data bytes per burst. Use 1-5000." + + #define QCASPI_PLUGGABLE_MIN 0 + #define QCASPI_PLUGGABLE_MAX 1 +-static int qcaspi_pluggable = QCASPI_PLUGGABLE_MIN; ++static int qcaspi_pluggable = QCASPI_PLUGGABLE_MAX; + module_param(qcaspi_pluggable, int, 0); + MODULE_PARM_DESC(qcaspi_pluggable, "Pluggable SPI connection (yes/no)."); + +-- +2.39.5 + diff --git a/queue-6.6/selftests-mlxsw-sharedbuffer-ensure-no-extra-packets.patch b/queue-6.6/selftests-mlxsw-sharedbuffer-ensure-no-extra-packets.patch new file mode 100644 index 00000000000..979b29fe32b --- /dev/null +++ b/queue-6.6/selftests-mlxsw-sharedbuffer-ensure-no-extra-packets.patch @@ -0,0 +1,140 @@ +From ee356cc48062ebbea86b6ddab5d9007c1f4b0831 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 17:36:01 +0100 +Subject: selftests: mlxsw: sharedbuffer: Ensure no extra packets are counted + +From: Danielle Ratson + +[ Upstream commit 5f2c7ab15fd806043db1a7d54b5ec36be0bd93b1 ] + +The test assumes that the packet it is sending is the only packet being +passed to the device. + +However, it is not the case and so other packets are filling the buffers +as well. Therefore, the test sometimes fails because it is reading a +maximum occupancy that is larger than expected. + +Add egress filters on $h1 and $h2 that will guarantee the above. 
+ +Fixes: a865ad999603 ("selftests: mlxsw: Add shared buffer traffic test") +Signed-off-by: Danielle Ratson +Reviewed-by: Ido Schimmel +Signed-off-by: Ido Schimmel +Signed-off-by: Petr Machata +Link: https://patch.msgid.link/64c28bc9b1cc1d78c4a73feda7cedbe9526ccf8b.1733414773.git.petrm@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../drivers/net/mlxsw/sharedbuffer.sh | 40 +++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +index 21bebc5726f6..c068e6c2a580 100755 +--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh ++++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +@@ -22,20 +22,34 @@ SB_ITC=0 + h1_create() + { + simple_if_init $h1 192.0.1.1/24 ++ tc qdisc add dev $h1 clsact ++ ++ # Add egress filter on $h1 that will guarantee that the packet sent, ++ # will be the only packet being passed to the device. ++ tc filter add dev $h1 egress pref 2 handle 102 matchall action drop + } + + h1_destroy() + { ++ tc filter del dev $h1 egress pref 2 handle 102 matchall action drop ++ tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.1.1/24 + } + + h2_create() + { + simple_if_init $h2 192.0.1.2/24 ++ tc qdisc add dev $h2 clsact ++ ++ # Add egress filter on $h2 that will guarantee that the packet sent, ++ # will be the only packet being passed to the device. 
++ tc filter add dev $h2 egress pref 1 handle 101 matchall action drop + } + + h2_destroy() + { ++ tc filter del dev $h2 egress pref 1 handle 101 matchall action drop ++ tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.1.2/24 + } + +@@ -101,6 +115,11 @@ port_pool_test() + local exp_max_occ=$(devlink_cell_size_get) + local max_occ + ++ tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \ ++ src_mac $h1mac dst_mac $h2mac \ ++ src_ip 192.0.1.1 dst_ip 192.0.1.2 \ ++ action pass ++ + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ +@@ -117,6 +136,11 @@ port_pool_test() + max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ) + check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress pool" ++ ++ tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \ ++ src_mac $h1mac dst_mac $h2mac \ ++ src_ip 192.0.1.1 dst_ip 192.0.1.2 \ ++ action pass + } + + port_tc_ip_test() +@@ -124,6 +148,11 @@ port_tc_ip_test() + local exp_max_occ=$(devlink_cell_size_get) + local max_occ + ++ tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \ ++ src_mac $h1mac dst_mac $h2mac \ ++ src_ip 192.0.1.1 dst_ip 192.0.1.2 \ ++ action pass ++ + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ +@@ -140,6 +169,11 @@ port_tc_ip_test() + max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ) + check_err $? 
"Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress TC - IP packet" ++ ++ tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \ ++ src_mac $h1mac dst_mac $h2mac \ ++ src_ip 192.0.1.1 dst_ip 192.0.1.2 \ ++ action pass + } + + port_tc_arp_test() +@@ -147,6 +181,9 @@ port_tc_arp_test() + local exp_max_occ=$(devlink_cell_size_get) + local max_occ + ++ tc filter add dev $h1 egress protocol arp pref 1 handle 101 flower \ ++ src_mac $h1mac action pass ++ + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q +@@ -162,6 +199,9 @@ port_tc_arp_test() + max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ) + check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress TC - ARP packet" ++ ++ tc filter del dev $h1 egress protocol arp pref 1 handle 101 flower \ ++ src_mac $h1mac action pass + } + + setup_prepare() +-- +2.39.5 + diff --git a/queue-6.6/selftests-mlxsw-sharedbuffer-remove-duplicate-test-c.patch b/queue-6.6/selftests-mlxsw-sharedbuffer-remove-duplicate-test-c.patch new file mode 100644 index 00000000000..b67a28c915b --- /dev/null +++ b/queue-6.6/selftests-mlxsw-sharedbuffer-remove-duplicate-test-c.patch @@ -0,0 +1,58 @@ +From 7b56efe1b739cd4d6f22b672d68bc2c100466660 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 17:36:00 +0100 +Subject: selftests: mlxsw: sharedbuffer: Remove duplicate test cases + +From: Danielle Ratson + +[ Upstream commit 6c46ad4d1bb2e8ec2265296e53765190f6e32f33 ] + +On both port_tc_ip_test() and port_tc_arp_test(), the max occupancy is +checked on $h2 twice, when only the error message is different and does not +match the check itself. + +Remove the two duplicated test cases from the test. 
+ +Fixes: a865ad999603 ("selftests: mlxsw: Add shared buffer traffic test") +Signed-off-by: Danielle Ratson +Reviewed-by: Ido Schimmel +Signed-off-by: Ido Schimmel +Signed-off-by: Petr Machata +Link: https://patch.msgid.link/d9eb26f6fc16a06a30b5c2c16ad80caf502bc561.1733414773.git.petrm@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../selftests/drivers/net/mlxsw/sharedbuffer.sh | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +index a7b3d6cf3185..21bebc5726f6 100755 +--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh ++++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +@@ -131,11 +131,6 @@ port_tc_ip_test() + + devlink sb occupancy snapshot $DEVLINK_DEV + +- RET=0 +- max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) +- check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" +- log_test "physical port's($h1) ingress TC - IP packet" +- + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" +@@ -158,11 +153,6 @@ port_tc_arp_test() + + devlink sb occupancy snapshot $DEVLINK_DEV + +- RET=0 +- max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) +- check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" +- log_test "physical port's($h1) ingress TC - ARP packet" +- + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? 
"Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" +-- +2.39.5 + diff --git a/queue-6.6/selftests-mlxsw-sharedbuffer-remove-h1-ingress-test-.patch b/queue-6.6/selftests-mlxsw-sharedbuffer-remove-h1-ingress-test-.patch new file mode 100644 index 00000000000..ba4d3f98f7b --- /dev/null +++ b/queue-6.6/selftests-mlxsw-sharedbuffer-remove-h1-ingress-test-.patch @@ -0,0 +1,48 @@ +From 4d0f7ec255559b957e0bbf03aafd88a2d19b8782 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2024 17:35:59 +0100 +Subject: selftests: mlxsw: sharedbuffer: Remove h1 ingress test case + +From: Danielle Ratson + +[ Upstream commit cf3515c556907b4da290967a2a6cbbd9ee0ee723 ] + +The test is sending only one packet generated with mausezahn from $h1 to +$h2. However, for some reason, it is testing for non-zero maximum occupancy +in both the ingress pool of $h1 and $h2. The former only passes when $h2 +happens to send a packet. + +Avoid intermittent failures by removing unintentional test case +regarding the ingress pool of $h1. 
+ +Fixes: a865ad999603 ("selftests: mlxsw: Add shared buffer traffic test") +Signed-off-by: Danielle Ratson +Reviewed-by: Ido Schimmel +Signed-off-by: Ido Schimmel +Signed-off-by: Petr Machata +Link: https://patch.msgid.link/5b7344608d5e06f38209e48d8af8c92fa11b6742.1733414773.git.petrm@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +index 0c47faff9274..a7b3d6cf3185 100755 +--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh ++++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +@@ -108,11 +108,6 @@ port_pool_test() + + devlink sb occupancy snapshot $DEVLINK_DEV + +- RET=0 +- max_occ=$(sb_occ_pool_check $dl_port1 $SB_POOL_ING $exp_max_occ) +- check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" +- log_test "physical port's($h1) ingress pool" +- + RET=0 + max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ) + check_err $? 
"Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" +-- +2.39.5 + diff --git a/queue-6.6/series b/queue-6.6/series index 56cf6854374..2f4b1f43d90 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -32,3 +32,63 @@ bpf-perf-fix-invalid-prog_array-access-in-perf_event_detach_bpf_prog.patch bpf-sockmap-fix-race-between-element-replace-and-close.patch bpf-sockmap-fix-update-element-with-same.patch rtla-timerlat-make-timerlat_hist_cpu-_count-unsigned-long-long.patch +wifi-nl80211-fix-nl80211_attr_mlo_link_id-off-by-one.patch +wifi-mac80211-init-cnt-before-accessing-elem-in-ieee.patch +wifi-mac80211-clean-up-ret-in-sta_link_apply_paramet.patch +wifi-mac80211-fix-station-nss-capability-initializat.patch +acpi-nfit-vmalloc-out-of-bounds-read-in-acpi_nfit_ct.patch +amdgpu-uvd-get-ring-reference-from-rq-scheduler.patch +batman-adv-do-not-send-uninitialized-tt-changes.patch +batman-adv-remove-uninitialized-data-in-full-table-t.patch +batman-adv-do-not-let-tt-changes-list-grows-indefini.patch +tipc-fix-null-deref-in-cleanup_bearer.patch +net-mlx5-dr-prevent-potential-error-pointer-derefere.patch +wifi-cfg80211-sme-init-n_channels-before-channels-ac.patch +selftests-mlxsw-sharedbuffer-remove-h1-ingress-test-.patch +selftests-mlxsw-sharedbuffer-remove-duplicate-test-c.patch +selftests-mlxsw-sharedbuffer-ensure-no-extra-packets.patch +ptp-kvm-x86-return-eopnotsupp-instead-of-enodev-from.patch +net-lapb-increase-lapb_header_len.patch +net-defer-final-struct-net-free-in-netns-dismantle.patch +net-mscc-ocelot-fix-memory-leak-on-ocelot_port_add_t.patch +net-mscc-ocelot-improve-handling-of-tx-timestamp-for.patch +net-mscc-ocelot-ocelot-ts_id_lock-and-ocelot_port-tx.patch +net-mscc-ocelot-be-resilient-to-loss-of-ptp-packets-.patch +net-mscc-ocelot-perform-error-cleanup-in-ocelot_hwst.patch +spi-aspeed-fix-an-error-handling-path-in-aspeed_spi_.patch +net-sparx5-fix-fdma-performance-issue.patch +net-sparx5-fix-the-maximum-frame-length-register.patch 
+acpi-resource-fix-memory-resource-type-union-access.patch +cxgb4-use-port-number-to-set-mac-addr.patch +qca_spi-fix-clock-speed-for-multiple-qca7000.patch +qca_spi-make-driver-probing-reliable.patch +alsa-control-avoid-warn-for-symlink-errors.patch +asoc-amd-yc-fix-the-wrong-return-value.patch +documentation-pm-clarify-pm_runtime_resume_and_get-r.patch +net-rswitch-drop-unused-argument-return-value.patch +net-rswitch-use-unsigned-int-for-desc-related-array-.patch +net-rswitch-use-build_skb-for-rx.patch +net-rswitch-add-unmap_addrs-instead-of-dma-address-i.patch +net-rswitch-add-a-setting-ext-descriptor-function.patch +net-rswitch-add-jumbo-frames-handling-for-tx.patch +net-renesas-rswitch-fix-race-window-between-tx-start.patch +net-renesas-rswitch-fix-leaked-pointer-on-error-path.patch +net-renesas-rswitch-avoid-use-after-put-for-a-device.patch +net-renesas-rswitch-handle-stop-vs-interrupt-race.patch +libperf-evlist-fix-cpu-argument-on-hybrid-platform.patch +netfilter-idletimer-fix-for-possible-abba-deadlock.patch +netfilter-nf_tables-do-not-defer-rule-destruction-vi.patch +net-dsa-felix-fix-stuck-cpu-injected-packets-with-sh.patch +net-sched-netem-account-for-backlog-updates-from-chi.patch +bonding-fix-feature-propagation-of-netif_f_gso_encap.patch +team-fix-feature-propagation-of-netif_f_gso_encap_al.patch +acpica-events-evxfregn-don-t-release-the-contextmute.patch +bluetooth-iso-reassociate-a-socket-with-an-active-bi.patch +bluetooth-hci_event-fix-using-rcu_read_-un-lock-whil.patch +bluetooth-iso-fix-recursive-locking-warning.patch +bluetooth-sco-add-support-for-16-bits-transparent-vo.patch +bluetooth-btmtk-avoid-uaf-in-btmtk_process_coredump.patch +net-renesas-rswitch-fix-initial-mpic-register-settin.patch +net-dsa-microchip-ksz9896-register-regmap-alignment-.patch +blk-iocost-avoid-using-clamp-on-inuse-in-__propagate.patch +kselftest-arm64-abi-fix-svcr-detection.patch diff --git a/queue-6.6/spi-aspeed-fix-an-error-handling-path-in-aspeed_spi_.patch 
b/queue-6.6/spi-aspeed-fix-an-error-handling-path-in-aspeed_spi_.patch new file mode 100644 index 00000000000..7296d06e220 --- /dev/null +++ b/queue-6.6/spi-aspeed-fix-an-error-handling-path-in-aspeed_spi_.patch @@ -0,0 +1,64 @@ +From 46687635933502f406cf068fe15b536fdbb3165f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Nov 2024 22:30:29 +0100 +Subject: spi: aspeed: Fix an error handling path in + aspeed_spi_[read|write]_user() + +From: Christophe JAILLET + +[ Upstream commit c84dda3751e945a67d71cbe3af4474aad24a5794 ] + +A aspeed_spi_start_user() is not balanced by a corresponding +aspeed_spi_stop_user(). +Add the missing call. + +Fixes: e3228ed92893 ("spi: spi-mem: Convert Aspeed SMC driver to spi-mem") +Signed-off-by: Christophe JAILLET +Link: https://patch.msgid.link/4052aa2f9a9ea342fa6af83fa991b55ce5d5819e.1732051814.git.christophe.jaillet@wanadoo.fr +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/spi/spi-aspeed-smc.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/drivers/spi/spi-aspeed-smc.c b/drivers/spi/spi-aspeed-smc.c +index 21b0fa646c7d..38a0613d434a 100644 +--- a/drivers/spi/spi-aspeed-smc.c ++++ b/drivers/spi/spi-aspeed-smc.c +@@ -239,7 +239,7 @@ static ssize_t aspeed_spi_read_user(struct aspeed_spi_chip *chip, + + ret = aspeed_spi_send_cmd_addr(chip, op->addr.nbytes, offset, op->cmd.opcode); + if (ret < 0) +- return ret; ++ goto stop_user; + + if (op->dummy.buswidth && op->dummy.nbytes) { + for (i = 0; i < op->dummy.nbytes / op->dummy.buswidth; i++) +@@ -249,8 +249,9 @@ static ssize_t aspeed_spi_read_user(struct aspeed_spi_chip *chip, + aspeed_spi_set_io_mode(chip, io_mode); + + aspeed_spi_read_from_ahb(buf, chip->ahb_base, len); ++stop_user: + aspeed_spi_stop_user(chip); +- return 0; ++ return ret; + } + + static ssize_t aspeed_spi_write_user(struct aspeed_spi_chip *chip, +@@ -261,10 +262,11 @@ static ssize_t aspeed_spi_write_user(struct aspeed_spi_chip *chip, + 
aspeed_spi_start_user(chip); + ret = aspeed_spi_send_cmd_addr(chip, op->addr.nbytes, op->addr.val, op->cmd.opcode); + if (ret < 0) +- return ret; ++ goto stop_user; + aspeed_spi_write_to_ahb(chip->ahb_base, op->data.buf.out, op->data.nbytes); ++stop_user: + aspeed_spi_stop_user(chip); +- return 0; ++ return ret; + } + + /* support for 1-1-1, 1-1-2 or 1-1-4 */ +-- +2.39.5 + diff --git a/queue-6.6/team-fix-feature-propagation-of-netif_f_gso_encap_al.patch b/queue-6.6/team-fix-feature-propagation-of-netif_f_gso_encap_al.patch new file mode 100644 index 00000000000..0a41bf46204 --- /dev/null +++ b/queue-6.6/team-fix-feature-propagation-of-netif_f_gso_encap_al.patch @@ -0,0 +1,44 @@ +From 7616238a53a4e71e7e0d89f2364d03549dc33dde Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Dec 2024 15:12:45 +0100 +Subject: team: Fix feature propagation of NETIF_F_GSO_ENCAP_ALL + +From: Daniel Borkmann + +[ Upstream commit 98712844589e06d9aa305b5077169942139fd75c ] + +Similar to bonding driver, add NETIF_F_GSO_ENCAP_ALL to TEAM_VLAN_FEATURES +in order to support slave devices which propagate NETIF_F_GSO_UDP_TUNNEL & +NETIF_F_GSO_UDP_TUNNEL_CSUM as vlan_features. 
+ +Fixes: 3625920b62c3 ("teaming: fix vlan_features computing") +Signed-off-by: Daniel Borkmann +Cc: Nikolay Aleksandrov +Cc: Ido Schimmel +Cc: Jiri Pirko +Reviewed-by: Nikolay Aleksandrov +Reviewed-by: Hangbin Liu +Link: https://patch.msgid.link/20241210141245.327886-5-daniel@iogearbox.net +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/team/team.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c +index f575f225d417..ae257fa43d87 100644 +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -982,7 +982,8 @@ static void team_port_disable(struct team *team, + + #define TEAM_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \ +- NETIF_F_HIGHDMA | NETIF_F_LRO) ++ NETIF_F_HIGHDMA | NETIF_F_LRO | \ ++ NETIF_F_GSO_ENCAP_ALL) + + #define TEAM_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE) +-- +2.39.5 + diff --git a/queue-6.6/tipc-fix-null-deref-in-cleanup_bearer.patch b/queue-6.6/tipc-fix-null-deref-in-cleanup_bearer.patch new file mode 100644 index 00000000000..265be96dedb --- /dev/null +++ b/queue-6.6/tipc-fix-null-deref-in-cleanup_bearer.patch @@ -0,0 +1,82 @@ +From f5055bc401465a74fa0a8d75c4e4b1ec000c1fa6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2024 17:05:48 +0000 +Subject: tipc: fix NULL deref in cleanup_bearer() + +From: Eric Dumazet + +[ Upstream commit b04d86fff66b15c07505d226431f808c15b1703c ] + +syzbot found [1] that after blamed commit, ub->ubsock->sk +was NULL when attempting the atomic_dec() : + +atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count); + +Fix this by caching the tipc_net pointer. 
+ +[1] + +Oops: general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] PREEMPT SMP KASAN PTI +KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] +CPU: 0 UID: 0 PID: 5896 Comm: kworker/0:3 Not tainted 6.13.0-rc1-next-20241203-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 +Workqueue: events cleanup_bearer + RIP: 0010:read_pnet include/net/net_namespace.h:387 [inline] + RIP: 0010:sock_net include/net/sock.h:655 [inline] + RIP: 0010:cleanup_bearer+0x1f7/0x280 net/tipc/udp_media.c:820 +Code: 18 48 89 d8 48 c1 e8 03 42 80 3c 28 00 74 08 48 89 df e8 3c f7 99 f6 48 8b 1b 48 83 c3 30 e8 f0 e4 60 00 48 89 d8 48 c1 e8 03 <42> 80 3c 28 00 74 08 48 89 df e8 1a f7 99 f6 49 83 c7 e8 48 8b 1b +RSP: 0018:ffffc9000410fb70 EFLAGS: 00010206 +RAX: 0000000000000006 RBX: 0000000000000030 RCX: ffff88802fe45a00 +RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffffc9000410f900 +RBP: ffff88807e1f0908 R08: ffffc9000410f907 R09: 1ffff92000821f20 +R10: dffffc0000000000 R11: fffff52000821f21 R12: ffff888031d19980 +R13: dffffc0000000000 R14: dffffc0000000000 R15: ffff88807e1f0918 +FS: 0000000000000000(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000556ca050b000 CR3: 0000000031c0c000 CR4: 00000000003526f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + +Fixes: 6a2fa13312e5 ("tipc: Fix use-after-free of kernel socket in cleanup_bearer().") +Reported-by: syzbot+46aa5474f179dacd1a3b@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/67508b5f.050a0220.17bd51.0070.GAE@google.com/T/#u +Signed-off-by: Eric Dumazet +Reviewed-by: Kuniyuki Iwashima +Link: https://patch.msgid.link/20241204170548.4152658-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/tipc/udp_media.c | 7 ++++++- + 1 
file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c +index 70a39e29a635..b16ca400ff55 100644 +--- a/net/tipc/udp_media.c ++++ b/net/tipc/udp_media.c +@@ -807,6 +807,7 @@ static void cleanup_bearer(struct work_struct *work) + { + struct udp_bearer *ub = container_of(work, struct udp_bearer, work); + struct udp_replicast *rcast, *tmp; ++ struct tipc_net *tn; + + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + dst_cache_destroy(&rcast->dst_cache); +@@ -814,10 +815,14 @@ static void cleanup_bearer(struct work_struct *work) + kfree_rcu(rcast, rcu); + } + ++ tn = tipc_net(sock_net(ub->ubsock->sk)); ++ + dst_cache_destroy(&ub->rcast.dst_cache); + udp_tunnel_sock_release(ub->ubsock); ++ ++ /* Note: could use a call_rcu() to avoid another synchronize_net() */ + synchronize_net(); +- atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count); ++ atomic_dec(&tn->wq_count); + kfree(ub); + } + +-- +2.39.5 + diff --git a/queue-6.6/wifi-cfg80211-sme-init-n_channels-before-channels-ac.patch b/queue-6.6/wifi-cfg80211-sme-init-n_channels-before-channels-ac.patch new file mode 100644 index 00000000000..e2edb7c8c11 --- /dev/null +++ b/queue-6.6/wifi-cfg80211-sme-init-n_channels-before-channels-ac.patch @@ -0,0 +1,38 @@ +From 7931f13598a9f2fa0a877b44346744cac60dc2e7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Dec 2024 23:20:49 +0800 +Subject: wifi: cfg80211: sme: init n_channels before channels[] access + +From: Haoyu Li + +[ Upstream commit f1d3334d604cc32db63f6e2b3283011e02294e54 ] + +With the __counted_by annotation in cfg80211_scan_request struct, +the "n_channels" struct member must be set before accessing the +"channels" array. Failing to do so will trigger a runtime warning +when enabling CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. 
+ +Fixes: e3eac9f32ec0 ("wifi: cfg80211: Annotate struct cfg80211_scan_request with __counted_by") +Signed-off-by: Haoyu Li +Link: https://patch.msgid.link/20241203152049.348806-1-lihaoyu499@gmail.com +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/wireless/sme.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/wireless/sme.c b/net/wireless/sme.c +index 591cda99d72f..70881782c25c 100644 +--- a/net/wireless/sme.c ++++ b/net/wireless/sme.c +@@ -83,6 +83,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) + if (!request) + return -ENOMEM; + ++ request->n_channels = n_channels; + if (wdev->conn->params.channel) { + enum nl80211_band band = wdev->conn->params.channel->band; + struct ieee80211_supported_band *sband = +-- +2.39.5 + diff --git a/queue-6.6/wifi-mac80211-clean-up-ret-in-sta_link_apply_paramet.patch b/queue-6.6/wifi-mac80211-clean-up-ret-in-sta_link_apply_paramet.patch new file mode 100644 index 00000000000..43d86c04c71 --- /dev/null +++ b/queue-6.6/wifi-mac80211-clean-up-ret-in-sta_link_apply_paramet.patch @@ -0,0 +1,55 @@ +From 9144b6689363b54e2eb382446d74b177183bc172 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jun 2024 13:57:19 +0300 +Subject: wifi: mac80211: clean up 'ret' in sta_link_apply_parameters() + +From: Johannes Berg + +[ Upstream commit 642508a42f74d7467aae7c56dff3016db64a25bd ] + +There's no need to have the always-zero ret variable in +the function scope, move it into the inner scope only. 
+ +Signed-off-by: Johannes Berg +Signed-off-by: Miri Korenblit +Link: https://msgid.link/20240605135233.eb7a24632d98.I72d7fe1da89d4b89bcfd0f5fb9057e3e69355cfe@changeid +Signed-off-by: Johannes Berg +Stable-dep-of: 819e0f1e58e0 ("wifi: mac80211: fix station NSS capability initialization order") +Signed-off-by: Sasha Levin +--- + net/mac80211/cfg.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c +index fe4469af3cc9..f9395cd80051 100644 +--- a/net/mac80211/cfg.c ++++ b/net/mac80211/cfg.c +@@ -1795,7 +1795,6 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, + struct sta_info *sta, bool new_link, + struct link_station_parameters *params) + { +- int ret = 0; + struct ieee80211_supported_band *sband; + struct ieee80211_sub_if_data *sdata = sta->sdata; + u32 link_id = params->link_id < 0 ? 0 : params->link_id; +@@ -1837,6 +1836,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, + } + + if (params->txpwr_set) { ++ int ret; ++ + link_sta->pub->txpwr.type = params->txpwr.type; + if (params->txpwr.type == NL80211_TX_POWER_LIMITED) + link_sta->pub->txpwr.power = params->txpwr.power; +@@ -1889,7 +1890,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, + + ieee80211_sta_init_nss(link_sta); + +- return ret; ++ return 0; + } + + static int sta_apply_parameters(struct ieee80211_local *local, +-- +2.39.5 + diff --git a/queue-6.6/wifi-mac80211-fix-station-nss-capability-initializat.patch b/queue-6.6/wifi-mac80211-fix-station-nss-capability-initializat.patch new file mode 100644 index 00000000000..59e761cf217 --- /dev/null +++ b/queue-6.6/wifi-mac80211-fix-station-nss-capability-initializat.patch @@ -0,0 +1,47 @@ +From 85752c8ff2412fa9d847a5adedc234e18b284d84 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 18 Nov 2024 16:07:22 +0800 +Subject: wifi: mac80211: fix station NSS capability initialization order + +From: Benjamin Lin + +[ Upstream commit 
819e0f1e58e0ba3800cd9eb96b2a39e44e49df97 ] + +Station's spatial streaming capability should be initialized before +handling VHT OMN, because the handling requires the capability information. + +Fixes: a8bca3e9371d ("wifi: mac80211: track capability/opmode NSS separately") +Signed-off-by: Benjamin Lin +Link: https://patch.msgid.link/20241118080722.9603-1-benjamin-jw.lin@mediatek.com +[rewrite subject] +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/cfg.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c +index f9395cd80051..a3c5d4d995db 100644 +--- a/net/mac80211/cfg.c ++++ b/net/mac80211/cfg.c +@@ -1879,6 +1879,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, + params->eht_capa_len, + link_sta); + ++ ieee80211_sta_init_nss(link_sta); ++ + if (params->opmode_notif_used) { + /* returned value is only needed for rc update, but the + * rc isn't initialized here yet, so ignore it +@@ -1888,8 +1890,6 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, + sband->band); + } + +- ieee80211_sta_init_nss(link_sta); +- + return 0; + } + +-- +2.39.5 + diff --git a/queue-6.6/wifi-mac80211-init-cnt-before-accessing-elem-in-ieee.patch b/queue-6.6/wifi-mac80211-init-cnt-before-accessing-elem-in-ieee.patch new file mode 100644 index 00000000000..1c02fd4f96c --- /dev/null +++ b/queue-6.6/wifi-mac80211-init-cnt-before-accessing-elem-in-ieee.patch @@ -0,0 +1,46 @@ +From fe877b0133665013bac6e0a20c39bef4da21f482 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 24 Nov 2024 01:25:00 +0800 +Subject: wifi: mac80211: init cnt before accessing elem in + ieee80211_copy_mbssid_beacon + +From: Haoyu Li + +[ Upstream commit 496db69fd860570145f7c266b31f3af85fca5b00 ] + +With the new __counted_by annotation in cfg80211_mbssid_elems, +the "cnt" struct member must be set before accessing the "elem" +array.
Failing to do so will trigger a runtime warning when enabling +CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. + +Fixes: c14679d7005a ("wifi: cfg80211: Annotate struct cfg80211_mbssid_elems with __counted_by") +Signed-off-by: Haoyu Li +Link: https://patch.msgid.link/20241123172500.311853-1-lihaoyu499@gmail.com +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/cfg.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c +index 3da30c991de8..fe4469af3cc9 100644 +--- a/net/mac80211/cfg.c ++++ b/net/mac80211/cfg.c +@@ -1082,13 +1082,13 @@ ieee80211_copy_mbssid_beacon(u8 *pos, struct cfg80211_mbssid_elems *dst, + { + int i, offset = 0; + ++ dst->cnt = src->cnt; + for (i = 0; i < src->cnt; i++) { + memcpy(pos + offset, src->elem[i].data, src->elem[i].len); + dst->elem[i].len = src->elem[i].len; + dst->elem[i].data = pos + offset; + offset += dst->elem[i].len; + } +- dst->cnt = src->cnt; + + return offset; + } +-- +2.39.5 + diff --git a/queue-6.6/wifi-nl80211-fix-nl80211_attr_mlo_link_id-off-by-one.patch b/queue-6.6/wifi-nl80211-fix-nl80211_attr_mlo_link_id-off-by-one.patch new file mode 100644 index 00000000000..f2cb28517fb --- /dev/null +++ b/queue-6.6/wifi-nl80211-fix-nl80211_attr_mlo_link_id-off-by-one.patch @@ -0,0 +1,77 @@ +From 36467276a2d3eeceee6d3b7e32b0693336501be3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 1 Dec 2024 01:05:26 +0800 +Subject: wifi: nl80211: fix NL80211_ATTR_MLO_LINK_ID off-by-one + +From: Lin Ma + +[ Upstream commit 2e3dbf938656986cce73ac4083500d0bcfbffe24 ] + +Since the netlink attribute range validation provides inclusive +checking, the *max* of attribute NL80211_ATTR_MLO_LINK_ID should be +IEEE80211_MLD_MAX_NUM_LINKS - 1 otherwise causing an off-by-one. 
+ +One crash stack for demonstration: +================================================================== +BUG: KASAN: wild-memory-access in ieee80211_tx_control_port+0x3b6/0xca0 net/mac80211/tx.c:5939 +Read of size 6 at addr 001102080000000c by task fuzzer.386/9508 + +CPU: 1 PID: 9508 Comm: syz.1.386 Not tainted 6.1.70 #2 +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x177/0x231 lib/dump_stack.c:106 + print_report+0xe0/0x750 mm/kasan/report.c:398 + kasan_report+0x139/0x170 mm/kasan/report.c:495 + kasan_check_range+0x287/0x290 mm/kasan/generic.c:189 + memcpy+0x25/0x60 mm/kasan/shadow.c:65 + ieee80211_tx_control_port+0x3b6/0xca0 net/mac80211/tx.c:5939 + rdev_tx_control_port net/wireless/rdev-ops.h:761 [inline] + nl80211_tx_control_port+0x7b3/0xc40 net/wireless/nl80211.c:15453 + genl_family_rcv_msg_doit+0x22e/0x320 net/netlink/genetlink.c:756 + genl_family_rcv_msg net/netlink/genetlink.c:833 [inline] + genl_rcv_msg+0x539/0x740 net/netlink/genetlink.c:850 + netlink_rcv_skb+0x1de/0x420 net/netlink/af_netlink.c:2508 + genl_rcv+0x24/0x40 net/netlink/genetlink.c:861 + netlink_unicast_kernel net/netlink/af_netlink.c:1326 [inline] + netlink_unicast+0x74b/0x8c0 net/netlink/af_netlink.c:1352 + netlink_sendmsg+0x882/0xb90 net/netlink/af_netlink.c:1874 + sock_sendmsg_nosec net/socket.c:716 [inline] + __sock_sendmsg net/socket.c:728 [inline] + ____sys_sendmsg+0x5cc/0x8f0 net/socket.c:2499 + ___sys_sendmsg+0x21c/0x290 net/socket.c:2553 + __sys_sendmsg net/socket.c:2582 [inline] + __do_sys_sendmsg net/socket.c:2591 [inline] + __se_sys_sendmsg+0x19e/0x270 net/socket.c:2589 + do_syscall_x64 arch/x86/entry/common.c:51 [inline] + do_syscall_64+0x45/0x90 arch/x86/entry/common.c:81 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Update the policy to ensure correct validation. 
+ +Fixes: 7b0a0e3c3a88 ("wifi: cfg80211: do some rework towards MLO link APIs") +Signed-off-by: Lin Ma +Suggested-by: Cengiz Can +Link: https://patch.msgid.link/20241130170526.96698-1-linma@zju.edu.cn +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/wireless/nl80211.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c +index 797907303669..5b9f39d93b04 100644 +--- a/net/wireless/nl80211.c ++++ b/net/wireless/nl80211.c +@@ -811,7 +811,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { + [NL80211_ATTR_MLO_LINKS] = + NLA_POLICY_NESTED_ARRAY(nl80211_policy), + [NL80211_ATTR_MLO_LINK_ID] = +- NLA_POLICY_RANGE(NLA_U8, 0, IEEE80211_MLD_MAX_NUM_LINKS), ++ NLA_POLICY_RANGE(NLA_U8, 0, IEEE80211_MLD_MAX_NUM_LINKS - 1), + [NL80211_ATTR_MLD_ADDR] = NLA_POLICY_EXACT_LEN(ETH_ALEN), + [NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG }, + [NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT }, +-- +2.39.5 +