From: Sasha Levin Date: Mon, 19 Aug 2024 14:19:30 +0000 (-0400) Subject: Fixes for 6.6 X-Git-Tag: v6.1.107~117 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=9b5d31f5154247d9d867ce1ecb20a0eb47814b2f;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.6 Signed-off-by: Sasha Levin --- diff --git a/queue-6.6/alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch b/queue-6.6/alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch new file mode 100644 index 00000000000..8719314e17d --- /dev/null +++ b/queue-6.6/alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch @@ -0,0 +1,39 @@ +From 6463a2fe0d6b212f43e0408b39846829e6ff07ab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 10 Aug 2024 18:39:06 +0330 +Subject: ALSA: hda/realtek: Fix noise from speakers on Lenovo IdeaPad 3 15IAU7 + +From: Parsa Poorshikhian + +[ Upstream commit ef9718b3d54e822de294351251f3a574f8a082ce ] + +Fix noise from speakers connected to AUX port when no sound is playing. +The problem occurs because the `alc_shutup_pins` function includes +a 0x10ec0257 vendor ID, which causes noise on Lenovo IdeaPad 3 15IAU7 with +Realtek ALC257 codec when no sound is playing. +Removing this vendor ID from the function fixes the bug. 
+ +Fixes: 70794b9563fe ("ALSA: hda/realtek: Add more codec ID to no shutup pins list") +Signed-off-by: Parsa Poorshikhian +Link: https://patch.msgid.link/20240810150939.330693-1-parsa.poorsh@gmail.com +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 82dcea2b78000..5736516275a34 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -585,7 +585,6 @@ static void alc_shutup_pins(struct hda_codec *codec) + switch (codec->core.vendor_id) { + case 0x10ec0236: + case 0x10ec0256: +- case 0x10ec0257: + case 0x19e58326: + case 0x10ec0283: + case 0x10ec0285: +-- +2.43.0 + diff --git a/queue-6.6/atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch b/queue-6.6/atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch new file mode 100644 index 00000000000..f78aca4114f --- /dev/null +++ b/queue-6.6/atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch @@ -0,0 +1,56 @@ +From 60eb2f9b38f5c93b9766743d0309fcc3a7e3631f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Aug 2024 15:28:19 +0300 +Subject: atm: idt77252: prevent use after free in dequeue_rx() + +From: Dan Carpenter + +[ Upstream commit a9a18e8f770c9b0703dab93580d0b02e199a4c79 ] + +We can't dereference "skb" after calling vcc->push() because the skb +is released. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Dan Carpenter +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/atm/idt77252.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c +index e7f713cd70d3f..a876024d8a05f 100644 +--- a/drivers/atm/idt77252.c ++++ b/drivers/atm/idt77252.c +@@ -1118,8 +1118,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) + rpp->len += skb->len; + + if (stat & SAR_RSQE_EPDU) { ++ unsigned int len, truesize; + unsigned char *l1l2; +- unsigned int len; + + l1l2 = (unsigned char *) ((unsigned long) skb->data + skb->len - 6); + +@@ -1189,14 +1189,15 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) + ATM_SKB(skb)->vcc = vcc; + __net_timestamp(skb); + ++ truesize = skb->truesize; + vcc->push(vcc, skb); + atomic_inc(&vcc->stats->rx); + +- if (skb->truesize > SAR_FB_SIZE_3) ++ if (truesize > SAR_FB_SIZE_3) + add_rx_skb(card, 3, SAR_FB_SIZE_3, 1); +- else if (skb->truesize > SAR_FB_SIZE_2) ++ else if (truesize > SAR_FB_SIZE_2) + add_rx_skb(card, 2, SAR_FB_SIZE_2, 1); +- else if (skb->truesize > SAR_FB_SIZE_1) ++ else if (truesize > SAR_FB_SIZE_1) + add_rx_skb(card, 1, SAR_FB_SIZE_1, 1); + else + add_rx_skb(card, 0, SAR_FB_SIZE_0, 1); +-- +2.43.0 + diff --git a/queue-6.6/bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch b/queue-6.6/bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch new file mode 100644 index 00000000000..b495cff0474 --- /dev/null +++ b/queue-6.6/bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch @@ -0,0 +1,61 @@ +From 35d88d9b913922494616be66ffe989db34bb9d64 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 28 Jul 2024 19:46:11 +0800 +Subject: bpf: Fix updating attached freplace prog in prog_array map +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Leon Hwang + +[ Upstream commit fdad456cbcca739bae1849549c7a999857c56f88 ] + +The commit f7866c358733 ("bpf: Fix null pointer dereference in 
resolve_prog_type() for BPF_PROG_TYPE_EXT") +fixed a NULL pointer dereference panic, but didn't fix the issue that +fails to update attached freplace prog to prog_array map. + +Since commit 1c123c567fb1 ("bpf: Resolve fext program type when checking map compatibility"), +freplace prog and its target prog are able to tail call each other. + +And the commit 3aac1ead5eb6 ("bpf: Move prog->aux->linked_prog and trampoline into bpf_link on attach") +sets prog->aux->dst_prog as NULL after attaching freplace prog to its +target prog. + +After loading freplace the prog_array's owner type is BPF_PROG_TYPE_SCHED_CLS. +Then, after attaching freplace its prog->aux->dst_prog is NULL. +Then, while updating freplace in prog_array the bpf_prog_map_compatible() +incorrectly returns false because resolve_prog_type() returns +BPF_PROG_TYPE_EXT instead of BPF_PROG_TYPE_SCHED_CLS. +After this patch the resolve_prog_type() returns BPF_PROG_TYPE_SCHED_CLS +and update to prog_array can succeed. + +Fixes: f7866c358733 ("bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT") +Cc: Toke Høiland-Jørgensen +Cc: Martin KaFai Lau +Acked-by: Yonghong Song +Signed-off-by: Leon Hwang +Link: https://lore.kernel.org/r/20240728114612.48486-2-leon.hwang@linux.dev +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + include/linux/bpf_verifier.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h +index b62535fd8de5f..92919d52f7e1b 100644 +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -760,8 +760,8 @@ static inline u32 type_flag(u32 type) + /* only use after check_attach_btf_id() */ + static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) + { +- return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ? 
+- prog->aux->dst_prog->type : prog->type; ++ return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->saved_dst_prog_type) ? ++ prog->aux->saved_dst_prog_type : prog->type; + } + + static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) +-- +2.43.0 + diff --git a/queue-6.6/gpio-mlxbf3-support-shutdown-function.patch b/queue-6.6/gpio-mlxbf3-support-shutdown-function.patch new file mode 100644 index 00000000000..25d69dc76f4 --- /dev/null +++ b/queue-6.6/gpio-mlxbf3-support-shutdown-function.patch @@ -0,0 +1,91 @@ +From a626bd82ad46414ae7a79c7d870e107cd7ad41c0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jun 2024 13:15:09 -0400 +Subject: gpio: mlxbf3: Support shutdown() function + +From: Asmaa Mnebhi + +[ Upstream commit aad41832326723627ad8ac9ee8a543b6dca4454d ] + +During Linux graceful reboot, the GPIO interrupts are not disabled. +Since the drivers are not removed during graceful reboot, +the logic to call mlxbf3_gpio_irq_disable() is not triggered. +Interrupts that remain enabled can cause issues on subsequent boots. + +For example, the mlxbf-gige driver contains PHY logic to bring up the link. +If the gpio-mlxbf3 driver loads first, the mlxbf-gige driver +will use a GPIO interrupt to bring up the link. +Otherwise, it will use polling. +The next time Linux boots and loads the drivers in this order, we encounter the issue: +- mlxbf-gige loads first and uses polling while the GPIO10 + interrupt is still enabled from the previous boot. So if + the interrupt triggers, there is nothing to clear it. +- gpio-mlxbf3 loads. +- i2c-mlxbf loads. The interrupt doesn't trigger for I2C + because it is shared with the GPIO interrupt line which + was not cleared. + +The solution is to add a shutdown function to the GPIO driver to clear and disable +all interrupts. Also clear the interrupt after disabling it in mlxbf3_gpio_irq_disable(). 
+ +Fixes: 38a700efc510 ("gpio: mlxbf3: Add gpio driver support") +Signed-off-by: Asmaa Mnebhi +Reviewed-by: David Thompson +Reviewed-by: Andy Shevchenko +Reviewed-by: Linus Walleij +Link: https://lore.kernel.org/r/20240611171509.22151-1-asmaa@nvidia.com +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Sasha Levin +--- + drivers/gpio/gpio-mlxbf3.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c +index d5906d419b0ab..10ea71273c891 100644 +--- a/drivers/gpio/gpio-mlxbf3.c ++++ b/drivers/gpio/gpio-mlxbf3.c +@@ -39,6 +39,8 @@ + #define MLXBF_GPIO_CAUSE_OR_EVTEN0 0x14 + #define MLXBF_GPIO_CAUSE_OR_CLRCAUSE 0x18 + ++#define MLXBF_GPIO_CLR_ALL_INTS GENMASK(31, 0) ++ + struct mlxbf3_gpio_context { + struct gpio_chip gc; + +@@ -82,6 +84,8 @@ static void mlxbf3_gpio_irq_disable(struct irq_data *irqd) + val = readl(gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + val &= ~BIT(offset); + writel(val, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); ++ ++ writel(BIT(offset), gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); + raw_spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags); + + gpiochip_disable_irq(gc, offset); +@@ -253,6 +257,15 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) + return 0; + } + ++static void mlxbf3_gpio_shutdown(struct platform_device *pdev) ++{ ++ struct mlxbf3_gpio_context *gs = platform_get_drvdata(pdev); ++ ++ /* Disable and clear all interrupts */ ++ writel(0, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); ++ writel(MLXBF_GPIO_CLR_ALL_INTS, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); ++} ++ + static const struct acpi_device_id mlxbf3_gpio_acpi_match[] = { + { "MLNXBF33", 0 }, + {} +@@ -265,6 +278,7 @@ static struct platform_driver mlxbf3_gpio_driver = { + .acpi_match_table = mlxbf3_gpio_acpi_match, + }, + .probe = mlxbf3_gpio_probe, ++ .shutdown = mlxbf3_gpio_shutdown, + }; + module_platform_driver(mlxbf3_gpio_driver); + +-- +2.43.0 + diff --git 
a/queue-6.6/igc-fix-packet-still-tx-after-gate-close-by-reducing.patch b/queue-6.6/igc-fix-packet-still-tx-after-gate-close-by-reducing.patch new file mode 100644 index 00000000000..8447c7efead --- /dev/null +++ b/queue-6.6/igc-fix-packet-still-tx-after-gate-close-by-reducing.patch @@ -0,0 +1,149 @@ +From 3a799096cedb22239a468aa2736f08938311db2c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 6 Jul 2024 11:38:07 -0400 +Subject: igc: Fix packet still tx after gate close by reducing i226 MAC retry + buffer + +From: Faizal Rahim + +[ Upstream commit e037a26ead187901f83cad9c503ccece5ff6817a ] + +Testing uncovered that even when the taprio gate is closed, some packets +still transmit. + +According to i225/6 hardware errata [1], traffic might overflow the +planned QBV window. This happens because MAC maintains an internal buffer, +primarily for supporting half duplex retries. Therefore, even when the +gate closes, residual MAC data in the buffer may still transmit. + +To mitigate this for i226, reduce the MAC's internal buffer from 192 bytes +to the recommended 88 bytes by modifying the RETX_CTL register value. + +This follows guidelines from: +[1] Ethernet Controller I225/I226 Spec Update Rev 2.1 Errata Item 9: + TSN: Packet Transmission Might Cross Qbv Window +[2] I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control + +Note that the RETX_CTL register can't be used in TSN mode because half +duplex feature cannot coexist with TSN. + +Test Steps: +1. Send taprio cmd to board A: + tc qdisc replace dev enp1s0 parent root handle 100 taprio \ + num_tc 4 \ + map 3 2 1 0 3 3 3 3 3 3 3 3 3 3 3 3 \ + queues 1@0 1@1 1@2 1@3 \ + base-time 0 \ + sched-entry S 0x07 500000 \ + sched-entry S 0x0f 500000 \ + flags 0x2 \ + txtime-delay 0 + + Note that for TC3, gate should open for 500us and close for another + 500us. + +3. Take tcpdump log on Board B. + +4. Send udp packets via UDP tai app from Board A to Board B. + +5. 
Analyze tcpdump log via wireshark log on Board B. Ensure that the + total time from the first to the last packet received during one cycle + for TC3 does not exceed 500us. + +Fixes: 43546211738e ("igc: Add new device ID's") +Signed-off-by: Faizal Rahim +Acked-by: Vinicius Costa Gomes +Tested-by: Mor Bar-Gabay +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_defines.h | 6 ++++ + drivers/net/ethernet/intel/igc/igc_tsn.c | 34 ++++++++++++++++++++ + 2 files changed, 40 insertions(+) + +diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h +index b3037016f31d2..a18af5c87cde4 100644 +--- a/drivers/net/ethernet/intel/igc/igc_defines.h ++++ b/drivers/net/ethernet/intel/igc/igc_defines.h +@@ -402,6 +402,12 @@ + #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ + #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */ + ++/* Retry Buffer Control */ ++#define IGC_RETX_CTL 0x041C ++#define IGC_RETX_CTL_WATERMARK_MASK 0xF ++#define IGC_RETX_CTL_QBVFULLTH_SHIFT 8 /* QBV Retry Buffer Full Threshold */ ++#define IGC_RETX_CTL_QBVFULLEN 0x1000 /* Enable QBV Retry Buffer Full Threshold */ ++ + /* Transmit Scheduling Latency */ + /* Latency between transmission scheduling (LaunchTime) and the time + * the packet is transmitted to the network in nanosecond. 
+diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c +index 22cefb1eeedfa..46d4c3275bbb5 100644 +--- a/drivers/net/ethernet/intel/igc/igc_tsn.c ++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c +@@ -78,6 +78,15 @@ void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) + wr32(IGC_GTXOFFSET, txoffset); + } + ++static void igc_tsn_restore_retx_default(struct igc_adapter *adapter) ++{ ++ struct igc_hw *hw = &adapter->hw; ++ u32 retxctl; ++ ++ retxctl = rd32(IGC_RETX_CTL) & IGC_RETX_CTL_WATERMARK_MASK; ++ wr32(IGC_RETX_CTL, retxctl); ++} ++ + /* Returns the TSN specific registers to their default values after + * the adapter is reset. + */ +@@ -91,6 +100,9 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) + wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); + wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT); + ++ if (igc_is_device_id_i226(hw)) ++ igc_tsn_restore_retx_default(adapter); ++ + tqavctrl = rd32(IGC_TQAVCTRL); + tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN | + IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS); +@@ -111,6 +123,25 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) + return 0; + } + ++/* To partially fix i226 HW errata, reduce MAC internal buffering from 192 Bytes ++ * to 88 Bytes by setting RETX_CTL register using the recommendation from: ++ * a) Ethernet Controller I225/I226 Specification Update Rev 2.1 ++ * Item 9: TSN: Packet Transmission Might Cross the Qbv Window ++ * b) I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control ++ */ ++static void igc_tsn_set_retx_qbvfullthreshold(struct igc_adapter *adapter) ++{ ++ struct igc_hw *hw = &adapter->hw; ++ u32 retxctl, watermark; ++ ++ retxctl = rd32(IGC_RETX_CTL); ++ watermark = retxctl & IGC_RETX_CTL_WATERMARK_MASK; ++ /* Set QBVFULLTH value using watermark and set QBVFULLEN */ ++ retxctl |= (watermark << IGC_RETX_CTL_QBVFULLTH_SHIFT) | ++ IGC_RETX_CTL_QBVFULLEN; ++ wr32(IGC_RETX_CTL, retxctl); ++} ++ + static 
int igc_tsn_enable_offload(struct igc_adapter *adapter) + { + struct igc_hw *hw = &adapter->hw; +@@ -123,6 +154,9 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) + wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN); + wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN); + ++ if (igc_is_device_id_i226(hw)) ++ igc_tsn_set_retx_qbvfullthreshold(adapter); ++ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + u32 txqctl = 0; +-- +2.43.0 + diff --git a/queue-6.6/igc-fix-qbv_config_change_errors-logics.patch b/queue-6.6/igc-fix-qbv_config_change_errors-logics.patch new file mode 100644 index 00000000000..bea73de5dfb --- /dev/null +++ b/queue-6.6/igc-fix-qbv_config_change_errors-logics.patch @@ -0,0 +1,131 @@ +From 7e608069f8c5cf8dd35cd4852bf0eee5bb3c2364 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 7 Jul 2024 08:53:16 -0400 +Subject: igc: Fix qbv_config_change_errors logics + +From: Faizal Rahim + +[ Upstream commit f8d6acaee9d35cbff3c3cfad94641666c596f8da ] + +When user issues these cmds: +1. Either a) or b) + a) mqprio with hardware offload disabled + b) taprio with txtime-assist feature enabled +2. etf +3. tc qdisc delete +4. taprio with base time in the past + +At step 4, qbv_config_change_errors wrongly increased by 1. + +Excerpt from IEEE 802.1Q-2018 8.6.9.3.1: +"If AdminBaseTime specifies a time in the past, and the current schedule +is running, then: Increment ConfigChangeError counter" + +qbv_config_change_errors should only increase if base time is in the past +and taprio is active. From the user's perspective, taprio was not active when +first triggered at step 4. However, i225/6 reuses qbv for etf, so qbv is +enabled with a dummy schedule at step 2 where it enters +igc_tsn_enable_offload() and qbv_count got incremented to 1. At step 4, it +enters igc_tsn_enable_offload() again, qbv_count is incremented to 2. 
+Because taprio is running, tc_setup_type is TC_SETUP_QDISC_ETF and +qbv_count > 1, qbv_config_change_errors value got incremented. + +This issue happens due to reliance on qbv_count field where a non-zero +value indicates that taprio is running. But qbv_count increases +regardless if taprio is triggered by user or by other tsn feature. It does +not align with qbv_config_change_errors expectation where it is only +concerned with taprio triggered by user. + +Fixing this by relocating the qbv_config_change_errors logic to +igc_save_qbv_schedule(), eliminating reliance on qbv_count and its +inaccuracies from i225/6's multiple uses of qbv feature for other TSN +features. + +The new function created: igc_tsn_is_taprio_activated_by_user() uses +taprio_offload_enable field to indicate that the current running taprio +was triggered by user, instead of triggered by non-qbv feature like etf. + +Fixes: ae4fe4698300 ("igc: Add qbv_config_change_errors counter") +Signed-off-by: Faizal Rahim +Reviewed-by: Simon Horman +Acked-by: Vinicius Costa Gomes +Tested-by: Mor Bar-Gabay +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_main.c | 8 ++++++-- + drivers/net/ethernet/intel/igc/igc_tsn.c | 16 ++++++++-------- + drivers/net/ethernet/intel/igc/igc_tsn.h | 1 + + 3 files changed, 15 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index d80bbcdeb93ed..21fb1a98ebca6 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -6217,12 +6217,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, + if (!validate_schedule(adapter, qopt)) + return -EINVAL; + ++ igc_ptp_read(adapter, &now); ++ ++ if (igc_tsn_is_taprio_activated_by_user(adapter) && ++ is_base_time_past(qopt->base_time, &now)) ++ adapter->qbv_config_change_errors++; ++ + adapter->cycle_time = qopt->cycle_time; + adapter->base_time = 
qopt->base_time; + adapter->taprio_offload_enable = true; + +- igc_ptp_read(adapter, &now); +- + for (n = 0; n < qopt->num_entries; n++) { + struct tc_taprio_sched_entry *e = &qopt->entries[n]; + +diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c +index 46d4c3275bbb5..8ed7b965484da 100644 +--- a/drivers/net/ethernet/intel/igc/igc_tsn.c ++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c +@@ -87,6 +87,14 @@ static void igc_tsn_restore_retx_default(struct igc_adapter *adapter) + wr32(IGC_RETX_CTL, retxctl); + } + ++bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter) ++{ ++ struct igc_hw *hw = &adapter->hw; ++ ++ return (rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && ++ adapter->taprio_offload_enable; ++} ++ + /* Returns the TSN specific registers to their default values after + * the adapter is reset. + */ +@@ -296,14 +304,6 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) + s64 n = div64_s64(ktime_sub_ns(systim, base_time), cycle); + + base_time = ktime_add_ns(base_time, (n + 1) * cycle); +- +- /* Increase the counter if scheduling into the past while +- * Gate Control List (GCL) is running. 
+- */ +- if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && +- (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && +- (adapter->qbv_count > 1)) +- adapter->qbv_config_change_errors++; + } else { + if (igc_is_device_id_i226(hw)) { + ktime_t adjust_time, expires_time; +diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h +index b53e6af560b73..98ec845a86bf0 100644 +--- a/drivers/net/ethernet/intel/igc/igc_tsn.h ++++ b/drivers/net/ethernet/intel/igc/igc_tsn.h +@@ -7,5 +7,6 @@ + int igc_tsn_offload_apply(struct igc_adapter *adapter); + int igc_tsn_reset(struct igc_adapter *adapter); + void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter); ++bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter); + + #endif /* _IGC_BASE_H */ +-- +2.43.0 + diff --git a/queue-6.6/igc-fix-reset-adapter-logics-when-tx-mode-change.patch b/queue-6.6/igc-fix-reset-adapter-logics-when-tx-mode-change.patch new file mode 100644 index 00000000000..d0db9da3958 --- /dev/null +++ b/queue-6.6/igc-fix-reset-adapter-logics-when-tx-mode-change.patch @@ -0,0 +1,105 @@ +From 594e35ca16aa5656d99acde5578ab5f0da7e7752 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 7 Jul 2024 08:53:17 -0400 +Subject: igc: Fix reset adapter logics when tx mode change + +From: Faizal Rahim + +[ Upstream commit 0afeaeb5dae86aceded0d5f0c3a54d27858c0c6f ] + +Following the "igc: Fix TX Hang issue when QBV Gate is closed" changes, +remaining issues with the reset adapter logic in igc_tsn_offload_apply() +have been observed: + +1. The reset adapter logics for i225 and i226 differ, although they should + be the same according to the guidelines in I225/6 HW Design Section + 7.5.2.1 on software initialization during tx mode changes. +2. The i225 resets adapter every time, even though tx mode doesn't change. + This occurs solely based on the condition igc_is_device_id_i225() when + calling schedule_work(). +3. 
i226 doesn't reset adapter for tsn->legacy tx mode changes. It only + resets adapter for legacy->tsn tx mode transitions. +4. qbv_count introduced in the patch is actually not needed; in this + context, a non-zero value of qbv_count is used to indicate if tx mode + was unconditionally set to tsn in igc_tsn_enable_offload(). This could + be replaced by checking the existing register + IGC_TQAVCTRL_TRANSMIT_MODE_TSN bit. + +This patch resolves all issues and enters schedule_work() to reset the +adapter only when changing tx mode. It also removes reliance on qbv_count. + +qbv_count field will be removed in a future patch. + +Test ran: + +1. Verify reset adapter behaviour in i225/6: + a) Enrol a new GCL + Reset adapter observed (tx mode change legacy->tsn) + b) Enrol a new GCL without deleting qdisc + No reset adapter observed (tx mode remain tsn->tsn) + c) Delete qdisc + Reset adapter observed (tx mode change tsn->legacy) + +2. Tested scenario from "igc: Fix TX Hang issue when QBV Gate is closed" + to confirm it remains resolved. 
+ +Fixes: 175c241288c0 ("igc: Fix TX Hang issue when QBV Gate is closed") +Signed-off-by: Faizal Rahim +Reviewed-by: Simon Horman +Acked-by: Vinicius Costa Gomes +Tested-by: Mor Bar-Gabay +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_tsn.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c +index 8ed7b965484da..ada7514305171 100644 +--- a/drivers/net/ethernet/intel/igc/igc_tsn.c ++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c +@@ -49,6 +49,13 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) + return new_flags; + } + ++static bool igc_tsn_is_tx_mode_in_tsn(struct igc_adapter *adapter) ++{ ++ struct igc_hw *hw = &adapter->hw; ++ ++ return !!(rd32(IGC_TQAVCTRL) & IGC_TQAVCTRL_TRANSMIT_MODE_TSN); ++} ++ + void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) + { + struct igc_hw *hw = &adapter->hw; +@@ -365,15 +372,22 @@ int igc_tsn_reset(struct igc_adapter *adapter) + return err; + } + +-int igc_tsn_offload_apply(struct igc_adapter *adapter) ++static bool igc_tsn_will_tx_mode_change(struct igc_adapter *adapter) + { +- struct igc_hw *hw = &adapter->hw; ++ bool any_tsn_enabled = !!(igc_tsn_new_flags(adapter) & ++ IGC_FLAG_TSN_ANY_ENABLED); ++ ++ return (any_tsn_enabled && !igc_tsn_is_tx_mode_in_tsn(adapter)) || ++ (!any_tsn_enabled && igc_tsn_is_tx_mode_in_tsn(adapter)); ++} + +- /* Per I225/6 HW Design Section 7.5.2.1, transmit mode +- * cannot be changed dynamically. Require reset the adapter. ++int igc_tsn_offload_apply(struct igc_adapter *adapter) ++{ ++ /* Per I225/6 HW Design Section 7.5.2.1 guideline, if tx mode change ++ * from legacy->tsn or tsn->legacy, then reset adapter is needed. 
+ */ + if (netif_running(adapter->netdev) && +- (igc_is_device_id_i225(hw) || !adapter->qbv_count)) { ++ igc_tsn_will_tx_mode_change(adapter)) { + schedule_work(&adapter->reset_task); + return 0; + } +-- +2.43.0 + diff --git a/queue-6.6/mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch b/queue-6.6/mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch new file mode 100644 index 00000000000..b43dc5b988a --- /dev/null +++ b/queue-6.6/mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch @@ -0,0 +1,188 @@ +From d14046b6e3b0b46526edcb21d3d0fbae8b8de3c1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Aug 2024 12:36:12 -0400 +Subject: mlxbf_gige: disable RX filters until RX path initialized + +From: David Thompson + +[ Upstream commit df934abb185c71c9f2fa07a5013672d0cbd36560 ] + +A recent change to the driver exposed a bug where the MAC RX +filters (unicast MAC, broadcast MAC, and multicast MAC) are +configured and enabled before the RX path is fully initialized. +The result of this bug is that after the PHY is started packets +that match these MAC RX filters start to flow into the RX FIFO. +And then, after rx_init() is completed, these packets will go +into the driver RX ring as well. If enough packets are received +to fill the RX ring (default size is 128 packets) before the call +to request_irq() completes, the driver RX function becomes stuck. + +This bug is intermittent but is most likely to be seen where the +oob_net0 interface is connected to a busy network with lots of +broadcast and multicast traffic. + +All the MAC RX filters must be disabled until the RX path is ready, +i.e. all initialization is done and all the IRQs are installed. 
+ +Fixes: f7442a634ac0 ("mlxbf_gige: call request_irq() after NAPI initialized") +Reviewed-by: Asmaa Mnebhi +Signed-off-by: David Thompson +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20240809163612.12852-1-davthompson@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../ethernet/mellanox/mlxbf_gige/mlxbf_gige.h | 8 +++ + .../mellanox/mlxbf_gige/mlxbf_gige_main.c | 10 ++++ + .../mellanox/mlxbf_gige/mlxbf_gige_regs.h | 2 + + .../mellanox/mlxbf_gige/mlxbf_gige_rx.c | 50 ++++++++++++++++--- + 4 files changed, 64 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h +index bc94e75a7aebd..e7777700ee18a 100644 +--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h ++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h +@@ -40,6 +40,7 @@ + */ + #define MLXBF_GIGE_BCAST_MAC_FILTER_IDX 0 + #define MLXBF_GIGE_LOCAL_MAC_FILTER_IDX 1 ++#define MLXBF_GIGE_MAX_FILTER_IDX 3 + + /* Define for broadcast MAC literal */ + #define BCAST_MAC_ADDR 0xFFFFFFFFFFFF +@@ -175,6 +176,13 @@ enum mlxbf_gige_res { + int mlxbf_gige_mdio_probe(struct platform_device *pdev, + struct mlxbf_gige *priv); + void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv); ++ ++void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv); ++void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv); ++void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv, ++ unsigned int index); ++void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv, ++ unsigned int index); + void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index, u64 dmac); + void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, +diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +index 7bb92e2dacda6..57e68bfd3b1a8 100644 +--- 
a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c ++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +@@ -168,6 +168,10 @@ static int mlxbf_gige_open(struct net_device *netdev) + if (err) + goto napi_deinit; + ++ mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX); ++ mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX); ++ mlxbf_gige_enable_multicast_rx(priv); ++ + /* Set bits in INT_EN that we care about */ + int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | + MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | +@@ -379,6 +383,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) + void __iomem *plu_base; + void __iomem *base; + int addr, phy_irq; ++ unsigned int i; + int err; + + base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MAC); +@@ -423,6 +428,11 @@ static int mlxbf_gige_probe(struct platform_device *pdev) + priv->rx_q_entries = MLXBF_GIGE_DEFAULT_RXQ_SZ; + priv->tx_q_entries = MLXBF_GIGE_DEFAULT_TXQ_SZ; + ++ for (i = 0; i <= MLXBF_GIGE_MAX_FILTER_IDX; i++) ++ mlxbf_gige_disable_mac_rx_filter(priv, i); ++ mlxbf_gige_disable_multicast_rx(priv); ++ mlxbf_gige_disable_promisc(priv); ++ + /* Write initial MAC address to hardware */ + mlxbf_gige_initial_mac(priv); + +diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h +index cd0973229c9bb..74bd46bab4c05 100644 +--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h ++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h +@@ -62,6 +62,8 @@ + #define MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL BIT(1) + #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START 0x0520 + #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END 0x0528 ++#define MLXBF_GIGE_RX_MAC_FILTER_GENERAL 0x0530 ++#define MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST BIT(1) + #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC 0x0540 + #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN BIT(0) + #define 
MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS 0x0548 +diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c +index 6999843584934..eb62620b63c7f 100644 +--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c ++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c +@@ -11,15 +11,31 @@ + #include "mlxbf_gige.h" + #include "mlxbf_gige_regs.h" + +-void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, +- unsigned int index, u64 dmac) ++void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv) + { + void __iomem *base = priv->base; +- u64 control; ++ u64 data; + +- /* Write destination MAC to specified MAC RX filter */ +- writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + +- (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); ++ data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); ++ data |= MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST; ++ writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); ++} ++ ++void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv) ++{ ++ void __iomem *base = priv->base; ++ u64 data; ++ ++ data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); ++ data &= ~MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST; ++ writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); ++} ++ ++void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv, ++ unsigned int index) ++{ ++ void __iomem *base = priv->base; ++ u64 control; + + /* Enable MAC receive filter mask for specified index */ + control = readq(base + MLXBF_GIGE_CONTROL); +@@ -27,6 +43,28 @@ void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, + writeq(control, base + MLXBF_GIGE_CONTROL); + } + ++void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv, ++ unsigned int index) ++{ ++ void __iomem *base = priv->base; ++ u64 control; ++ ++ /* Disable MAC receive filter mask for specified index */ ++ control = readq(base + MLXBF_GIGE_CONTROL); ++ control &= ~(MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index); ++ writeq(control, base + 
MLXBF_GIGE_CONTROL); ++} ++ ++void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, ++ unsigned int index, u64 dmac) ++{ ++ void __iomem *base = priv->base; ++ ++ /* Write destination MAC to specified MAC RX filter */ ++ writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + ++ (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); ++} ++ + void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index, u64 *dmac) + { +-- +2.43.0 + diff --git a/queue-6.6/mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch b/queue-6.6/mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch new file mode 100644 index 00000000000..172a9d44ab1 --- /dev/null +++ b/queue-6.6/mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch @@ -0,0 +1,42 @@ +From d9369435cbbbe25d0b8b291c2be195aebbd6ccfa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 12 Aug 2024 08:51:23 +0200 +Subject: mptcp: correct MPTCP_SUBFLOW_ATTR_SSN_OFFSET reserved size + +From: Eugene Syromiatnikov + +[ Upstream commit 655111b838cdabdb604f3625a9ff08c5eedb11da ] + +ssn_offset field is u32 and is placed into the netlink response with +nla_put_u32(), but only 2 bytes are reserved for the attribute payload +in subflow_get_info_size() (even though it makes no difference +in the end, as it is aligned up to 4 bytes). Supply the correct +argument to the relevant nla_total_size() call to make it less +confusing. 
+ +Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace") +Signed-off-by: Eugene Syromiatnikov +Reviewed-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20240812065024.GA19719@asgard.redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/mptcp/diag.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c +index 7017dd60659dc..b2199cc282384 100644 +--- a/net/mptcp/diag.c ++++ b/net/mptcp/diag.c +@@ -95,7 +95,7 @@ static size_t subflow_get_info_size(const struct sock *sk) + nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */ + nla_total_size_64bit(8) + /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */ + nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */ +- nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ ++ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ + nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */ + nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_FLAGS */ + nla_total_size(1) + /* MPTCP_SUBFLOW_ATTR_ID_REM */ +-- +2.43.0 + diff --git a/queue-6.6/net-axienet-fix-register-defines-comment-description.patch b/queue-6.6/net-axienet-fix-register-defines-comment-description.patch new file mode 100644 index 00000000000..984e7de13ae --- /dev/null +++ b/queue-6.6/net-axienet-fix-register-defines-comment-description.patch @@ -0,0 +1,62 @@ +From 3c01bad3cae11db14f7d0d1b96511bbd1c2c6e33 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Aug 2024 11:56:09 +0530 +Subject: net: axienet: Fix register defines comment description + +From: Radhey Shyam Pandey + +[ Upstream commit 9ff2f816e2aa65ca9a1cdf0954842f8173c0f48d ] + +In axiethernet header fix register defines comment description to be +inline with IP documentation. It updates MAC configuration register, +MDIO configuration register and frame filter control description. 
+ +Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") +Signed-off-by: Radhey Shyam Pandey +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/xilinx/xilinx_axienet.h | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h +index 575ff9de8985b..a62c2b4c6b2f2 100644 +--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h ++++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h +@@ -159,16 +159,16 @@ + #define XAE_RCW1_OFFSET 0x00000404 /* Rx Configuration Word 1 */ + #define XAE_TC_OFFSET 0x00000408 /* Tx Configuration */ + #define XAE_FCC_OFFSET 0x0000040C /* Flow Control Configuration */ +-#define XAE_EMMC_OFFSET 0x00000410 /* EMAC mode configuration */ +-#define XAE_PHYC_OFFSET 0x00000414 /* RGMII/SGMII configuration */ ++#define XAE_EMMC_OFFSET 0x00000410 /* MAC speed configuration */ ++#define XAE_PHYC_OFFSET 0x00000414 /* RX Max Frame Configuration */ + #define XAE_ID_OFFSET 0x000004F8 /* Identification register */ +-#define XAE_MDIO_MC_OFFSET 0x00000500 /* MII Management Config */ +-#define XAE_MDIO_MCR_OFFSET 0x00000504 /* MII Management Control */ +-#define XAE_MDIO_MWD_OFFSET 0x00000508 /* MII Management Write Data */ +-#define XAE_MDIO_MRD_OFFSET 0x0000050C /* MII Management Read Data */ ++#define XAE_MDIO_MC_OFFSET 0x00000500 /* MDIO Setup */ ++#define XAE_MDIO_MCR_OFFSET 0x00000504 /* MDIO Control */ ++#define XAE_MDIO_MWD_OFFSET 0x00000508 /* MDIO Write Data */ ++#define XAE_MDIO_MRD_OFFSET 0x0000050C /* MDIO Read Data */ + #define XAE_UAW0_OFFSET 0x00000700 /* Unicast address word 0 */ + #define XAE_UAW1_OFFSET 0x00000704 /* Unicast address word 1 */ +-#define XAE_FMI_OFFSET 0x00000708 /* Filter Mask Index */ ++#define XAE_FMI_OFFSET 0x00000708 /* Frame Filter Control */ + #define XAE_AF0_OFFSET 0x00000710 /* Address Filter 0 */ + #define 
XAE_AF1_OFFSET 0x00000714 /* Address Filter 1 */ + +@@ -307,7 +307,7 @@ + */ + #define XAE_UAW1_UNICASTADDR_MASK 0x0000FFFF + +-/* Bit masks for Axi Ethernet FMI register */ ++/* Bit masks for Axi Ethernet FMC register */ + #define XAE_FMI_PM_MASK 0x80000000 /* Promis. mode enable */ + #define XAE_FMI_IND_MASK 0x00000003 /* Index Mask */ + +-- +2.43.0 + diff --git a/queue-6.6/net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch b/queue-6.6/net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch new file mode 100644 index 00000000000..4360419b46a --- /dev/null +++ b/queue-6.6/net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch @@ -0,0 +1,114 @@ +From e3b3bc1e22a96c0c77e96327911f9f990912fdec Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Aug 2024 21:38:04 +0200 +Subject: net: dsa: vsc73xx: check busy flag in MDIO operations + +From: Pawel Dembicki + +[ Upstream commit fa63c6434b6f6aaf9d8d599dc899bc0a074cc0ad ] + +The VSC73xx has a busy flag used during MDIO operations. It is raised +when MDIO read/write operations are in progress. Without it, PHYs are +misconfigured and bus operations do not work as expected. + +Fixes: 05bd97fc559d ("net: dsa: Add Vitesse VSC73xx DSA router driver") +Reviewed-by: Linus Walleij +Reviewed-by: Florian Fainelli +Signed-off-by: Pawel Dembicki +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/vitesse-vsc73xx-core.c | 37 +++++++++++++++++++++++++- + 1 file changed, 36 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c +index d8f368df8b06f..23bd8b3f89931 100644 +--- a/drivers/net/dsa/vitesse-vsc73xx-core.c ++++ b/drivers/net/dsa/vitesse-vsc73xx-core.c +@@ -38,6 +38,10 @@ + #define VSC73XX_BLOCK_ARBITER 0x5 /* Only subblock 0 */ + #define VSC73XX_BLOCK_SYSTEM 0x7 /* Only subblock 0 */ + ++/* MII Block subblock */ ++#define VSC73XX_BLOCK_MII_INTERNAL 0x0 /* Internal MDIO subblock */ ++#define VSC73XX_BLOCK_MII_EXTERNAL 0x1 /* External MDIO subblock */ ++ + #define CPU_PORT 6 /* CPU port */ + + /* MAC Block registers */ +@@ -196,6 +200,8 @@ + #define VSC73XX_MII_CMD 0x1 + #define VSC73XX_MII_DATA 0x2 + ++#define VSC73XX_MII_STAT_BUSY BIT(3) ++ + /* Arbiter block 5 registers */ + #define VSC73XX_ARBEMPTY 0x0c + #define VSC73XX_ARBDISC 0x0e +@@ -270,6 +276,7 @@ + #define IS_739X(a) (IS_7395(a) || IS_7398(a)) + + #define VSC73XX_POLL_SLEEP_US 1000 ++#define VSC73XX_MDIO_POLL_SLEEP_US 5 + #define VSC73XX_POLL_TIMEOUT_US 10000 + + struct vsc73xx_counter { +@@ -487,6 +494,22 @@ static int vsc73xx_detect(struct vsc73xx *vsc) + return 0; + } + ++static int vsc73xx_mdio_busy_check(struct vsc73xx *vsc) ++{ ++ int ret, err; ++ u32 val; ++ ++ ret = read_poll_timeout(vsc73xx_read, err, ++ err < 0 || !(val & VSC73XX_MII_STAT_BUSY), ++ VSC73XX_MDIO_POLL_SLEEP_US, ++ VSC73XX_POLL_TIMEOUT_US, false, vsc, ++ VSC73XX_BLOCK_MII, VSC73XX_BLOCK_MII_INTERNAL, ++ VSC73XX_MII_STAT, &val); ++ if (ret) ++ return ret; ++ return err; ++} ++ + static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum) + { + struct vsc73xx *vsc = ds->priv; +@@ -494,12 +517,20 @@ static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum) + u32 val; + int ret; + ++ ret = vsc73xx_mdio_busy_check(vsc); ++ if (ret) ++ return ret; ++ + /* Setting bit 26 
means "read" */ + cmd = BIT(26) | (phy << 21) | (regnum << 16); + ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd); + if (ret) + return ret; +- msleep(2); ++ ++ ret = vsc73xx_mdio_busy_check(vsc); ++ if (ret) ++ return ret; ++ + ret = vsc73xx_read(vsc, VSC73XX_BLOCK_MII, 0, 2, &val); + if (ret) + return ret; +@@ -523,6 +554,10 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum, + u32 cmd; + int ret; + ++ ret = vsc73xx_mdio_busy_check(vsc); ++ if (ret) ++ return ret; ++ + /* It was found through tedious experiments that this router + * chip really hates to have it's PHYs reset. They + * never recover if that happens: autonegotiation stops +-- +2.43.0 + diff --git a/queue-6.6/net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch b/queue-6.6/net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch new file mode 100644 index 00000000000..fe43b519d68 --- /dev/null +++ b/queue-6.6/net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch @@ -0,0 +1,40 @@ +From 3d9485f610eb2f9d0984d48a827708fdc36120b0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Aug 2024 21:38:03 +0200 +Subject: net: dsa: vsc73xx: pass value in phy_write operation + +From: Pawel Dembicki + +[ Upstream commit 5b9eebc2c7a5f0cc7950d918c1e8a4ad4bed5010 ] + +In the 'vsc73xx_phy_write' function, the register value is missing, +and the phy write operation always sends zeros. + +This commit passes the value variable into the proper register. + +Fixes: 05bd97fc559d ("net: dsa: Add Vitesse VSC73xx DSA router driver") +Reviewed-by: Linus Walleij +Reviewed-by: Florian Fainelli +Signed-off-by: Pawel Dembicki +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/vitesse-vsc73xx-core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c +index c99fb1bd4c25a..dedebb95ece6c 100644 +--- a/drivers/net/dsa/vitesse-vsc73xx-core.c ++++ b/drivers/net/dsa/vitesse-vsc73xx-core.c +@@ -530,7 +530,7 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum, + return 0; + } + +- cmd = (phy << 21) | (regnum << 16); ++ cmd = (phy << 21) | (regnum << 16) | val; + ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd); + if (ret) + return ret; +-- +2.43.0 + diff --git a/queue-6.6/net-dsa-vsc73xx-use-read_poll_timeout-instead-delay-.patch b/queue-6.6/net-dsa-vsc73xx-use-read_poll_timeout-instead-delay-.patch new file mode 100644 index 00000000000..4a2ca6068b3 --- /dev/null +++ b/queue-6.6/net-dsa-vsc73xx-use-read_poll_timeout-instead-delay-.patch @@ -0,0 +1,100 @@ +From 6b41a0ebc4aaa5410131fb3bdcd27f9476965601 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 17 Apr 2024 22:50:44 +0200 +Subject: net: dsa: vsc73xx: use read_poll_timeout instead delay loop + +From: Pawel Dembicki + +[ Upstream commit eb7e33d01db3aec128590391b2397384bab406b6 ] + +Switch the delay loop during the Arbiter empty check from +vsc73xx_adjust_link() to use read_poll_timeout(). Functionally, +one msleep() call is eliminated at the end of the loop in the timeout +case. + +As Russell King suggested: + +"This [change] avoids the issue that on the last iteration, the code reads +the register, tests it, finds the condition that's being waiting for is +false, _then_ waits and end up printing the error message - that last +wait is rather useless, and as the arbiter state isn't checked after +waiting, it could be that we had success during the last wait." 
+ +Suggested-by: Russell King +Reviewed-by: Andrew Lunn +Reviewed-by: Linus Walleij +Reviewed-by: Florian Fainelli +Signed-off-by: Pawel Dembicki +Link: https://lore.kernel.org/r/20240417205048.3542839-2-paweldembicki@gmail.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: fa63c6434b6f ("net: dsa: vsc73xx: check busy flag in MDIO operations") +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/vitesse-vsc73xx-core.c | 30 ++++++++++++++------------ + 1 file changed, 16 insertions(+), 14 deletions(-) + +diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c +index dedebb95ece6c..d8f368df8b06f 100644 +--- a/drivers/net/dsa/vitesse-vsc73xx-core.c ++++ b/drivers/net/dsa/vitesse-vsc73xx-core.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -268,6 +269,9 @@ + #define IS_7398(a) ((a)->chipid == VSC73XX_CHIPID_ID_7398) + #define IS_739X(a) (IS_7395(a) || IS_7398(a)) + ++#define VSC73XX_POLL_SLEEP_US 1000 ++#define VSC73XX_POLL_TIMEOUT_US 10000 ++ + struct vsc73xx_counter { + u8 counter; + const char *name; +@@ -779,7 +783,7 @@ static void vsc73xx_adjust_link(struct dsa_switch *ds, int port, + * after a PHY or the CPU port comes up or down. 
+ */ + if (!phydev->link) { +- int maxloop = 10; ++ int ret, err; + + dev_dbg(vsc->dev, "port %d: went down\n", + port); +@@ -794,19 +798,17 @@ static void vsc73xx_adjust_link(struct dsa_switch *ds, int port, + VSC73XX_ARBDISC, BIT(port), BIT(port)); + + /* Wait until queue is empty */ +- vsc73xx_read(vsc, VSC73XX_BLOCK_ARBITER, 0, +- VSC73XX_ARBEMPTY, &val); +- while (!(val & BIT(port))) { +- msleep(1); +- vsc73xx_read(vsc, VSC73XX_BLOCK_ARBITER, 0, +- VSC73XX_ARBEMPTY, &val); +- if (--maxloop == 0) { +- dev_err(vsc->dev, +- "timeout waiting for block arbiter\n"); +- /* Continue anyway */ +- break; +- } +- } ++ ret = read_poll_timeout(vsc73xx_read, err, ++ err < 0 || (val & BIT(port)), ++ VSC73XX_POLL_SLEEP_US, ++ VSC73XX_POLL_TIMEOUT_US, false, ++ vsc, VSC73XX_BLOCK_ARBITER, 0, ++ VSC73XX_ARBEMPTY, &val); ++ if (ret) ++ dev_err(vsc->dev, ++ "timeout waiting for block arbiter\n"); ++ else if (err < 0) ++ dev_err(vsc->dev, "error reading arbiter\n"); + + /* Put this port into reset */ + vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port, VSC73XX_MAC_CFG, +-- +2.43.0 + diff --git a/queue-6.6/net-ethernet-mtk_wed-fix-use-after-free-panic-in-mtk.patch b/queue-6.6/net-ethernet-mtk_wed-fix-use-after-free-panic-in-mtk.patch new file mode 100644 index 00000000000..d43608fce81 --- /dev/null +++ b/queue-6.6/net-ethernet-mtk_wed-fix-use-after-free-panic-in-mtk.patch @@ -0,0 +1,70 @@ +From 27604f03e6afa3369a7e71176b36887bad0b03fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 10 Aug 2024 13:26:51 +0800 +Subject: net: ethernet: mtk_wed: fix use-after-free panic in + mtk_wed_setup_tc_block_cb() + +From: Zheng Zhang + +[ Upstream commit db1b4bedb9b97c6d34b03d03815147c04fffe8b4 ] + +When there are multiple AP interfaces on one band and with WED on, +turning the interface down will cause a kernel panic on MT798X. + +Previously, cb_priv was freed in mtk_wed_setup_tc_block() without +marking NULL, and mtk_wed_setup_tc_block_cb() didn't check the value either. 
+ +Assign NULL after free cb_priv in mtk_wed_setup_tc_block() and check NULL +in mtk_wed_setup_tc_block_cb(). + +---------- +Unable to handle kernel paging request at virtual address 0072460bca32b4f5 +Call trace: + mtk_wed_setup_tc_block_cb+0x4/0x38 + 0xffffffc0794084bc + tcf_block_playback_offloads+0x70/0x1e8 + tcf_block_unbind+0x6c/0xc8 +... +--------- + +Fixes: 799684448e3e ("net: ethernet: mtk_wed: introduce wed wo support") +Signed-off-by: Zheng Zhang +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mediatek/mtk_wed.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c +index c7196055c8c98..85a9ad2b86bff 100644 +--- a/drivers/net/ethernet/mediatek/mtk_wed.c ++++ b/drivers/net/ethernet/mediatek/mtk_wed.c +@@ -1762,14 +1762,15 @@ mtk_wed_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_pri + { + struct mtk_wed_flow_block_priv *priv = cb_priv; + struct flow_cls_offload *cls = type_data; +- struct mtk_wed_hw *hw = priv->hw; ++ struct mtk_wed_hw *hw = NULL; + +- if (!tc_can_offload(priv->dev)) ++ if (!priv || !tc_can_offload(priv->dev)) + return -EOPNOTSUPP; + + if (type != TC_SETUP_CLSFLOWER) + return -EOPNOTSUPP; + ++ hw = priv->hw; + return mtk_flow_offload_cmd(hw->eth, cls, hw->index); + } + +@@ -1825,6 +1826,7 @@ mtk_wed_setup_tc_block(struct mtk_wed_hw *hw, struct net_device *dev, + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + kfree(block_cb->cb_priv); ++ block_cb->cb_priv = NULL; + } + return 0; + default: +-- +2.43.0 + diff --git a/queue-6.6/net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch b/queue-6.6/net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch new file mode 100644 index 00000000000..901eb581731 --- /dev/null +++ b/queue-6.6/net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch @@ -0,0 +1,76 @@ +From 
26503528b4064262e71958fce2e7e95e9c13e55e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Aug 2024 22:10:22 +0800 +Subject: net: hns3: fix a deadlock problem when config TC during resetting +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jie Wang + +[ Upstream commit be5e816d00a506719e9dbb1a9c861c5ced30a109 ] + +When config TC during the reset process, may cause a deadlock, the flow is +as below: + pf reset start + │ + ▼ + ...... +setup tc │ + │ ▼ + ▼ DOWN: napi_disable() +napi_disable()(skip) │ + │ │ + ▼ ▼ + ...... ...... + │ │ + ▼ │ +napi_enable() │ + ▼ + UINIT: netif_napi_del() + │ + ▼ + ...... + │ + ▼ + INIT: netif_napi_add() + │ + ▼ + ...... global reset start + │ │ + ▼ ▼ + UP: napi_enable()(skip) ...... + │ │ + ▼ ▼ + ...... napi_disable() + +In reset process, the driver will DOWN the port and then UINIT, in this +case, the setup tc process will UP the port before UINIT, so cause the +problem. Adds a DOWN process in UINIT to fix it. 
+ +Fixes: bb6b94a896d4 ("net: hns3: Add reset interface implementation in client") +Signed-off-by: Jie Wang +Signed-off-by: Jijie Shao +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +index db9574e9fb7bc..14d086b535a2d 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +@@ -5729,6 +5729,9 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) + struct net_device *netdev = handle->kinfo.netdev; + struct hns3_nic_priv *priv = netdev_priv(netdev); + ++ if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) ++ hns3_nic_net_stop(netdev); ++ + if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) { + netdev_warn(netdev, "already uninitialized\n"); + return 0; +-- +2.43.0 + diff --git a/queue-6.6/net-hns3-fix-wrong-use-of-semaphore-up.patch b/queue-6.6/net-hns3-fix-wrong-use-of-semaphore-up.patch new file mode 100644 index 00000000000..9f4992d692c --- /dev/null +++ b/queue-6.6/net-hns3-fix-wrong-use-of-semaphore-up.patch @@ -0,0 +1,61 @@ +From 6172798f0aa47aa1827aa03aab32e389715cfe8f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Aug 2024 22:10:20 +0800 +Subject: net: hns3: fix wrong use of semaphore up + +From: Jie Wang + +[ Upstream commit 8445d9d3c03101859663d34fda747f6a50947556 ] + +Currently, if hns3 PF or VF FLR reset failed after five times retry, +the reset done process will directly release the semaphore +which has already released in hclge_reset_prepare_general. +This will cause down operation fail. + +So this patch fixes it by adding reset state judgement. The up operation is +only called after successful PF FLR reset. 
+ +Fixes: 8627bdedc435 ("net: hns3: refactor the precedure of PF FLR") +Fixes: f28368bb4542 ("net: hns3: refactor the procedure of VF FLR") +Signed-off-by: Jie Wang +Signed-off-by: Jijie Shao +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++-- + drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +index c8059d96f64be..ad8e56234b284 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +@@ -11430,8 +11430,8 @@ static void hclge_reset_done(struct hnae3_ae_dev *ae_dev) + dev_err(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", ret); + + hdev->reset_type = HNAE3_NONE_RESET; +- clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); +- up(&hdev->reset_sem); ++ if (test_and_clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) ++ up(&hdev->reset_sem); + } + + static void hclge_clear_resetting_state(struct hclge_dev *hdev) +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +index 43ee20eb03d1f..affdd9d70549a 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +@@ -1710,8 +1710,8 @@ static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev) + ret); + + hdev->reset_type = HNAE3_NONE_RESET; +- clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); +- up(&hdev->reset_sem); ++ if (test_and_clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) ++ up(&hdev->reset_sem); + } + + static u32 hclgevf_get_fw_version(struct hnae3_handle *handle) +-- +2.43.0 + diff --git a/queue-6.6/net-hns3-use-the-user-s-cfg-after-reset.patch b/queue-6.6/net-hns3-use-the-user-s-cfg-after-reset.patch new 
file mode 100644 index 00000000000..bc3c5a4f6f8 --- /dev/null +++ b/queue-6.6/net-hns3-use-the-user-s-cfg-after-reset.patch @@ -0,0 +1,122 @@ +From 68a3c11f5b9b6eeb12f10850d4e9fed31bd97e01 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Aug 2024 22:10:21 +0800 +Subject: net: hns3: use the user's cfg after reset + +From: Peiyang Wang + +[ Upstream commit 30545e17eac1f50c5ef49644daf6af205100a965 ] + +Consider the following case in which the user changes speed and resets the net +interface. Before the hw changes speed successfully, the driver gets the +old speed from hw by timer task. After reset, the previous speed is configured +to hw. As a result, the new speed is configured successfully but lost after +PF reset. The following picture shows this more directly. + ++------+ +----+ +----+ +| USER | | PF | | HW | ++---+--+ +-+--+ +-+--+ + | ethtool -s 100G | | + +------------------>| set speed 100G | + | +--------------------->| + | | set successfully | + | |<---------------------+---+ + | |query cfg (timer task)| | + | +--------------------->| | handle speed + | | return 200G | | changing event + | ethtool --reset |<---------------------+ | (100G) + +------------------>| cfg previous speed |<--+ + | | after reset (200G) | + | +--------------------->| + | | +---+ + | |query cfg (timer task)| | + | +--------------------->| | handle speed + | | return 100G | | changing event + | |<---------------------+ | (200G) + | | |<--+ + | |query cfg (timer task)| + | +--------------------->| + | | return 200G | + | |<---------------------+ + | | | + v v v + +This patch saves the new speed if hw changes speed successfully, which will be +used after reset successfully. 
+ +Fixes: 2d03eacc0b7e ("net: hns3: Only update mac configuation when necessary") +Signed-off-by: Peiyang Wang +Signed-off-by: Jijie Shao +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../hisilicon/hns3/hns3pf/hclge_main.c | 24 ++++++++++++++----- + .../hisilicon/hns3/hns3pf/hclge_mdio.c | 3 +++ + 2 files changed, 21 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +index ad8e56234b284..92c592c177e67 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +@@ -2598,8 +2598,17 @@ static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed, + { + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; ++ int ret; ++ ++ ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num); + +- return hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num); ++ if (ret) ++ return ret; ++ ++ hdev->hw.mac.req_speed = speed; ++ hdev->hw.mac.req_duplex = duplex; ++ ++ return 0; + } + + static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable) +@@ -2901,17 +2910,20 @@ static int hclge_mac_init(struct hclge_dev *hdev) + if (!test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + hdev->hw.mac.duplex = HCLGE_MAC_FULL; + +- ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed, +- hdev->hw.mac.duplex, hdev->hw.mac.lane_num); +- if (ret) +- return ret; +- + if (hdev->hw.mac.support_autoneg) { + ret = hclge_set_autoneg_en(hdev, hdev->hw.mac.autoneg); + if (ret) + return ret; + } + ++ if (!hdev->hw.mac.autoneg) { ++ ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.req_speed, ++ hdev->hw.mac.req_duplex, ++ hdev->hw.mac.lane_num); ++ if (ret) ++ return ret; ++ } ++ + mac->link = 0; + + if (mac->user_fec_mode & BIT(HNAE3_FEC_USER_DEF)) { +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +index 85fb11de43a12..80079657afebe 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +@@ -191,6 +191,9 @@ static void hclge_mac_adjust_link(struct net_device *netdev) + if (ret) + netdev_err(netdev, "failed to adjust link.\n"); + ++ hdev->hw.mac.req_speed = (u32)speed; ++ hdev->hw.mac.req_duplex = (u8)duplex; ++ + ret = hclge_cfg_flowctrl(hdev); + if (ret) + netdev_err(netdev, "failed to configure flow control.\n"); +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch b/queue-6.6/net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch new file mode 100644 index 00000000000..cba66dd4101 --- /dev/null +++ b/queue-6.6/net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch @@ -0,0 +1,46 @@ +From 61442255cd8ad450052df8f0330b8b80755918e5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Aug 2024 17:41:05 +0300 +Subject: net/mlx5e: Correctly report errors for ethtool rx flows + +From: Cosmin Ratiu + +[ Upstream commit cbc796be1779c4dbc9a482c7233995e2a8b6bfb3 ] + +Previously, an ethtool rx flow with no attrs would not be added to the +NIC as it has no rules to configure the hw with, but it would be +reported as successful to the caller (return code 0). This is confusing +for the user as ethtool then reports "Added rule $num", but no rule was +actually added. + +This change corrects that by instead reporting these wrong rules as +-EINVAL. 
+ +Fixes: b29c61dac3a2 ("net/mlx5e: Ethtool steering flow validation refactoring") +Signed-off-by: Cosmin Ratiu +Reviewed-by: Saeed Mahameed +Reviewed-by: Dragos Tatulea +Signed-off-by: Tariq Toukan +Link: https://patch.msgid.link/20240808144107.2095424-5-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +index 3eccdadc03578..773624bb2c5d5 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +@@ -734,7 +734,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, + if (num_tuples <= 0) { + netdev_warn(priv->netdev, "%s: flow is not valid %d\n", + __func__, num_tuples); +- return num_tuples; ++ return num_tuples < 0 ? num_tuples : -EINVAL; + } + + eth_ft = get_flow_table(priv, fs, num_tuples); +-- +2.43.0 + diff --git a/queue-6.6/net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch b/queue-6.6/net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch new file mode 100644 index 00000000000..7d4691347f0 --- /dev/null +++ b/queue-6.6/net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch @@ -0,0 +1,45 @@ +From d3c5d157234107e35ae25ec6e0b1175c3b332c8d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Aug 2024 17:41:04 +0300 +Subject: net/mlx5e: Take state lock during tx timeout reporter + +From: Dragos Tatulea + +[ Upstream commit e6b5afd30b99b43682a7764e1a74a42fe4d5f4b3 ] + +mlx5e_safe_reopen_channels() requires the state lock taken. The +referenced changed in the Fixes tag removed the lock to fix another +issue. This patch adds it back but at a later point (when calling +mlx5e_safe_reopen_channels()) to avoid the deadlock referenced in the +Fixes tag. 
+ +Fixes: eab0da38912e ("net/mlx5e: Fix possible deadlock on mlx5e_tx_timeout_work") +Signed-off-by: Dragos Tatulea +Link: https://lore.kernel.org/all/ZplpKq8FKi3vwfxv@gmail.com/T/ +Reviewed-by: Breno Leitao +Reviewed-by: Moshe Shemesh +Signed-off-by: Tariq Toukan +Link: https://patch.msgid.link/20240808144107.2095424-4-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +index ff8242f67c545..51a23345caa18 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +@@ -149,7 +149,9 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) + return err; + } + ++ mutex_lock(&priv->state_lock); + err = mlx5e_safe_reopen_channels(priv); ++ mutex_unlock(&priv->state_lock); + if (!err) { + to_ctx->status = 1; /* all channels recovered */ + return err; +-- +2.43.0 + diff --git a/queue-6.6/netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch b/queue-6.6/netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch new file mode 100644 index 00000000000..303343b72a1 --- /dev/null +++ b/queue-6.6/netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch @@ -0,0 +1,46 @@ +From c44c31eb9a64211d0d41e92cfc1fecdefd6a1a55 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Aug 2024 12:40:52 +0100 +Subject: netfilter: allow ipv6 fragments to arrive on different devices + +From: Tom Hughes + +[ Upstream commit 3cd740b985963f874a1a094f1969e998b9d05554 ] + +Commit 264640fc2c5f4 ("ipv6: distinguish frag queues by device +for multicast and link-local packets") modified the ipv6 fragment +reassembly logic to distinguish frag queues by device for multicast +and link-local packets but in fact only the main reassembly code +limits the use of the 
device to those address types and the netfilter +reassembly code uses the device for all packets. + +This means that if fragments of a packet arrive on different interfaces +then netfilter will fail to reassemble them and the fragments will be +expired without going any further through the filters. + +Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units") +Signed-off-by: Tom Hughes +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c +index efbec7ee27d0a..c78b13ea5b196 100644 +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -155,6 +155,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, + }; + struct inet_frag_queue *q; + ++ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST | ++ IPV6_ADDR_LINKLOCAL))) ++ key.iif = 0; ++ + q = inet_frag_find(nf_frag->fqdir, &key); + if (!q) + return NULL; +-- +2.43.0 + diff --git a/queue-6.6/netfilter-flowtable-initialise-extack-before-use.patch b/queue-6.6/netfilter-flowtable-initialise-extack-before-use.patch new file mode 100644 index 00000000000..56f59730826 --- /dev/null +++ b/queue-6.6/netfilter-flowtable-initialise-extack-before-use.patch @@ -0,0 +1,37 @@ +From e586e363d3f239c86468db4df24dca5f00b9ca7f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Aug 2024 17:16:37 +0100 +Subject: netfilter: flowtable: initialise extack before use + +From: Donald Hunter + +[ Upstream commit e9767137308daf906496613fd879808a07f006a2 ] + +Fix missing initialisation of extack in flow offload. 
+ +Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") +Signed-off-by: Donald Hunter +Reviewed-by: Simon Horman +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_flow_table_offload.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c +index a010b25076ca0..3d46372b538e5 100644 +--- a/net/netfilter/nf_flow_table_offload.c ++++ b/net/netfilter/nf_flow_table_offload.c +@@ -841,8 +841,8 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, + struct list_head *block_cb_list) + { + struct flow_cls_offload cls_flow = {}; ++ struct netlink_ext_ack extack = {}; + struct flow_block_cb *block_cb; +- struct netlink_ext_ack extack; + __be16 proto = ETH_P_ALL; + int err, i = 0; + +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch b/queue-6.6/netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch new file mode 100644 index 00000000000..09125dd4dea --- /dev/null +++ b/queue-6.6/netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch @@ -0,0 +1,109 @@ +From 5d23589c61e95780a06e20d2bcf60fd6d09786fd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Aug 2024 21:28:41 +0200 +Subject: netfilter: nf_queue: drop packets with cloned unconfirmed conntracks + +From: Florian Westphal + +[ Upstream commit 7d8dc1c7be8d3509e8f5164dd5df64c8e34d7eeb ] + +Conntrack assumes an unconfirmed entry (not yet committed to global hash +table) has a refcount of 1 and is not visible to other cores. + +With multicast forwarding this assumption breaks down because such +skbs get cloned after being picked up, i.e. ct->use refcount is > 1. + +Likewise, bridge netfilter will clone broad/multicast frames and +all frames in case they need to be flood-forwarded during learning +phase. 
+ +For ip multicast forwarding or plain bridge flood-forward this will +"work" because packets don't leave softirq and are implicitly +serialized. + +With nfqueue this no longer holds true, the packets get queued +and can be reinjected in arbitrary ways. + +Disable this feature, I see no other solution. + +After this patch, nfqueue cannot queue packets except the last +multicast/broadcast packet. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/bridge/br_netfilter_hooks.c | 6 +++++- + net/netfilter/nfnetlink_queue.c | 35 +++++++++++++++++++++++++++++++-- + 2 files changed, 38 insertions(+), 3 deletions(-) + +diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c +index d848c84ed030d..68d5538613032 100644 +--- a/net/bridge/br_netfilter_hooks.c ++++ b/net/bridge/br_netfilter_hooks.c +@@ -618,8 +618,12 @@ static unsigned int br_nf_local_in(void *priv, + if (likely(nf_ct_is_confirmed(ct))) + return NF_ACCEPT; + ++ if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) { ++ nf_reset_ct(skb); ++ return NF_ACCEPT; ++ } ++ + WARN_ON_ONCE(skb_shared(skb)); +- WARN_ON_ONCE(refcount_read(&nfct->use) != 1); + + /* We can't call nf_confirm here, it would create a dependency + * on nf_conntrack module. 
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c +index dfc856b3e1fa4..09209b4952ad1 100644 +--- a/net/netfilter/nfnetlink_queue.c ++++ b/net/netfilter/nfnetlink_queue.c +@@ -668,10 +668,41 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry) + { + #if IS_ENABLED(CONFIG_NF_CONNTRACK) + static const unsigned long flags = IPS_CONFIRMED | IPS_DYING; +- const struct nf_conn *ct = (void *)skb_nfct(entry->skb); ++ struct nf_conn *ct = (void *)skb_nfct(entry->skb); ++ unsigned long status; ++ unsigned int use; + +- if (ct && ((ct->status & flags) == IPS_DYING)) ++ if (!ct) ++ return false; ++ ++ status = READ_ONCE(ct->status); ++ if ((status & flags) == IPS_DYING) + return true; ++ ++ if (status & IPS_CONFIRMED) ++ return false; ++ ++ /* in some cases skb_clone() can occur after initial conntrack ++ * pickup, but conntrack assumes exclusive skb->_nfct ownership for ++ * unconfirmed entries. ++ * ++ * This happens for br_netfilter and with ip multicast routing. ++ * We can't be solved with serialization here because one clone could ++ * have been queued for local delivery. ++ */ ++ use = refcount_read(&ct->ct_general.use); ++ if (likely(use == 1)) ++ return false; ++ ++ /* Can't decrement further? Exclusive ownership. */ ++ if (!refcount_dec_not_one(&ct->ct_general.use)) ++ return false; ++ ++ skb_set_nfct(entry->skb, 0); ++ /* No nf_ct_put(): we already decremented .use and it cannot ++ * drop down to 0. 
++ */ ++ return true; + #endif + return false; + } +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nf_tables-a-better-name-for-nft_obj_filter.patch b/queue-6.6/netfilter-nf_tables-a-better-name-for-nft_obj_filter.patch new file mode 100644 index 00000000000..7cb809c11b8 --- /dev/null +++ b/queue-6.6/netfilter-nf_tables-a-better-name-for-nft_obj_filter.patch @@ -0,0 +1,104 @@ +From e692ccbab70b4eb7a2ab559e89f7b2ac03a94d63 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Oct 2023 19:34:30 +0200 +Subject: netfilter: nf_tables: A better name for nft_obj_filter + +From: Phil Sutter + +[ Upstream commit ecf49cad807061d880bea27a5da8e0114ddc7690 ] + +Name it for what it is supposed to become, a real nft_obj_dump_ctx. No +functional change intended. + +Signed-off-by: Phil Sutter +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests") +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 32 ++++++++++++++++---------------- + 1 file changed, 16 insertions(+), 16 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 35d5848cb3d0d..ca5fb700d15cf 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7717,7 +7717,7 @@ static void audit_log_obj_reset(const struct nft_table *table, + kfree(buf); + } + +-struct nft_obj_filter { ++struct nft_obj_dump_ctx { + char *table; + u32 type; + }; +@@ -7727,7 +7727,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_table *table; + unsigned int idx = 0, s_idx = cb->args[0]; +- struct nft_obj_filter *filter = cb->data; ++ struct nft_obj_dump_ctx *ctx = cb->data; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; +@@ -7753,10 +7753,10 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct 
netlink_callback *cb) + goto cont; + if (idx < s_idx) + goto cont; +- if (filter->table && strcmp(filter->table, table->name)) ++ if (ctx->table && strcmp(ctx->table, table->name)) + goto cont; +- if (filter->type != NFT_OBJECT_UNSPEC && +- obj->ops->type->type != filter->type) ++ if (ctx->type != NFT_OBJECT_UNSPEC && ++ obj->ops->type->type != ctx->type) + goto cont; + + rc = nf_tables_fill_obj_info(skb, net, +@@ -7788,33 +7788,33 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + static int nf_tables_dump_obj_start(struct netlink_callback *cb) + { + const struct nlattr * const *nla = cb->data; +- struct nft_obj_filter *filter = NULL; ++ struct nft_obj_dump_ctx *ctx = NULL; + +- filter = kzalloc(sizeof(*filter), GFP_ATOMIC); +- if (!filter) ++ ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); ++ if (!ctx) + return -ENOMEM; + + if (nla[NFTA_OBJ_TABLE]) { +- filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); +- if (!filter->table) { +- kfree(filter); ++ ctx->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); ++ if (!ctx->table) { ++ kfree(ctx); + return -ENOMEM; + } + } + + if (nla[NFTA_OBJ_TYPE]) +- filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); ++ ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + +- cb->data = filter; ++ cb->data = ctx; + return 0; + } + + static int nf_tables_dump_obj_done(struct netlink_callback *cb) + { +- struct nft_obj_filter *filter = cb->data; ++ struct nft_obj_dump_ctx *ctx = cb->data; + +- kfree(filter->table); +- kfree(filter); ++ kfree(ctx->table); ++ kfree(ctx); + + return 0; + } +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch b/queue-6.6/netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch new file mode 100644 index 00000000000..087b8e8972a --- /dev/null +++ b/queue-6.6/netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch @@ -0,0 +1,155 @@ +From 2d51ffdfa57dde37c52c5abb355ecf29c56a9ebb Mon Sep 17 00:00:00 2001 +From: Sasha 
Levin +Date: Fri, 9 Aug 2024 15:07:32 +0200 +Subject: netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests + +From: Phil Sutter + +[ Upstream commit bd662c4218f9648e888bebde9468146965f3f8a0 ] + +Objects' dump callbacks are not concurrency-safe per-se with reset bit +set. If two CPUs perform a reset at the same time, at least counter and +quota objects suffer from value underrun. + +Prevent this by introducing dedicated locking callbacks for nfnetlink +and the asynchronous dump handling to serialize access. + +Fixes: 43da04a593d8 ("netfilter: nf_tables: atomic dump and reset for stateful objects") +Signed-off-by: Phil Sutter +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 72 ++++++++++++++++++++++++++++------- + 1 file changed, 59 insertions(+), 13 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index a3ed9437e21b4..fc99a5e91829d 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7783,6 +7783,19 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + return skb->len; + } + ++static int nf_tables_dumpreset_obj(struct sk_buff *skb, ++ struct netlink_callback *cb) ++{ ++ struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); ++ int ret; ++ ++ mutex_lock(&nft_net->commit_mutex); ++ ret = nf_tables_dump_obj(skb, cb); ++ mutex_unlock(&nft_net->commit_mutex); ++ ++ return ret; ++} ++ + static int nf_tables_dump_obj_start(struct netlink_callback *cb) + { + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; +@@ -7799,12 +7812,18 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb) + if (nla[NFTA_OBJ_TYPE]) + ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + +- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) +- ctx->reset = true; +- + return 0; + } + ++static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb) ++{ ++ 
struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; ++ ++ ctx->reset = true; ++ ++ return nf_tables_dump_obj_start(cb); ++} ++ + static int nf_tables_dump_obj_done(struct netlink_callback *cb) + { + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; +@@ -7863,18 +7882,43 @@ nf_tables_getobj_single(u32 portid, const struct nfnl_info *info, + + static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) ++{ ++ u32 portid = NETLINK_CB(skb).portid; ++ struct sk_buff *skb2; ++ ++ if (info->nlh->nlmsg_flags & NLM_F_DUMP) { ++ struct netlink_dump_control c = { ++ .start = nf_tables_dump_obj_start, ++ .dump = nf_tables_dump_obj, ++ .done = nf_tables_dump_obj_done, ++ .module = THIS_MODULE, ++ .data = (void *)nla, ++ }; ++ ++ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); ++ } ++ ++ skb2 = nf_tables_getobj_single(portid, info, nla, false); ++ if (IS_ERR(skb2)) ++ return PTR_ERR(skb2); ++ ++ return nfnetlink_unicast(skb2, info->net, portid); ++} ++ ++static int nf_tables_getobj_reset(struct sk_buff *skb, ++ const struct nfnl_info *info, ++ const struct nlattr * const nla[]) + { + struct nftables_pernet *nft_net = nft_pernet(info->net); + u32 portid = NETLINK_CB(skb).portid; + struct net *net = info->net; + struct sk_buff *skb2; +- bool reset = false; + char *buf; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { +- .start = nf_tables_dump_obj_start, +- .dump = nf_tables_dump_obj, ++ .start = nf_tables_dumpreset_obj_start, ++ .dump = nf_tables_dumpreset_obj, + .done = nf_tables_dump_obj_done, + .module = THIS_MODULE, + .data = (void *)nla, +@@ -7883,16 +7927,18 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + +- if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) +- reset = true; ++ if (!try_module_get(THIS_MODULE)) ++ return -EINVAL; ++ rcu_read_unlock(); ++ 
mutex_lock(&nft_net->commit_mutex); ++ skb2 = nf_tables_getobj_single(portid, info, nla, true); ++ mutex_unlock(&nft_net->commit_mutex); ++ rcu_read_lock(); ++ module_put(THIS_MODULE); + +- skb2 = nf_tables_getobj_single(portid, info, nla, reset); + if (IS_ERR(skb2)) + return PTR_ERR(skb2); + +- if (!reset) +- return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); +- + buf = kasprintf(GFP_ATOMIC, "%.*s:%u", + nla_len(nla[NFTA_OBJ_TABLE]), + (char *)nla_data(nla[NFTA_OBJ_TABLE]), +@@ -9179,7 +9225,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { + .policy = nft_obj_policy, + }, + [NFT_MSG_GETOBJ_RESET] = { +- .call = nf_tables_getobj, ++ .call = nf_tables_getobj_reset, + .type = NFNL_CB_RCU, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch b/queue-6.6/netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch new file mode 100644 index 00000000000..17289cd3930 --- /dev/null +++ b/queue-6.6/netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch @@ -0,0 +1,91 @@ +From 97d2248669b572f9c98af0495cc0d27dc60882df Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Aug 2024 15:07:30 +0200 +Subject: netfilter: nf_tables: Audit log dump reset after the fact + +From: Phil Sutter + +[ Upstream commit e0b6648b0446e59522819c75ba1dcb09e68d3e94 ] + +In theory, dumpreset may fail and invalidate the preceding log message. +Fix this and use the occasion to prepare for object reset locking, which +benefits from a few unrelated changes: + +* Add an early call to nfnetlink_unicast if not resetting which + effectively skips the audit logging but also unindents it. +* Extract the table's name from the netlink attribute (which is verified + via earlier table lookup) to not rely upon validity of the looked up + table pointer. +* Do not use local variable family, it will vanish. 
+ +Fixes: 8e6cf365e1d5 ("audit: log nftables configuration change events") +Signed-off-by: Phil Sutter +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 28 +++++++++++++--------------- + 1 file changed, 13 insertions(+), 15 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index ea139fca74cb9..c6296ffd9b91b 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7832,6 +7832,7 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) + static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) + { ++ const struct nftables_pernet *nft_net = nft_pernet(info->net); + struct netlink_ext_ack *extack = info->extack; + u8 genmask = nft_genmask_cur(info->net); + u8 family = info->nfmsg->nfgen_family; +@@ -7841,6 +7842,7 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, + struct sk_buff *skb2; + bool reset = false; + u32 objtype; ++ char *buf; + int err; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { +@@ -7879,27 +7881,23 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) + reset = true; + +- if (reset) { +- const struct nftables_pernet *nft_net; +- char *buf; +- +- nft_net = nft_pernet(net); +- buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq); +- +- audit_log_nfcfg(buf, +- family, +- 1, +- AUDIT_NFT_OP_OBJ_RESET, +- GFP_ATOMIC); +- kfree(buf); +- } +- + err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, + info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, + family, table, obj, reset); + if (err < 0) + goto err_fill_obj_info; + ++ if (!reset) ++ return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); ++ ++ buf = kasprintf(GFP_ATOMIC, "%.*s:%u", ++ nla_len(nla[NFTA_OBJ_TABLE]), ++ (char 
*)nla_data(nla[NFTA_OBJ_TABLE]), ++ nft_net->base_seq); ++ audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, ++ AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC); ++ kfree(buf); ++ + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + + err_fill_obj_info: +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nf_tables-carry-reset-boolean-in-nft_obj_d.patch b/queue-6.6/netfilter-nf_tables-carry-reset-boolean-in-nft_obj_d.patch new file mode 100644 index 00000000000..fd1fc6917cb --- /dev/null +++ b/queue-6.6/netfilter-nf_tables-carry-reset-boolean-in-nft_obj_d.patch @@ -0,0 +1,76 @@ +From a64d0a2dda5332dd89476a70b377564a9ccce6e6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Oct 2023 19:34:33 +0200 +Subject: netfilter: nf_tables: Carry reset boolean in nft_obj_dump_ctx + +From: Phil Sutter + +[ Upstream commit a552339063d37b3b1133d9dfc31f851edafb27bb ] + +Relieve the dump callback from having to inspect nlmsg_type upon each +call, just do it once at start of the dump. + +Signed-off-by: Phil Sutter +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests") +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index e3e3ad532ec9f..170f6f624ac16 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7721,6 +7721,7 @@ struct nft_obj_dump_ctx { + unsigned int s_idx; + char *table; + u32 type; ++ bool reset; + }; + + static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) +@@ -7734,12 +7735,8 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + unsigned int entries = 0; + struct nft_object *obj; + unsigned int idx = 0; +- bool reset = false; + int rc = 0; + +- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) +- reset = true; +- + 
rcu_read_lock(); + nft_net = nft_pernet(net); + cb->seq = READ_ONCE(nft_net->base_seq); +@@ -7766,7 +7763,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + NFT_MSG_NEWOBJ, + NLM_F_MULTI | NLM_F_APPEND, + table->family, table, +- obj, reset); ++ obj, ctx->reset); + if (rc < 0) + break; + +@@ -7775,7 +7772,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + cont: + idx++; + } +- if (reset && entries) ++ if (ctx->reset && entries) + audit_log_obj_reset(table, nft_net->base_seq, entries); + if (rc < 0) + break; +@@ -7802,6 +7799,9 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb) + if (nla[NFTA_OBJ_TYPE]) + ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + ++ if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) ++ ctx->reset = true; ++ + return 0; + } + +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nf_tables-carry-s_idx-in-nft_obj_dump_ctx.patch b/queue-6.6/netfilter-nf_tables-carry-s_idx-in-nft_obj_dump_ctx.patch new file mode 100644 index 00000000000..42edf23e1b9 --- /dev/null +++ b/queue-6.6/netfilter-nf_tables-carry-s_idx-in-nft_obj_dump_ctx.patch @@ -0,0 +1,70 @@ +From 53df14d702feb9add8deee060d469d130c2c1d08 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Oct 2023 19:34:31 +0200 +Subject: netfilter: nf_tables: Carry s_idx in nft_obj_dump_ctx + +From: Phil Sutter + +[ Upstream commit 2eda95cfa2fc43bcb21a801dc1d16a0b7cc73860 ] + +Prep work for moving the context into struct netlink_callback scratch +area. 
+ +Signed-off-by: Phil Sutter +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests") +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index ca5fb700d15cf..7d6146923819c 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7718,6 +7718,7 @@ static void audit_log_obj_reset(const struct nft_table *table, + } + + struct nft_obj_dump_ctx { ++ unsigned int s_idx; + char *table; + u32 type; + }; +@@ -7725,14 +7726,14 @@ struct nft_obj_dump_ctx { + static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + { + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); +- const struct nft_table *table; +- unsigned int idx = 0, s_idx = cb->args[0]; + struct nft_obj_dump_ctx *ctx = cb->data; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; ++ const struct nft_table *table; + unsigned int entries = 0; + struct nft_object *obj; ++ unsigned int idx = 0; + bool reset = false; + int rc = 0; + +@@ -7751,7 +7752,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + list_for_each_entry_rcu(obj, &table->objects, list) { + if (!nft_is_active(net, obj)) + goto cont; +- if (idx < s_idx) ++ if (idx < ctx->s_idx) + goto cont; + if (ctx->table && strcmp(ctx->table, table->name)) + goto cont; +@@ -7781,7 +7782,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + } + rcu_read_unlock(); + +- cb->args[0] = idx; ++ ctx->s_idx = idx; + return skb->len; + } + +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nf_tables-drop-pointless-memset-in-nf_tabl.patch b/queue-6.6/netfilter-nf_tables-drop-pointless-memset-in-nf_tabl.patch new file mode 100644 index 00000000000..77da0fd2736 --- /dev/null 
+++ b/queue-6.6/netfilter-nf_tables-drop-pointless-memset-in-nf_tabl.patch @@ -0,0 +1,37 @@ +From 7826d19d07c61f381e9dde879db6315798e790e1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Oct 2023 19:34:28 +0200 +Subject: netfilter: nf_tables: Drop pointless memset in nf_tables_dump_obj + +From: Phil Sutter + +[ Upstream commit ff16111cc10c82ee065ffbd9fa8d6210394ff8c6 ] + +The code does not make use of cb->args fields past the first one, no +need to zero them. + +Signed-off-by: Phil Sutter +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests") +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index c6296ffd9b91b..d29c5803c3ff0 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7753,9 +7753,6 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + goto cont; + if (idx < s_idx) + goto cont; +- if (idx > s_idx) +- memset(&cb->args[1], 0, +- sizeof(cb->args) - sizeof(cb->args[0])); + if (filter && filter->table && + strcmp(filter->table, table->name)) + goto cont; +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch b/queue-6.6/netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch new file mode 100644 index 00000000000..160eee9c199 --- /dev/null +++ b/queue-6.6/netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch @@ -0,0 +1,148 @@ +From 7fc3a5fb7bc90687d9801ff2b0f11d0e946cfa3e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Aug 2024 15:07:31 +0200 +Subject: netfilter: nf_tables: Introduce nf_tables_getobj_single + +From: Phil Sutter + +[ Upstream commit 69fc3e9e90f1afc11f4015e6b75d18ab9acee348 ] + +Outsource the reply skb preparation for non-dump getobj requests into a +distinct function. 
Prep work for object reset locking. + +Signed-off-by: Phil Sutter +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests") +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 75 ++++++++++++++++++++--------------- + 1 file changed, 44 insertions(+), 31 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 170f6f624ac16..a3ed9437e21b4 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7815,10 +7815,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) + } + + /* called with rcu_read_lock held */ +-static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, +- const struct nlattr * const nla[]) ++static struct sk_buff * ++nf_tables_getobj_single(u32 portid, const struct nfnl_info *info, ++ const struct nlattr * const nla[], bool reset) + { +- const struct nftables_pernet *nft_net = nft_pernet(info->net); + struct netlink_ext_ack *extack = info->extack; + u8 genmask = nft_genmask_cur(info->net); + u8 family = info->nfmsg->nfgen_family; +@@ -7826,52 +7826,69 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, + struct net *net = info->net; + struct nft_object *obj; + struct sk_buff *skb2; +- bool reset = false; + u32 objtype; +- char *buf; + int err; + +- if (info->nlh->nlmsg_flags & NLM_F_DUMP) { +- struct netlink_dump_control c = { +- .start = nf_tables_dump_obj_start, +- .dump = nf_tables_dump_obj, +- .done = nf_tables_dump_obj_done, +- .module = THIS_MODULE, +- .data = (void *)nla, +- }; +- +- return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); +- } +- + if (!nla[NFTA_OBJ_NAME] || + !nla[NFTA_OBJ_TYPE]) +- return -EINVAL; ++ return ERR_PTR(-EINVAL); + + table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, 
nla[NFTA_OBJ_TABLE]); +- return PTR_ERR(table); ++ return ERR_CAST(table); + } + + objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (IS_ERR(obj)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); +- return PTR_ERR(obj); ++ return ERR_CAST(obj); + } + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb2) +- return -ENOMEM; ++ return ERR_PTR(-ENOMEM); ++ ++ err = nf_tables_fill_obj_info(skb2, net, portid, ++ info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, ++ family, table, obj, reset); ++ if (err < 0) { ++ kfree_skb(skb2); ++ return ERR_PTR(err); ++ } ++ ++ return skb2; ++} ++ ++static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, ++ const struct nlattr * const nla[]) ++{ ++ struct nftables_pernet *nft_net = nft_pernet(info->net); ++ u32 portid = NETLINK_CB(skb).portid; ++ struct net *net = info->net; ++ struct sk_buff *skb2; ++ bool reset = false; ++ char *buf; ++ ++ if (info->nlh->nlmsg_flags & NLM_F_DUMP) { ++ struct netlink_dump_control c = { ++ .start = nf_tables_dump_obj_start, ++ .dump = nf_tables_dump_obj, ++ .done = nf_tables_dump_obj_done, ++ .module = THIS_MODULE, ++ .data = (void *)nla, ++ }; ++ ++ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); ++ } + + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) + reset = true; + +- err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, +- info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, +- family, table, obj, reset); +- if (err < 0) +- goto err_fill_obj_info; ++ skb2 = nf_tables_getobj_single(portid, info, nla, reset); ++ if (IS_ERR(skb2)) ++ return PTR_ERR(skb2); + + if (!reset) + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); +@@ -7884,11 +7901,7 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, + AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC); + kfree(buf); + +- return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); +- 
+-err_fill_obj_info: +- kfree_skb(skb2); +- return err; ++ return nfnetlink_unicast(skb2, net, portid); + } + + static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nf_tables-nft_obj_filter-fits-into-cb-ctx.patch b/queue-6.6/netfilter-nf_tables-nft_obj_filter-fits-into-cb-ctx.patch new file mode 100644 index 00000000000..3e620e5c1bc --- /dev/null +++ b/queue-6.6/netfilter-nf_tables-nft_obj_filter-fits-into-cb-ctx.patch @@ -0,0 +1,75 @@ +From 8fd7c0d99793107aeb24713bf5d1842dc36720ec Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Oct 2023 19:34:32 +0200 +Subject: netfilter: nf_tables: nft_obj_filter fits into cb->ctx + +From: Phil Sutter + +[ Upstream commit 5a893b9cdf6fa5758f43d323a1d7fa6d1bf489ff ] + +No need to allocate it if one may just use struct netlink_callback's +scratch area for it. + +Signed-off-by: Phil Sutter +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests") +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 16 +++++----------- + 1 file changed, 5 insertions(+), 11 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 7d6146923819c..e3e3ad532ec9f 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7726,7 +7726,7 @@ struct nft_obj_dump_ctx { + static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + { + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); +- struct nft_obj_dump_ctx *ctx = cb->data; ++ struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; +@@ -7788,34 +7788,28 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + + static int nf_tables_dump_obj_start(struct netlink_callback *cb) + { ++ struct nft_obj_dump_ctx *ctx = (void 
*)cb->ctx; + const struct nlattr * const *nla = cb->data; +- struct nft_obj_dump_ctx *ctx = NULL; + +- ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); +- if (!ctx) +- return -ENOMEM; ++ BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + + if (nla[NFTA_OBJ_TABLE]) { + ctx->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); +- if (!ctx->table) { +- kfree(ctx); ++ if (!ctx->table) + return -ENOMEM; +- } + } + + if (nla[NFTA_OBJ_TYPE]) + ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + +- cb->data = ctx; + return 0; + } + + static int nf_tables_dump_obj_done(struct netlink_callback *cb) + { +- struct nft_obj_dump_ctx *ctx = cb->data; ++ struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; + + kfree(ctx->table); +- kfree(ctx); + + return 0; + } +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nf_tables-unconditionally-allocate-nft_obj.patch b/queue-6.6/netfilter-nf_tables-unconditionally-allocate-nft_obj.patch new file mode 100644 index 00000000000..5ee860bca14 --- /dev/null +++ b/queue-6.6/netfilter-nf_tables-unconditionally-allocate-nft_obj.patch @@ -0,0 +1,89 @@ +From 42600b4baf27a147470d1ca21a3ec20607084b4a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Oct 2023 19:34:29 +0200 +Subject: netfilter: nf_tables: Unconditionally allocate nft_obj_filter + +From: Phil Sutter + +[ Upstream commit 4279cc60b354d2d2b970655a70a151cbfa1d958b ] + +Prep work for moving the filter into struct netlink_callback's scratch +area. 
+ +Signed-off-by: Phil Sutter +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: bd662c4218f9 ("netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests") +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 36 +++++++++++++++-------------------- + 1 file changed, 15 insertions(+), 21 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index d29c5803c3ff0..35d5848cb3d0d 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -7753,11 +7753,9 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + goto cont; + if (idx < s_idx) + goto cont; +- if (filter && filter->table && +- strcmp(filter->table, table->name)) ++ if (filter->table && strcmp(filter->table, table->name)) + goto cont; +- if (filter && +- filter->type != NFT_OBJECT_UNSPEC && ++ if (filter->type != NFT_OBJECT_UNSPEC && + obj->ops->type->type != filter->type) + goto cont; + +@@ -7792,23 +7790,21 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb) + const struct nlattr * const *nla = cb->data; + struct nft_obj_filter *filter = NULL; + +- if (nla[NFTA_OBJ_TABLE] || nla[NFTA_OBJ_TYPE]) { +- filter = kzalloc(sizeof(*filter), GFP_ATOMIC); +- if (!filter) +- return -ENOMEM; ++ filter = kzalloc(sizeof(*filter), GFP_ATOMIC); ++ if (!filter) ++ return -ENOMEM; + +- if (nla[NFTA_OBJ_TABLE]) { +- filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); +- if (!filter->table) { +- kfree(filter); +- return -ENOMEM; +- } ++ if (nla[NFTA_OBJ_TABLE]) { ++ filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); ++ if (!filter->table) { ++ kfree(filter); ++ return -ENOMEM; + } +- +- if (nla[NFTA_OBJ_TYPE]) +- filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + } + ++ if (nla[NFTA_OBJ_TYPE]) ++ filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); ++ + cb->data = filter; + return 0; + } +@@ -7817,10 +7813,8 @@ static int nf_tables_dump_obj_done(struct netlink_callback 
*cb) + { + struct nft_obj_filter *filter = cb->data; + +- if (filter) { +- kfree(filter->table); +- kfree(filter); +- } ++ kfree(filter->table); ++ kfree(filter); + + return 0; + } +-- +2.43.0 + diff --git a/queue-6.6/s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch b/queue-6.6/s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch new file mode 100644 index 00000000000..1b1c9b0c152 --- /dev/null +++ b/queue-6.6/s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch @@ -0,0 +1,60 @@ +From 1ca677fedf4d45c07ff19c2af2285e578a2f529b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Aug 2024 13:25:48 +0200 +Subject: s390/uv: Panic for set and remove shared access UVC errors + +From: Claudio Imbrenda + +[ Upstream commit cff59d8631e1409ffdd22d9d717e15810181b32c ] + +The return value uv_set_shared() and uv_remove_shared() (which are +wrappers around the share() function) is not always checked. The system +integrity of a protected guest depends on the Share and Unshare UVCs +being successful. This means that any caller that fails to check the +return value will compromise the security of the protected guest. + +No code path that would lead to such violation of the security +guarantees is currently exercised, since all the areas that are shared +never get unshared during the lifetime of the system. This might +change and become an issue in the future. + +The Share and Unshare UVCs can only fail in case of hypervisor +misbehaviour (either a bug or malicious behaviour). In such cases there +is no reasonable way forward, and the system needs to panic. + +This patch replaces the return at the end of the share() function with +a panic, to guarantee system integrity. 
+ +Fixes: 5abb9351dfd9 ("s390/uv: introduce guest side ultravisor code") +Signed-off-by: Claudio Imbrenda +Reviewed-by: Christian Borntraeger +Reviewed-by: Steffen Eiden +Reviewed-by: Janosch Frank +Link: https://lore.kernel.org/r/20240801112548.85303-1-imbrenda@linux.ibm.com +Message-ID: <20240801112548.85303-1-imbrenda@linux.ibm.com> +[frankja@linux.ibm.com: Fixed up patch subject] +Signed-off-by: Janosch Frank +Signed-off-by: Sasha Levin +--- + arch/s390/include/asm/uv.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h +index 0e7bd3873907f..b2e2f9a4163c5 100644 +--- a/arch/s390/include/asm/uv.h ++++ b/arch/s390/include/asm/uv.h +@@ -442,7 +442,10 @@ static inline int share(unsigned long addr, u16 cmd) + + if (!uv_call(0, (u64)&uvcb)) + return 0; +- return -EINVAL; ++ pr_err("%s UVC failed (rc: 0x%x, rrc: 0x%x), possible hypervisor bug.\n", ++ uvcb.header.cmd == UVC_CMD_SET_SHARED_ACCESS ? "Share" : "Unshare", ++ uvcb.header.rc, uvcb.header.rrc); ++ panic("System security cannot be guaranteed unless the system panics now.\n"); + } + + /* +-- +2.43.0 + diff --git a/queue-6.6/selftests-net-lib-ignore-possible-errors.patch b/queue-6.6/selftests-net-lib-ignore-possible-errors.patch new file mode 100644 index 00000000000..d1c787dafab --- /dev/null +++ b/queue-6.6/selftests-net-lib-ignore-possible-errors.patch @@ -0,0 +1,56 @@ +From d9e56c969f7e03acd72c9f737b1b9fbbdf4db2e7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jun 2024 18:31:02 +0200 +Subject: selftests: net: lib: ignore possible errors + +From: Matthieu Baerts (NGI0) + +[ Upstream commit 7e0620bc6a5ec6b340a0be40054f294ca26c010f ] + +No need to disable errexit temporary, simply ignore the only possible +and not handled error. 
+ +Reviewed-by: Geliang Tang +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://lore.kernel.org/r/20240607-upstream-net-next-20240607-selftests-mptcp-net-lib-v1-1-e36986faac94@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 7965a7f32a53 ("selftests: net: lib: kill PIDs before del netns") +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/lib.sh | 10 +--------- + 1 file changed, 1 insertion(+), 9 deletions(-) + +diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh +index a186490edb4ab..323a7c305ccd4 100644 +--- a/tools/testing/selftests/net/lib.sh ++++ b/tools/testing/selftests/net/lib.sh +@@ -38,25 +38,17 @@ busywait() + cleanup_ns() + { + local ns="" +- local errexit=0 + local ret=0 + +- # disable errexit temporary +- if [[ $- =~ "e" ]]; then +- errexit=1 +- set +e +- fi +- + for ns in "$@"; do + [ -z "${ns}" ] && continue +- ip netns delete "${ns}" &> /dev/null ++ ip netns delete "${ns}" &> /dev/null || true + if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then + echo "Warn: Failed to remove namespace $ns" + ret=1 + fi + done + +- [ $errexit -eq 1 ] && set -e + return $ret + } + +-- +2.43.0 + diff --git a/queue-6.6/selftests-net-lib-kill-pids-before-del-netns.patch b/queue-6.6/selftests-net-lib-kill-pids-before-del-netns.patch new file mode 100644 index 00000000000..cff5d3a8f03 --- /dev/null +++ b/queue-6.6/selftests-net-lib-kill-pids-before-del-netns.patch @@ -0,0 +1,45 @@ +From b517b7339dd2ccd66bb096bffe3dbda109ae33e9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Aug 2024 15:39:34 +0200 +Subject: selftests: net: lib: kill PIDs before del netns + +From: Matthieu Baerts (NGI0) + +[ Upstream commit 7965a7f32a53d9ad807ce2c53bdda69ba104974f ] + +When deleting netns, it is possible to still have some tasks running, +e.g. background tasks like tcpdump running in the background, not +stopped because the test has been interrupted. 
+ +Before deleting the netns, it is then safer to kill all attached PIDs, +if any. That should reduce some noises after the end of some tests, and +help with the debugging of some issues. That's why this modification is +seen as a "fix". + +Fixes: 25ae948b4478 ("selftests/net: add lib.sh") +Acked-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Acked-by: Florian Westphal +Reviewed-by: Hangbin Liu +Link: https://patch.msgid.link/20240813-upstream-net-20240813-selftests-net-lib-kill-v1-1-27b689b248b8@kernel.org +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/lib.sh | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh +index 323a7c305ccd4..e2c35eda230af 100644 +--- a/tools/testing/selftests/net/lib.sh ++++ b/tools/testing/selftests/net/lib.sh +@@ -42,6 +42,7 @@ cleanup_ns() + + for ns in "$@"; do + [ -z "${ns}" ] && continue ++ ip netns pids "${ns}" 2> /dev/null | xargs -r kill || true + ip netns delete "${ns}" &> /dev/null || true + if ! 
busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then + echo "Warn: Failed to remove namespace $ns" +-- +2.43.0 + diff --git a/queue-6.6/series b/queue-6.6/series index 22d6d396d6e..452fc2bc48e 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -44,3 +44,39 @@ drm-amdgpu-actually-check-flags-for-all-context-ops.patch memcg_write_event_control-fix-a-user-triggerable-oops.patch drm-amdgpu-jpeg2-properly-set-atomics-vmid-field.patch drm-amdgpu-jpeg4-properly-set-atomics-vmid-field.patch +s390-uv-panic-for-set-and-remove-shared-access-uvc-e.patch +bpf-fix-updating-attached-freplace-prog-in-prog_arra.patch +igc-fix-packet-still-tx-after-gate-close-by-reducing.patch +igc-fix-qbv_config_change_errors-logics.patch +igc-fix-reset-adapter-logics-when-tx-mode-change.patch +net-mlx5e-take-state-lock-during-tx-timeout-reporter.patch +net-mlx5e-correctly-report-errors-for-ethtool-rx-flo.patch +atm-idt77252-prevent-use-after-free-in-dequeue_rx.patch +net-axienet-fix-register-defines-comment-description.patch +net-dsa-vsc73xx-pass-value-in-phy_write-operation.patch +net-dsa-vsc73xx-use-read_poll_timeout-instead-delay-.patch +net-dsa-vsc73xx-check-busy-flag-in-mdio-operations.patch +net-ethernet-mtk_wed-fix-use-after-free-panic-in-mtk.patch +mlxbf_gige-disable-rx-filters-until-rx-path-initiali.patch +mptcp-correct-mptcp_subflow_attr_ssn_offset-reserved.patch +tcp-update-window-clamping-condition.patch +netfilter-allow-ipv6-fragments-to-arrive-on-differen.patch +netfilter-flowtable-initialise-extack-before-use.patch +netfilter-nf_queue-drop-packets-with-cloned-unconfir.patch +netfilter-nf_tables-audit-log-dump-reset-after-the-f.patch +netfilter-nf_tables-drop-pointless-memset-in-nf_tabl.patch +netfilter-nf_tables-unconditionally-allocate-nft_obj.patch +netfilter-nf_tables-a-better-name-for-nft_obj_filter.patch +netfilter-nf_tables-carry-s_idx-in-nft_obj_dump_ctx.patch +netfilter-nf_tables-nft_obj_filter-fits-into-cb-ctx.patch 
+netfilter-nf_tables-carry-reset-boolean-in-nft_obj_d.patch +netfilter-nf_tables-introduce-nf_tables_getobj_singl.patch +netfilter-nf_tables-add-locking-for-nft_msg_getobj_r.patch +vsock-fix-recursive-recvmsg-calls.patch +selftests-net-lib-ignore-possible-errors.patch +selftests-net-lib-kill-pids-before-del-netns.patch +net-hns3-fix-wrong-use-of-semaphore-up.patch +net-hns3-use-the-user-s-cfg-after-reset.patch +net-hns3-fix-a-deadlock-problem-when-config-tc-durin.patch +gpio-mlxbf3-support-shutdown-function.patch +alsa-hda-realtek-fix-noise-from-speakers-on-lenovo-i.patch diff --git a/queue-6.6/tcp-update-window-clamping-condition.patch b/queue-6.6/tcp-update-window-clamping-condition.patch new file mode 100644 index 00000000000..8d581e7f55a --- /dev/null +++ b/queue-6.6/tcp-update-window-clamping-condition.patch @@ -0,0 +1,94 @@ +From 87ed2007bb3e2e4f71ef2f27713d1b2bad8bc521 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Aug 2024 16:06:40 -0700 +Subject: tcp: Update window clamping condition + +From: Subash Abhinov Kasiviswanathan + +[ Upstream commit a2cbb1603943281a604f5adc48079a148db5cb0d ] + +This patch is based on the discussions between Neal Cardwell and +Eric Dumazet in the link +https://lore.kernel.org/netdev/20240726204105.1466841-1-quic_subashab@quicinc.com/ + +It was correctly pointed out that tp->window_clamp would not be +updated in cases where net.ipv4.tcp_moderate_rcvbuf=0 or if +(copied <= tp->rcvq_space.space). While it is expected for most +setups to leave the sysctl enabled, the latter condition may +not end up hitting depending on the TCP receive queue size and +the pattern of arriving data. + +The updated check should be hit only on initial MSS update from +TCP_MIN_MSS to measured MSS value and subsequently if there was +an update to a larger value. 
+ +Fixes: 05f76b2d634e ("tcp: Adjust clamping window for applications specifying SO_RCVBUF") +Signed-off-by: Sean Tranchetti +Signed-off-by: Subash Abhinov Kasiviswanathan +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 28 ++++++++++++---------------- + 1 file changed, 12 insertions(+), 16 deletions(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index d0364cff65c9f..24c7c955dc955 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -243,9 +243,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) + */ + if (unlikely(len != icsk->icsk_ack.rcv_mss)) { + u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE; ++ u8 old_ratio = tcp_sk(sk)->scaling_ratio; + + do_div(val, skb->truesize); + tcp_sk(sk)->scaling_ratio = val ? val : 1; ++ ++ if (old_ratio != tcp_sk(sk)->scaling_ratio) ++ WRITE_ONCE(tcp_sk(sk)->window_clamp, ++ tcp_win_from_space(sk, sk->sk_rcvbuf)); + } + icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, + tcp_sk(sk)->advmss); +@@ -748,7 +753,8 @@ void tcp_rcv_space_adjust(struct sock *sk) + * + */ + +- if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)) { ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && ++ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { + u64 rcvwin, grow; + int rcvbuf; + +@@ -764,22 +770,12 @@ void tcp_rcv_space_adjust(struct sock *sk) + + rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin), + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); +- if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { +- if (rcvbuf > sk->sk_rcvbuf) { +- WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); +- +- /* Make the window clamp follow along. */ +- WRITE_ONCE(tp->window_clamp, +- tcp_win_from_space(sk, rcvbuf)); +- } +- } else { +- /* Make the window clamp follow along while being bounded +- * by SO_RCVBUF. 
+- */ +- int clamp = tcp_win_from_space(sk, min(rcvbuf, sk->sk_rcvbuf)); ++ if (rcvbuf > sk->sk_rcvbuf) { ++ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); + +- if (clamp > tp->window_clamp) +- WRITE_ONCE(tp->window_clamp, clamp); ++ /* Make the window clamp follow along. */ ++ WRITE_ONCE(tp->window_clamp, ++ tcp_win_from_space(sk, rcvbuf)); + } + } + tp->rcvq_space.space = copied; +-- +2.43.0 + diff --git a/queue-6.6/vsock-fix-recursive-recvmsg-calls.patch b/queue-6.6/vsock-fix-recursive-recvmsg-calls.patch new file mode 100644 index 00000000000..9df6884c205 --- /dev/null +++ b/queue-6.6/vsock-fix-recursive-recvmsg-calls.patch @@ -0,0 +1,172 @@ +From ec6fab95921c939e8fdacadd4231ec03af70a586 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 11 Aug 2024 19:21:53 -0700 +Subject: vsock: fix recursive ->recvmsg calls + +From: Cong Wang + +[ Upstream commit 69139d2919dd4aa9a553c8245e7c63e82613e3fc ] + +After a vsock socket has been added to a BPF sockmap, its prot->recvmsg +has been replaced with vsock_bpf_recvmsg(). Thus the following +recursion could happen: + +vsock_bpf_recvmsg() + -> __vsock_recvmsg() + -> vsock_connectible_recvmsg() + -> prot->recvmsg() + -> vsock_bpf_recvmsg() again + +We need to fix it by calling the original ->recvmsg() without any BPF +sockmap logic in __vsock_recvmsg(). + +Fixes: 634f1a7110b4 ("vsock: support sockmap") +Reported-by: syzbot+bdb4bd87b5e22058e2a4@syzkaller.appspotmail.com +Tested-by: syzbot+bdb4bd87b5e22058e2a4@syzkaller.appspotmail.com +Cc: Bobby Eshleman +Cc: Michael S. Tsirkin +Cc: Stefano Garzarella +Signed-off-by: Cong Wang +Acked-by: Michael S. 
Tsirkin +Link: https://patch.msgid.link/20240812022153.86512-1-xiyou.wangcong@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + include/net/af_vsock.h | 4 ++++ + net/vmw_vsock/af_vsock.c | 50 +++++++++++++++++++++++---------------- + net/vmw_vsock/vsock_bpf.c | 4 ++-- + 3 files changed, 35 insertions(+), 23 deletions(-) + +diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h +index dc3cb16835b63..f8b09a82f62e1 100644 +--- a/include/net/af_vsock.h ++++ b/include/net/af_vsock.h +@@ -227,8 +227,12 @@ struct vsock_tap { + int vsock_add_tap(struct vsock_tap *vt); + int vsock_remove_tap(struct vsock_tap *vt); + void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque); ++int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, ++ int flags); + int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags); ++int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, ++ size_t len, int flags); + int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags); + +diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c +index 4afb6a541cf38..f5eb737a677d9 100644 +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -1270,25 +1270,28 @@ static int vsock_dgram_connect(struct socket *sock, + return err; + } + ++int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, ++ size_t len, int flags) ++{ ++ struct sock *sk = sock->sk; ++ struct vsock_sock *vsk = vsock_sk(sk); ++ ++ return vsk->transport->dgram_dequeue(vsk, msg, len, flags); ++} ++ + int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) + { + #ifdef CONFIG_BPF_SYSCALL ++ struct sock *sk = sock->sk; + const struct proto *prot; +-#endif +- struct vsock_sock *vsk; +- struct sock *sk; + +- sk = sock->sk; +- vsk = vsock_sk(sk); +- +-#ifdef CONFIG_BPF_SYSCALL + prot = READ_ONCE(sk->sk_prot); + if (prot != 
&vsock_proto) + return prot->recvmsg(sk, msg, len, flags, NULL); + #endif + +- return vsk->transport->dgram_dequeue(vsk, msg, len, flags); ++ return __vsock_dgram_recvmsg(sock, msg, len, flags); + } + EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg); + +@@ -2124,15 +2127,12 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, + } + + int +-vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, +- int flags) ++__vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, ++ int flags) + { + struct sock *sk; + struct vsock_sock *vsk; + const struct vsock_transport *transport; +-#ifdef CONFIG_BPF_SYSCALL +- const struct proto *prot; +-#endif + int err; + + sk = sock->sk; +@@ -2183,14 +2183,6 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + goto out; + } + +-#ifdef CONFIG_BPF_SYSCALL +- prot = READ_ONCE(sk->sk_prot); +- if (prot != &vsock_proto) { +- release_sock(sk); +- return prot->recvmsg(sk, msg, len, flags, NULL); +- } +-#endif +- + if (sk->sk_type == SOCK_STREAM) + err = __vsock_stream_recvmsg(sk, msg, len, flags); + else +@@ -2200,6 +2192,22 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + release_sock(sk); + return err; + } ++ ++int ++vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, ++ int flags) ++{ ++#ifdef CONFIG_BPF_SYSCALL ++ struct sock *sk = sock->sk; ++ const struct proto *prot; ++ ++ prot = READ_ONCE(sk->sk_prot); ++ if (prot != &vsock_proto) ++ return prot->recvmsg(sk, msg, len, flags, NULL); ++#endif ++ ++ return __vsock_connectible_recvmsg(sock, msg, len, flags); ++} + EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg); + + static int vsock_set_rcvlowat(struct sock *sk, int val) +diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c +index a3c97546ab84a..c42c5cc18f324 100644 +--- a/net/vmw_vsock/vsock_bpf.c ++++ b/net/vmw_vsock/vsock_bpf.c +@@ -64,9 +64,9 @@ static int 
__vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int + int err; + + if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) +- err = vsock_connectible_recvmsg(sock, msg, len, flags); ++ err = __vsock_connectible_recvmsg(sock, msg, len, flags); + else if (sk->sk_type == SOCK_DGRAM) +- err = vsock_dgram_recvmsg(sock, msg, len, flags); ++ err = __vsock_dgram_recvmsg(sock, msg, len, flags); + else + err = -EPROTOTYPE; + +-- +2.43.0 +