]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blame - queue-6.1/mlxbf_gige-stop-interface-during-shutdown.patch
6.1-stable patches
[thirdparty/kernel/stable-queue.git] / queue-6.1 / mlxbf_gige-stop-interface-during-shutdown.patch
CommitLineData
3eb7a00b
GKH
1From 09ba28e1cd3cf715daab1fca6e1623e22fd754a6 Mon Sep 17 00:00:00 2001
2From: David Thompson <davthompson@nvidia.com>
3Date: Mon, 25 Mar 2024 17:09:29 -0400
4Subject: mlxbf_gige: stop interface during shutdown
5
6From: David Thompson <davthompson@nvidia.com>
7
8commit 09ba28e1cd3cf715daab1fca6e1623e22fd754a6 upstream.
9
10The mlxbf_gige driver intermittently encounters a NULL pointer
11exception while the system is shutting down via "reboot" command.
12The mlxbf_gige driver will experience an exception right after executing
13its shutdown() method. One example of this exception is:
14
15Unable to handle kernel NULL pointer dereference at virtual address 0000000000000070
16Mem abort info:
17 ESR = 0x0000000096000004
18 EC = 0x25: DABT (current EL), IL = 32 bits
19 SET = 0, FnV = 0
20 EA = 0, S1PTW = 0
21 FSC = 0x04: level 0 translation fault
22Data abort info:
23 ISV = 0, ISS = 0x00000004
24 CM = 0, WnR = 0
25user pgtable: 4k pages, 48-bit VAs, pgdp=000000011d373000
26[0000000000000070] pgd=0000000000000000, p4d=0000000000000000
27Internal error: Oops: 96000004 [#1] SMP
28CPU: 0 PID: 13 Comm: ksoftirqd/0 Tainted: G S OE 5.15.0-bf.6.gef6992a #1
29Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS 4.0.2.12669 Apr 21 2023
30pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
31pc : mlxbf_gige_handle_tx_complete+0xc8/0x170 [mlxbf_gige]
32lr : mlxbf_gige_poll+0x54/0x160 [mlxbf_gige]
33sp : ffff8000080d3c10
34x29: ffff8000080d3c10 x28: ffffcce72cbb7000 x27: ffff8000080d3d58
35x26: ffff0000814e7340 x25: ffff331cd1a05000 x24: ffffcce72c4ea008
36x23: ffff0000814e4b40 x22: ffff0000814e4d10 x21: ffff0000814e4128
37x20: 0000000000000000 x19: ffff0000814e4a80 x18: ffffffffffffffff
38x17: 000000000000001c x16: ffffcce72b4553f4 x15: ffff80008805b8a7
39x14: 0000000000000000 x13: 0000000000000030 x12: 0101010101010101
40x11: 7f7f7f7f7f7f7f7f x10: c2ac898b17576267 x9 : ffffcce720fa5404
41x8 : ffff000080812138 x7 : 0000000000002e9a x6 : 0000000000000080
42x5 : ffff00008de3b000 x4 : 0000000000000000 x3 : 0000000000000001
43x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000
44Call trace:
45 mlxbf_gige_handle_tx_complete+0xc8/0x170 [mlxbf_gige]
46 mlxbf_gige_poll+0x54/0x160 [mlxbf_gige]
47 __napi_poll+0x40/0x1c8
48 net_rx_action+0x314/0x3a0
49 __do_softirq+0x128/0x334
50 run_ksoftirqd+0x54/0x6c
51 smpboot_thread_fn+0x14c/0x190
52 kthread+0x10c/0x110
53 ret_from_fork+0x10/0x20
54Code: 8b070000 f9000ea0 f95056c0 f86178a1 (b9407002)
55---[ end trace 7cc3941aa0d8e6a4 ]---
56Kernel panic - not syncing: Oops: Fatal exception in interrupt
57Kernel Offset: 0x4ce722520000 from 0xffff800008000000
58PHYS_OFFSET: 0x80000000
59CPU features: 0x000005c1,a3330e5a
60Memory Limit: none
61---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]---
62
63During system shutdown, the mlxbf_gige driver's shutdown() is always executed.
64However, the driver's stop() method will only execute if networking interface
65configuration logic within the Linux distribution has been set up to do so.
66
67If shutdown() executes but stop() does not execute, NAPI remains enabled
68and this can lead to an exception if NAPI is scheduled while the hardware
69interface has only been partially deinitialized.
70
71The networking interface managed by the mlxbf_gige driver must be properly
72stopped during system shutdown so that IFF_UP is cleared, the hardware
73interface is put into a clean state, and NAPI is fully deinitialized.
74
75Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver")
76Signed-off-by: David Thompson <davthompson@nvidia.com>
77Link: https://lore.kernel.org/r/20240325210929.25362-1-davthompson@nvidia.com
78Signed-off-by: Jakub Kicinski <kuba@kernel.org>
79Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
80---
81 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 10 ++++++++--
82 1 file changed, 8 insertions(+), 2 deletions(-)
83
84--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
85+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
86@@ -14,6 +14,7 @@
87 #include <linux/module.h>
88 #include <linux/phy.h>
89 #include <linux/platform_device.h>
90+#include <linux/rtnetlink.h>
91 #include <linux/skbuff.h>
92
93 #include "mlxbf_gige.h"
94@@ -417,8 +418,13 @@ static void mlxbf_gige_shutdown(struct p
95 {
96 struct mlxbf_gige *priv = platform_get_drvdata(pdev);
97
98- writeq(0, priv->base + MLXBF_GIGE_INT_EN);
99- mlxbf_gige_clean_port(priv);
100+ rtnl_lock();
101+ netif_device_detach(priv->netdev);
102+
103+ if (netif_running(priv->netdev))
104+ dev_close(priv->netdev);
105+
106+ rtnl_unlock();
107 }
108
109 static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = {