]>
Commit | Line | Data |
---|---|---|
3eb7a00b GKH |
1 | From 09ba28e1cd3cf715daab1fca6e1623e22fd754a6 Mon Sep 17 00:00:00 2001 |
2 | From: David Thompson <davthompson@nvidia.com> | |
3 | Date: Mon, 25 Mar 2024 17:09:29 -0400 | |
4 | Subject: mlxbf_gige: stop interface during shutdown | |
5 | ||
6 | From: David Thompson <davthompson@nvidia.com> | |
7 | ||
8 | commit 09ba28e1cd3cf715daab1fca6e1623e22fd754a6 upstream. | |
9 | ||
10 | The mlxbf_gige driver intermittently encounters a NULL pointer | |
11 | exception while the system is shutting down via "reboot" command. | |
12 | The mlxbf_gige driver will experience an exception right after executing | |
13 | its shutdown() method. One example of this exception is: | |
14 | ||
15 | Unable to handle kernel NULL pointer dereference at virtual address 0000000000000070 | |
16 | Mem abort info: | |
17 | ESR = 0x0000000096000004 | |
18 | EC = 0x25: DABT (current EL), IL = 32 bits | |
19 | SET = 0, FnV = 0 | |
20 | EA = 0, S1PTW = 0 | |
21 | FSC = 0x04: level 0 translation fault | |
22 | Data abort info: | |
23 | ISV = 0, ISS = 0x00000004 | |
24 | CM = 0, WnR = 0 | |
25 | user pgtable: 4k pages, 48-bit VAs, pgdp=000000011d373000 | |
26 | [0000000000000070] pgd=0000000000000000, p4d=0000000000000000 | |
27 | Internal error: Oops: 96000004 [#1] SMP | |
28 | CPU: 0 PID: 13 Comm: ksoftirqd/0 Tainted: G S OE 5.15.0-bf.6.gef6992a #1 | |
29 | Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS 4.0.2.12669 Apr 21 2023 | |
30 | pstate: 20400009 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | |
31 | pc : mlxbf_gige_handle_tx_complete+0xc8/0x170 [mlxbf_gige] | |
32 | lr : mlxbf_gige_poll+0x54/0x160 [mlxbf_gige] | |
33 | sp : ffff8000080d3c10 | |
34 | x29: ffff8000080d3c10 x28: ffffcce72cbb7000 x27: ffff8000080d3d58 | |
35 | x26: ffff0000814e7340 x25: ffff331cd1a05000 x24: ffffcce72c4ea008 | |
36 | x23: ffff0000814e4b40 x22: ffff0000814e4d10 x21: ffff0000814e4128 | |
37 | x20: 0000000000000000 x19: ffff0000814e4a80 x18: ffffffffffffffff | |
38 | x17: 000000000000001c x16: ffffcce72b4553f4 x15: ffff80008805b8a7 | |
39 | x14: 0000000000000000 x13: 0000000000000030 x12: 0101010101010101 | |
40 | x11: 7f7f7f7f7f7f7f7f x10: c2ac898b17576267 x9 : ffffcce720fa5404 | |
41 | x8 : ffff000080812138 x7 : 0000000000002e9a x6 : 0000000000000080 | |
42 | x5 : ffff00008de3b000 x4 : 0000000000000000 x3 : 0000000000000001 | |
43 | x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 | |
44 | Call trace: | |
45 | mlxbf_gige_handle_tx_complete+0xc8/0x170 [mlxbf_gige] | |
46 | mlxbf_gige_poll+0x54/0x160 [mlxbf_gige] | |
47 | __napi_poll+0x40/0x1c8 | |
48 | net_rx_action+0x314/0x3a0 | |
49 | __do_softirq+0x128/0x334 | |
50 | run_ksoftirqd+0x54/0x6c | |
51 | smpboot_thread_fn+0x14c/0x190 | |
52 | kthread+0x10c/0x110 | |
53 | ret_from_fork+0x10/0x20 | |
54 | Code: 8b070000 f9000ea0 f95056c0 f86178a1 (b9407002) | |
55 | ---[ end trace 7cc3941aa0d8e6a4 ]--- | |
56 | Kernel panic - not syncing: Oops: Fatal exception in interrupt | |
57 | Kernel Offset: 0x4ce722520000 from 0xffff800008000000 | |
58 | PHYS_OFFSET: 0x80000000 | |
59 | CPU features: 0x000005c1,a3330e5a | |
60 | Memory Limit: none | |
61 | ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- | |
62 | ||
63 | During system shutdown, the mlxbf_gige driver's shutdown() is always executed. | |
64 | However, the driver's stop() method will only execute if networking interface | |
65 | configuration logic within the Linux distribution has been set up to do so. | |
66 | ||
67 | If shutdown() executes but stop() does not execute, NAPI remains enabled | |
68 | and this can lead to an exception if NAPI is scheduled while the hardware | |
69 | interface has only been partially deinitialized. | |
70 | ||
71 | The networking interface managed by the mlxbf_gige driver must be properly | |
72 | stopped during system shutdown so that IFF_UP is cleared, the hardware | |
73 | interface is put into a clean state, and NAPI is fully deinitialized. | |
74 | ||
75 | Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") | |
76 | Signed-off-by: David Thompson <davthompson@nvidia.com> | |
77 | Link: https://lore.kernel.org/r/20240325210929.25362-1-davthompson@nvidia.com | |
78 | Signed-off-by: Jakub Kicinski <kuba@kernel.org> | |
79 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
80 | --- | |
81 | drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 10 ++++++++-- | |
82 | 1 file changed, 8 insertions(+), 2 deletions(-) | |
83 | ||
84 | --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | |
85 | +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | |
86 | @@ -14,6 +14,7 @@ | |
87 | #include <linux/module.h> | |
88 | #include <linux/phy.h> | |
89 | #include <linux/platform_device.h> | |
90 | +#include <linux/rtnetlink.h> | |
91 | #include <linux/skbuff.h> | |
92 | ||
93 | #include "mlxbf_gige.h" | |
94 | @@ -417,8 +418,13 @@ static void mlxbf_gige_shutdown(struct p | |
95 | { | |
96 | struct mlxbf_gige *priv = platform_get_drvdata(pdev); | |
97 | ||
98 | - writeq(0, priv->base + MLXBF_GIGE_INT_EN); | |
99 | - mlxbf_gige_clean_port(priv); | |
100 | + rtnl_lock(); | |
101 | + netif_device_detach(priv->netdev); | |
102 | + | |
103 | + if (netif_running(priv->netdev)) | |
104 | + dev_close(priv->netdev); | |
105 | + | |
106 | + rtnl_unlock(); | |
107 | } | |
108 | ||
109 | static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = { |