]>
Commit | Line | Data |
---|---|---|
3fefb5f6 GKH |
1 | From foo@baz Thu Mar 14 23:19:55 PDT 2019 |
2 | From: Shiju Jose <shiju.jose@huawei.com> | |
3 | Date: Sun, 10 Mar 2019 14:47:51 +0800 | |
4 | Subject: net: hns3: fix to stop multiple HNS reset due to the AER changes | |
5 | ||
6 | From: Shiju Jose <shiju.jose@huawei.com> | |
7 | ||
8 | [ Upstream commit 69b51bbb03f73e04c486f79d1556b2d9becf4dbc ] | |
9 | ||
10 | The commit bfcb79fca19d | |
11 | ("PCI/ERR: Run error recovery callbacks for all affected devices") | |
12 | affected the non-fatal error recovery logic for the HNS and RDMA devices. | |
13 | This is because each HNS PF under the PCIe bus receives callbacks | |
14 | from the AER driver when an error is reported for one of the PFs. | |
15 | This causes unwanted PF resets because | |
16 | the HNS decides which PF to reset based on the reset type set. | |
17 | The HNS error handling code sets the reset type based on the hw error | |
18 | type detected. | |
19 | ||
20 | This patch provides a fix for the above issue for the recovery of | |
21 | the hw errors in the HNS and RDMA devices. | |
22 | ||
23 | This patch needs backporting to the kernel v5.0+ | |
24 | ||
25 | Fixes: 332fbf576579 ("net: hns3: add handling of hw ras errors using new set of commands") | |
26 | Reported-by: Xiaofei Tan <tanxiaofei@huawei.com> | |
27 | Signed-off-by: Shiju Jose <shiju.jose@huawei.com> | |
28 | Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com> | |
29 | Signed-off-by: David S. Miller <davem@davemloft.net> | |
30 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
31 | --- | |
32 | drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + | |
33 | drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 +++- | |
34 | drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 9 +++++++-- | |
35 | 3 files changed, 11 insertions(+), 3 deletions(-) | |
36 | ||
37 | --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h | |
38 | +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h | |
39 | @@ -192,6 +192,7 @@ struct hnae3_ae_dev { | |
40 | const struct hnae3_ae_ops *ops; | |
41 | struct list_head node; | |
42 | u32 flag; | |
43 | + u8 override_pci_need_reset; /* fix to stop multiple reset happening */ | |
44 | enum hnae3_dev_type dev_type; | |
45 | enum hnae3_reset_type reset_type; | |
46 | void *priv; | |
47 | --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | |
48 | +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | |
49 | @@ -1852,7 +1852,9 @@ static pci_ers_result_t hns3_slot_reset( | |
50 | ||
51 | /* request the reset */ | |
52 | if (ae_dev->ops->reset_event) { | |
53 | - ae_dev->ops->reset_event(pdev, NULL); | |
54 | + if (!ae_dev->override_pci_need_reset) | |
55 | + ae_dev->ops->reset_event(pdev, NULL); | |
56 | + | |
57 | return PCI_ERS_RESULT_RECOVERED; | |
58 | } | |
59 | ||
60 | --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | |
61 | +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | |
62 | @@ -1259,8 +1259,10 @@ pci_ers_result_t hclge_handle_hw_ras_err | |
63 | hclge_handle_all_ras_errors(hdev); | |
64 | } else { | |
65 | if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || | |
66 | - hdev->pdev->revision < 0x21) | |
67 | + hdev->pdev->revision < 0x21) { | |
68 | + ae_dev->override_pci_need_reset = 1; | |
69 | return PCI_ERS_RESULT_RECOVERED; | |
70 | + } | |
71 | } | |
72 | ||
73 | if (status & HCLGE_RAS_REG_ROCEE_ERR_MASK) { | |
74 | @@ -1269,8 +1271,11 @@ pci_ers_result_t hclge_handle_hw_ras_err | |
75 | } | |
76 | ||
77 | if (status & HCLGE_RAS_REG_NFE_MASK || | |
78 | - status & HCLGE_RAS_REG_ROCEE_ERR_MASK) | |
79 | + status & HCLGE_RAS_REG_ROCEE_ERR_MASK) { | |
80 | + ae_dev->override_pci_need_reset = 0; | |
81 | return PCI_ERS_RESULT_NEED_RESET; | |
82 | + } | |
83 | + ae_dev->override_pci_need_reset = 1; | |
84 | ||
85 | return PCI_ERS_RESULT_RECOVERED; | |
86 | } |