rsv_qp may be double destroyed in error flow, first in free_mr_init(),
and then in hns_roce_exit(). Fix it by moving the free_mr_init() call
into hns_roce_v2_init().
list_del corruption,
ffff589732eb9b50->next is LIST_POISON1 (
dead000000000100)
WARNING: CPU: 8 PID:
1047115 at lib/list_debug.c:53 __list_del_entry_valid+0x148/0x240
...
Call trace:
__list_del_entry_valid+0x148/0x240
hns_roce_qp_remove+0x4c/0x3f0 [hns_roce_hw_v2]
hns_roce_v2_destroy_qp_common+0x1dc/0x5f4 [hns_roce_hw_v2]
hns_roce_v2_destroy_qp+0x22c/0x46c [hns_roce_hw_v2]
free_mr_exit+0x6c/0x120 [hns_roce_hw_v2]
hns_roce_v2_exit+0x170/0x200 [hns_roce_hw_v2]
hns_roce_exit+0x118/0x350 [hns_roce_hw_v2]
__hns_roce_hw_v2_init_instance+0x1c8/0x304 [hns_roce_hw_v2]
hns_roce_hw_v2_reset_notify_init+0x170/0x21c [hns_roce_hw_v2]
hns_roce_hw_v2_reset_notify+0x6c/0x190 [hns_roce_hw_v2]
hclge_notify_roce_client+0x6c/0x160 [hclge]
hclge_reset_rebuild+0x150/0x5c0 [hclge]
hclge_reset+0x10c/0x140 [hclge]
hclge_reset_subtask+0x80/0x104 [hclge]
hclge_reset_service_task+0x168/0x3ac [hclge]
hclge_service_task+0x50/0x100 [hclge]
process_one_work+0x250/0x9a0
worker_thread+0x324/0x990
kthread+0x190/0x210
ret_from_fork+0x10/0x18
Fixes: fd8489294dd2 ("RDMA/hns: Fix Use-After-Free of rsv_qp on HIP08")
Signed-off-by: wenglianfa <wenglianfa@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20250703113905.3597124-2-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
{
int ret;
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ ret = free_mr_init(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to init free mr!\n");
+ return ret;
+ }
+ }
+
/* The hns ROCEE requires the extdb info to be cleared before using */
ret = hns_roce_clear_extdb_list_info(hr_dev);
if (ret)
- return ret;
+ goto err_clear_extdb_failed;
ret = get_hem_table(hr_dev);
if (ret)
- return ret;
+ goto err_clear_extdb_failed;
if (hr_dev->is_vf)
return 0;
err_llm_init_failed:
put_hem_table(hr_dev);
+err_clear_extdb_failed:
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_exit(hr_dev);
return ret;
}
goto error_failed_roce_init;
}
- if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
- ret = free_mr_init(hr_dev);
- if (ret) {
- dev_err(hr_dev->dev, "failed to init free mr!\n");
- goto error_failed_free_mr_init;
- }
- }
handle->priv = hr_dev;
return 0;
-error_failed_free_mr_init:
- hns_roce_exit(hr_dev);
-
error_failed_roce_init:
kfree(hr_dev->priv);
spin_lock_init(&hr_dev->sm_lock);
+ INIT_LIST_HEAD(&hr_dev->qp_list);
+ spin_lock_init(&hr_dev->qp_list_lock);
+
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
INIT_LIST_HEAD(&hr_dev->pgdir_list);
}
}
- INIT_LIST_HEAD(&hr_dev->qp_list);
- spin_lock_init(&hr_dev->qp_list_lock);
-
ret = hns_roce_register_device(hr_dev);
if (ret)
goto error_failed_register_device;