- pre_destroy_cq: Destroy the FW CQ object so that no new CQ events are
  generated;
- post_destroy_cq: Release all resources.
This patch, along with the previous one, fixes the crash below:
Unable to handle kernel paging request at virtual address ffff8000114b1180
Mem abort info:
ESR = 0x96000047
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
Data abort info:
ISV = 0, ISS = 0x00000047
CM = 0, WnR = 1
swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000f4582000
[ffff8000114b1180] pgd=00000447fffff003, p4d=00000447fffff003, pud=00000447ffffe003, pmd=00000447ffffb003, pte=0000000000000000
Internal error: Oops: 96000047 [#1] SMP
Modules linked in: udp_diag uio_pci_generic uio tcp_diag inet_diag binfmt_misc sn_core_odd(OE) rpcrdma(OE) xprtrdma(OE) ib_isert(OE) ib_iser(OE) ib_srpt(OE) ib_srp(OE) ib_ipoib(OE) kpatch_9658536(OK) kpatch_9322385(OK) kpatch_8843421(OK) kpatch_8636216(OK) vfat fat aes_ce_blk crypto_simd cryptd aes_ce_cipher crct10dif_ce ghash_ce sm4_ce sha2_ce sha256_arm64 sha1_ce sbsa_gwdt sg acpi_ipmi ipmi_si ipmi_msghandler m1_uncore_ddrss_pmu m1_uncore_cmn_pmu team_yosemite9rc6(OE) vnic(OE) ip_tables mlx5_ib(OE) sd_mod ast mlx5_core(OE) i2c_algo_bit drm_vram_helper psample drm_kms_helper mlxdevm(OE) auxiliary(OE) mlxfw(OE) syscopyarea sysfillrect tls sysimgblt fb_sys_fops drm_ttm_helper nvme ttm nvme_core drm t10_pi i2c_designware_platform i2c_designware_core i2c_core ahci libahci libata rdma_ucm(OE) ib_uverbs(OE) rdma_cm(OE) iw_cm(OE) ib_cm(OE) ib_umad(OE) ib_core(OE) ib_ucm(OE) mlx_compat(OE) [last unloaded: ipmi_devintf]
CPU: 83 PID: 59375 Comm: kworker/u253:1 Kdump: loaded Tainted: G OE K 5.10.84-004.ali5000.alios7.aarch64 #1
Hardware name: Inspur AliServer-Xuanwu2.0AM-02-2UM1P-5B/AS1221MG1, BIOS 1.2.M1.AL.P.158.00 08/31/2023
Workqueue: ib-comp-unb-wq ib_cq_poll_work [ib_core]
pstate: 82c00089 (Nzcv daIf +PAN +UAO +TCO BTYPE=--)
pc : native_queued_spin_lock_slowpath+0x1c4/0x31c
lr : mlx5_ib_poll_cq+0x18c/0x2f8 [mlx5_ib]
sp : ffff80002be1bc80
x29: ffff80002be1bc80 x28: ffff000810e69000
x27: ffff000810e69000 x26: ffff000810e69200
x25: 0000000000000000 x24: ffff8000117db000
x23: ffff04000156b780 x22: 0000000000000000
x21: ffff04000ce6c160 x20: ffff0008196a4000
x19: 0000000000000010 x18: 0000000000000020
x17: 0000000000000000 x16: 0000000000000000
x15: ffff040055a364e8 x14: ffffffffffffffff
x13: ffff80002318bda8 x12: ffff0400358836e8
x11: 0000000000000040 x10: 0000000000000eb0
x9 : 0000000000000000 x8 : 0000000000000000
x7 : ffff04477fa20140 x6 : ffff8000114b1140
x5 : ffff04477fa20140 x4 : ffff8000114b1180
x3 : ffff000810e69200 x2 : ffff8000114b1180
x1 : 0000000001500000 x0 : ffff04477fa20148
Call trace:
native_queued_spin_lock_slowpath+0x1c4/0x31c
__ib_process_cq+0x74/0x1b8 [ib_core]
ib_cq_poll_work+0x34/0xa0 [ib_core]
process_one_work+0x1d8/0x4b0
worker_thread+0x180/0x440
kthread+0x114/0x120
Code: 910020e0 8b0400c4 f862d929 aa0403e2 (f8296847)
---[ end trace 387be2290557729c ]---
Kernel panic - not syncing: Oops: Fatal exception
SMP: stopping secondary CPUs
Kernel Offset: disabled
CPU features: 0x9850817,7a60aa38
Memory Limit: none
Starting crashdump kernel...
Bye!
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://patch.msgid.link/aaf0072f350d1c7e8731f43b79e11a560bafb9e0.1750070205.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
return err;
}
-int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+int mlx5_ib_pre_destroy_cq(struct ib_cq *cq)
{
struct mlx5_ib_dev *dev = to_mdev(cq->device);
struct mlx5_ib_cq *mcq = to_mcq(cq);
+
+ return mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
+}
+
+void mlx5_ib_post_destroy_cq(struct ib_cq *cq)
+{
+ destroy_cq_kernel(to_mdev(cq->device), to_mcq(cq));
+}
+
+int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+{
int ret;
- ret = mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
+ ret = mlx5_ib_pre_destroy_cq(cq);
if (ret)
return ret;
if (udata)
- destroy_cq_user(mcq, udata);
+ destroy_cq_user(to_mcq(cq), udata);
else
- destroy_cq_kernel(dev, mcq);
+ mlx5_ib_post_destroy_cq(cq);
return 0;
}
.modify_port = mlx5_ib_modify_port,
.modify_qp = mlx5_ib_modify_qp,
.modify_srq = mlx5_ib_modify_srq,
+ .pre_destroy_cq = mlx5_ib_pre_destroy_cq,
.poll_cq = mlx5_ib_poll_cq,
+ .post_destroy_cq = mlx5_ib_post_destroy_cq,
.post_recv = mlx5_ib_post_recv_nodrain,
.post_send = mlx5_ib_post_send_nodrain,
.post_srq_recv = mlx5_ib_post_srq_recv,
struct uverbs_attr_bundle *attrs);
int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mlx5_ib_pre_destroy_cq(struct ib_cq *cq);
+void mlx5_ib_post_destroy_cq(struct ib_cq *cq);
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);