// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
7 #include "diag/fw_tracer.h"
/*
 * Bit numbers inside mlx5_fw_reset::reset_flags.  They are always handed to
 * the atomic bitops (set_bit(), clear_bit(), test_bit(), ...), never used as
 * masks directly.
 */
enum {
	/* toggled by mlx5_sync_reset_{set,clear}_reset_requested(), read by the poll timer */
	MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
	/* driven by the enable_remote_dev_reset devlink param; tested on a reset request */
	MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
	/* set by mlx5_fw_reset_set_reset_sync(); tested when completing a reload */
	MLX5_FW_RESET_FLAGS_PENDING_COMP,
	/* set by mlx5_drain_fw_reset(); tested by the event notifier and the poll timer */
	MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
	/* set by mlx5_sync_reset_now_event(); consumed by mlx5_fw_reset_wait_reset_done() */
	MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED
};
18 struct mlx5_fw_reset
{
19 struct mlx5_core_dev
*dev
;
21 struct workqueue_struct
*wq
;
22 struct work_struct fw_live_patch_work
;
23 struct work_struct reset_request_work
;
24 struct work_struct reset_unload_work
;
25 struct work_struct reset_reload_work
;
26 struct work_struct reset_now_work
;
27 struct work_struct reset_abort_work
;
28 unsigned long reset_flags
;
29 struct timer_list timer
;
30 struct completion done
;
/* Values the 4-bit field returned by mlx5_get_fw_rst_state() is compared against. */
enum {
	MLX5_FW_RST_STATE_IDLE = 0,
	MLX5_FW_RST_STATE_TOGGLE_REQ = 4,
};
/* Bit positions inside the init segment's 'initializing' dword. */
enum {
	/* lowest bit of the reset-state field read by mlx5_get_fw_rst_state() */
	MLX5_RST_STATE_BIT_NUM = 12,
	/* bit written by mlx5_set_fw_rst_ack() */
	MLX5_RST_ACK_BIT_NUM = 22,
};
44 static u8
mlx5_get_fw_rst_state(struct mlx5_core_dev
*dev
)
46 return (ioread32be(&dev
->iseg
->initializing
) >> MLX5_RST_STATE_BIT_NUM
) & 0xF;
49 static void mlx5_set_fw_rst_ack(struct mlx5_core_dev
*dev
)
51 iowrite32be(BIT(MLX5_RST_ACK_BIT_NUM
), &dev
->iseg
->initializing
);
54 static int mlx5_fw_reset_enable_remote_dev_reset_set(struct devlink
*devlink
, u32 id
,
55 struct devlink_param_gset_ctx
*ctx
)
57 struct mlx5_core_dev
*dev
= devlink_priv(devlink
);
58 struct mlx5_fw_reset
*fw_reset
;
60 fw_reset
= dev
->priv
.fw_reset
;
63 clear_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST
, &fw_reset
->reset_flags
);
65 set_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST
, &fw_reset
->reset_flags
);
69 static int mlx5_fw_reset_enable_remote_dev_reset_get(struct devlink
*devlink
, u32 id
,
70 struct devlink_param_gset_ctx
*ctx
)
72 struct mlx5_core_dev
*dev
= devlink_priv(devlink
);
73 struct mlx5_fw_reset
*fw_reset
;
75 fw_reset
= dev
->priv
.fw_reset
;
77 ctx
->val
.vbool
= !test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST
,
78 &fw_reset
->reset_flags
);
82 static int mlx5_reg_mfrl_set(struct mlx5_core_dev
*dev
, u8 reset_level
,
83 u8 reset_type_sel
, u8 sync_resp
, bool sync_start
)
85 u32 out
[MLX5_ST_SZ_DW(mfrl_reg
)] = {};
86 u32 in
[MLX5_ST_SZ_DW(mfrl_reg
)] = {};
88 MLX5_SET(mfrl_reg
, in
, reset_level
, reset_level
);
89 MLX5_SET(mfrl_reg
, in
, rst_type_sel
, reset_type_sel
);
90 MLX5_SET(mfrl_reg
, in
, pci_sync_for_fw_update_resp
, sync_resp
);
91 MLX5_SET(mfrl_reg
, in
, pci_sync_for_fw_update_start
, sync_start
);
93 return mlx5_core_access_reg(dev
, in
, sizeof(in
), out
, sizeof(out
), MLX5_REG_MFRL
, 0, 1);
96 static int mlx5_reg_mfrl_query(struct mlx5_core_dev
*dev
, u8
*reset_level
,
97 u8
*reset_type
, u8
*reset_state
)
99 u32 out
[MLX5_ST_SZ_DW(mfrl_reg
)] = {};
100 u32 in
[MLX5_ST_SZ_DW(mfrl_reg
)] = {};
103 err
= mlx5_core_access_reg(dev
, in
, sizeof(in
), out
, sizeof(out
), MLX5_REG_MFRL
, 0, 0);
108 *reset_level
= MLX5_GET(mfrl_reg
, out
, reset_level
);
110 *reset_type
= MLX5_GET(mfrl_reg
, out
, reset_type
);
112 *reset_state
= MLX5_GET(mfrl_reg
, out
, reset_state
);
117 int mlx5_fw_reset_query(struct mlx5_core_dev
*dev
, u8
*reset_level
, u8
*reset_type
)
119 return mlx5_reg_mfrl_query(dev
, reset_level
, reset_type
, NULL
);
122 static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev
*dev
,
123 struct netlink_ext_ack
*extack
)
127 if (mlx5_reg_mfrl_query(dev
, NULL
, NULL
, &reset_state
))
133 switch (reset_state
) {
134 case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION
:
135 case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS
:
136 NL_SET_ERR_MSG_MOD(extack
, "Sync reset still in progress");
138 case MLX5_MFRL_REG_RESET_STATE_NEG_TIMEOUT
:
139 NL_SET_ERR_MSG_MOD(extack
, "Sync reset negotiation timeout");
141 case MLX5_MFRL_REG_RESET_STATE_NACK
:
142 NL_SET_ERR_MSG_MOD(extack
, "One of the hosts disabled reset");
144 case MLX5_MFRL_REG_RESET_STATE_UNLOAD_TIMEOUT
:
145 NL_SET_ERR_MSG_MOD(extack
, "Sync reset unload timeout");
150 NL_SET_ERR_MSG_MOD(extack
, "Sync reset failed");
154 int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev
*dev
, u8 reset_type_sel
,
155 struct netlink_ext_ack
*extack
)
157 struct mlx5_fw_reset
*fw_reset
= dev
->priv
.fw_reset
;
158 u32 out
[MLX5_ST_SZ_DW(mfrl_reg
)] = {};
159 u32 in
[MLX5_ST_SZ_DW(mfrl_reg
)] = {};
162 set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP
, &fw_reset
->reset_flags
);
164 MLX5_SET(mfrl_reg
, in
, reset_level
, MLX5_MFRL_REG_RESET_LEVEL3
);
165 MLX5_SET(mfrl_reg
, in
, rst_type_sel
, reset_type_sel
);
166 MLX5_SET(mfrl_reg
, in
, pci_sync_for_fw_update_start
, 1);
167 err
= mlx5_access_reg(dev
, in
, sizeof(in
), out
, sizeof(out
),
168 MLX5_REG_MFRL
, 0, 1, false);
172 clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP
, &fw_reset
->reset_flags
);
173 if (err
== -EREMOTEIO
&& MLX5_CAP_MCAM_FEATURE(dev
, reset_state
)) {
174 rst_res
= mlx5_fw_reset_get_reset_state_err(dev
, extack
);
175 return rst_res
? rst_res
: err
;
178 NL_SET_ERR_MSG_MOD(extack
, "Sync reset command failed");
179 return mlx5_cmd_check(dev
, err
, in
, out
);
182 int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev
*dev
,
183 struct netlink_ext_ack
*extack
)
188 err
= mlx5_fw_reset_get_reset_state_err(dev
, extack
);
192 rst_state
= mlx5_get_fw_rst_state(dev
);
196 mlx5_core_err(dev
, "Sync reset did not complete, state=%d\n", rst_state
);
197 NL_SET_ERR_MSG_MOD(extack
, "Sync reset did not complete successfully");
201 int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev
*dev
)
203 return mlx5_reg_mfrl_set(dev
, MLX5_MFRL_REG_RESET_LEVEL0
, 0, 0, false);
206 static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev
*dev
, bool unloaded
)
208 struct mlx5_fw_reset
*fw_reset
= dev
->priv
.fw_reset
;
210 /* if this is the driver that initiated the fw reset, devlink completed the reload */
211 if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP
, &fw_reset
->reset_flags
)) {
212 complete(&fw_reset
->done
);
215 mlx5_unload_one(dev
, false);
216 if (mlx5_health_wait_pci_up(dev
))
217 mlx5_core_err(dev
, "reset reload flow aborted, PCI reads still not working\n");
219 mlx5_load_one(dev
, true);
220 devlink_remote_reload_actions_performed(priv_to_devlink(dev
), 0,
221 BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT
) |
222 BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE
));
226 static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev
*dev
)
228 struct mlx5_fw_reset
*fw_reset
= dev
->priv
.fw_reset
;
230 del_timer_sync(&fw_reset
->timer
);
233 static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev
*dev
, bool poll_health
)
235 struct mlx5_fw_reset
*fw_reset
= dev
->priv
.fw_reset
;
237 if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED
, &fw_reset
->reset_flags
)) {
238 mlx5_core_warn(dev
, "Reset request was already cleared\n");
242 mlx5_stop_sync_reset_poll(dev
);
244 mlx5_start_health_poll(dev
);
248 static void mlx5_sync_reset_reload_work(struct work_struct
*work
)
250 struct mlx5_fw_reset
*fw_reset
= container_of(work
, struct mlx5_fw_reset
,
252 struct mlx5_core_dev
*dev
= fw_reset
->dev
;
254 mlx5_sync_reset_clear_reset_requested(dev
, false);
255 mlx5_enter_error_state(dev
, true);
256 mlx5_fw_reset_complete_reload(dev
, false);
/* Period of the sync-reset poll timer in jiffies: HZ / 10, i.e. a tenth of a second. */
#define MLX5_RESET_POLL_INTERVAL	(HZ / 10)
260 static void poll_sync_reset(struct timer_list
*t
)
262 struct mlx5_fw_reset
*fw_reset
= from_timer(fw_reset
, t
, timer
);
263 struct mlx5_core_dev
*dev
= fw_reset
->dev
;
266 if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED
, &fw_reset
->reset_flags
))
269 fatal_error
= mlx5_health_check_fatal_sensors(dev
);
272 mlx5_core_warn(dev
, "Got Device Reset\n");
273 if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
, &fw_reset
->reset_flags
))
274 queue_work(fw_reset
->wq
, &fw_reset
->reset_reload_work
);
276 mlx5_core_err(dev
, "Device is being removed, Drop new reset work\n");
280 mod_timer(&fw_reset
->timer
, round_jiffies(jiffies
+ MLX5_RESET_POLL_INTERVAL
));
283 static void mlx5_start_sync_reset_poll(struct mlx5_core_dev
*dev
)
285 struct mlx5_fw_reset
*fw_reset
= dev
->priv
.fw_reset
;
287 timer_setup(&fw_reset
->timer
, poll_sync_reset
, 0);
288 fw_reset
->timer
.expires
= round_jiffies(jiffies
+ MLX5_RESET_POLL_INTERVAL
);
289 add_timer(&fw_reset
->timer
);
292 static int mlx5_fw_reset_set_reset_sync_ack(struct mlx5_core_dev
*dev
)
294 return mlx5_reg_mfrl_set(dev
, MLX5_MFRL_REG_RESET_LEVEL3
, 0, 1, false);
297 static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev
*dev
)
299 return mlx5_reg_mfrl_set(dev
, MLX5_MFRL_REG_RESET_LEVEL3
, 0, 2, false);
302 static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev
*dev
)
304 struct mlx5_fw_reset
*fw_reset
= dev
->priv
.fw_reset
;
306 if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED
, &fw_reset
->reset_flags
)) {
307 mlx5_core_warn(dev
, "Reset request was already set\n");
310 mlx5_stop_health_poll(dev
, true);
311 mlx5_start_sync_reset_poll(dev
);
315 static void mlx5_fw_live_patch_event(struct work_struct
*work
)
317 struct mlx5_fw_reset
*fw_reset
= container_of(work
, struct mlx5_fw_reset
,
319 struct mlx5_core_dev
*dev
= fw_reset
->dev
;
321 mlx5_core_info(dev
, "Live patch updated firmware version: %d.%d.%d\n", fw_rev_maj(dev
),
322 fw_rev_min(dev
), fw_rev_sub(dev
));
324 if (mlx5_fw_tracer_reload(dev
->tracer
))
325 mlx5_core_err(dev
, "Failed to reload FW tracer\n");
328 #if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)
329 static int mlx5_check_hotplug_interrupt(struct mlx5_core_dev
*dev
)
331 struct pci_dev
*bridge
= dev
->pdev
->bus
->self
;
338 err
= pcie_capability_read_word(bridge
, PCI_EXP_SLTCTL
, ®16
);
342 if ((reg16
& PCI_EXP_SLTCTL_HPIE
) && (reg16
& PCI_EXP_SLTCTL_DLLSCE
)) {
343 mlx5_core_warn(dev
, "FW reset is not supported as HotPlug is enabled\n");
351 static int mlx5_check_dev_ids(struct mlx5_core_dev
*dev
, u16 dev_id
)
353 struct pci_bus
*bridge_bus
= dev
->pdev
->bus
;
354 struct pci_dev
*sdev
;
358 /* Check that all functions under the pci bridge are PFs of
359 * this device otherwise fail this function.
361 list_for_each_entry(sdev
, &bridge_bus
->devices
, bus_list
) {
362 err
= pci_read_config_word(sdev
, PCI_DEVICE_ID
, &sdev_id
);
364 return pcibios_err_to_errno(err
);
365 if (sdev_id
!= dev_id
) {
366 mlx5_core_warn(dev
, "unrecognized dev_id (0x%x)\n", sdev_id
);
373 static bool mlx5_is_reset_now_capable(struct mlx5_core_dev
*dev
)
378 if (!MLX5_CAP_GEN(dev
, fast_teardown
)) {
379 mlx5_core_warn(dev
, "fast teardown is not supported by firmware\n");
383 #if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)
384 err
= mlx5_check_hotplug_interrupt(dev
);
389 err
= pci_read_config_word(dev
->pdev
, PCI_DEVICE_ID
, &dev_id
);
392 return (!mlx5_check_dev_ids(dev
, dev_id
));
395 static void mlx5_sync_reset_request_event(struct work_struct
*work
)
397 struct mlx5_fw_reset
*fw_reset
= container_of(work
, struct mlx5_fw_reset
,
399 struct mlx5_core_dev
*dev
= fw_reset
->dev
;
402 if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST
, &fw_reset
->reset_flags
) ||
403 !mlx5_is_reset_now_capable(dev
)) {
404 err
= mlx5_fw_reset_set_reset_sync_nack(dev
);
405 mlx5_core_warn(dev
, "PCI Sync FW Update Reset Nack %s",
406 err
? "Failed" : "Sent");
409 if (mlx5_sync_reset_set_reset_requested(dev
))
412 err
= mlx5_fw_reset_set_reset_sync_ack(dev
);
414 mlx5_core_warn(dev
, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err
);
416 mlx5_core_warn(dev
, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
419 static int mlx5_pci_link_toggle(struct mlx5_core_dev
*dev
)
421 struct pci_bus
*bridge_bus
= dev
->pdev
->bus
;
422 struct pci_dev
*bridge
= bridge_bus
->self
;
423 unsigned long timeout
;
424 struct pci_dev
*sdev
;
428 err
= pci_read_config_word(dev
->pdev
, PCI_DEVICE_ID
, &dev_id
);
430 return pcibios_err_to_errno(err
);
431 err
= mlx5_check_dev_ids(dev
, dev_id
);
434 cap
= pci_find_capability(bridge
, PCI_CAP_ID_EXP
);
438 list_for_each_entry(sdev
, &bridge_bus
->devices
, bus_list
) {
439 pci_save_state(sdev
);
440 pci_cfg_access_lock(sdev
);
442 /* PCI link toggle */
443 err
= pcie_capability_set_word(bridge
, PCI_EXP_LNKCTL
, PCI_EXP_LNKCTL_LD
);
445 return pcibios_err_to_errno(err
);
447 err
= pcie_capability_clear_word(bridge
, PCI_EXP_LNKCTL
, PCI_EXP_LNKCTL_LD
);
449 return pcibios_err_to_errno(err
);
452 if (!bridge
->link_active_reporting
) {
453 mlx5_core_warn(dev
, "No PCI link reporting capability\n");
458 timeout
= jiffies
+ msecs_to_jiffies(mlx5_tout_ms(dev
, PCI_TOGGLE
));
460 err
= pci_read_config_word(bridge
, cap
+ PCI_EXP_LNKSTA
, ®16
);
462 return pcibios_err_to_errno(err
);
463 if (reg16
& PCI_EXP_LNKSTA_DLLLA
)
466 } while (!time_after(jiffies
, timeout
));
468 if (reg16
& PCI_EXP_LNKSTA_DLLLA
) {
469 mlx5_core_info(dev
, "PCI Link up\n");
471 mlx5_core_err(dev
, "PCI link not ready (0x%04x) after %llu ms\n",
472 reg16
, mlx5_tout_ms(dev
, PCI_TOGGLE
));
478 err
= pci_read_config_word(dev
->pdev
, PCI_DEVICE_ID
, ®16
);
480 return pcibios_err_to_errno(err
);
484 } while (!time_after(jiffies
, timeout
));
486 if (reg16
== dev_id
) {
487 mlx5_core_info(dev
, "Firmware responds to PCI config cycles again\n");
489 mlx5_core_err(dev
, "Firmware is not responsive (0x%04x) after %llu ms\n",
490 reg16
, mlx5_tout_ms(dev
, PCI_TOGGLE
));
495 list_for_each_entry(sdev
, &bridge_bus
->devices
, bus_list
) {
496 pci_cfg_access_unlock(sdev
);
497 pci_restore_state(sdev
);
503 static void mlx5_sync_reset_now_event(struct work_struct
*work
)
505 struct mlx5_fw_reset
*fw_reset
= container_of(work
, struct mlx5_fw_reset
,
507 struct mlx5_core_dev
*dev
= fw_reset
->dev
;
510 if (mlx5_sync_reset_clear_reset_requested(dev
, false))
513 mlx5_core_warn(dev
, "Sync Reset now. Device is going to reset.\n");
515 err
= mlx5_cmd_fast_teardown_hca(dev
);
517 mlx5_core_warn(dev
, "Fast teardown failed, no reset done, err %d\n", err
);
521 err
= mlx5_pci_link_toggle(dev
);
523 mlx5_core_warn(dev
, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err
);
524 set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED
, &fw_reset
->reset_flags
);
527 mlx5_enter_error_state(dev
, true);
530 mlx5_fw_reset_complete_reload(dev
, false);
533 static void mlx5_sync_reset_unload_event(struct work_struct
*work
)
535 struct mlx5_fw_reset
*fw_reset
;
536 struct mlx5_core_dev
*dev
;
537 unsigned long timeout
;
542 fw_reset
= container_of(work
, struct mlx5_fw_reset
, reset_unload_work
);
545 if (mlx5_sync_reset_clear_reset_requested(dev
, false))
548 mlx5_core_warn(dev
, "Sync Reset Unload. Function is forced down.\n");
550 err
= mlx5_cmd_fast_teardown_hca(dev
);
552 mlx5_core_warn(dev
, "Fast teardown failed, unloading, err %d\n", err
);
554 mlx5_enter_error_state(dev
, true);
556 if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP
, &fw_reset
->reset_flags
))
557 mlx5_unload_one_devl_locked(dev
, false);
559 mlx5_unload_one(dev
, false);
561 mlx5_set_fw_rst_ack(dev
);
562 mlx5_core_warn(dev
, "Sync Reset Unload done, device reset expected\n");
564 reset_action
= false;
565 timeout
= jiffies
+ msecs_to_jiffies(mlx5_tout_ms(dev
, RESET_UNLOAD
));
567 rst_state
= mlx5_get_fw_rst_state(dev
);
568 if (rst_state
== MLX5_FW_RST_STATE_TOGGLE_REQ
||
569 rst_state
== MLX5_FW_RST_STATE_IDLE
) {
574 } while (!time_after(jiffies
, timeout
));
577 mlx5_core_err(dev
, "Got timeout waiting for sync reset action, state = %u\n",
579 fw_reset
->ret
= -ETIMEDOUT
;
583 mlx5_core_warn(dev
, "Sync Reset, got reset action. rst_state = %u\n", rst_state
);
584 if (rst_state
== MLX5_FW_RST_STATE_TOGGLE_REQ
) {
585 err
= mlx5_pci_link_toggle(dev
);
587 mlx5_core_warn(dev
, "mlx5_pci_link_toggle failed, err %d\n", err
);
593 mlx5_fw_reset_complete_reload(dev
, true);
596 static void mlx5_sync_reset_abort_event(struct work_struct
*work
)
598 struct mlx5_fw_reset
*fw_reset
= container_of(work
, struct mlx5_fw_reset
,
600 struct mlx5_core_dev
*dev
= fw_reset
->dev
;
602 if (mlx5_sync_reset_clear_reset_requested(dev
, true))
604 mlx5_core_warn(dev
, "PCI Sync FW Update Reset Aborted.\n");
607 static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset
*fw_reset
, struct mlx5_eqe
*eqe
)
609 struct mlx5_eqe_sync_fw_update
*sync_fw_update_eqe
;
610 u8 sync_event_rst_type
;
612 sync_fw_update_eqe
= &eqe
->data
.sync_fw_update
;
613 sync_event_rst_type
= sync_fw_update_eqe
->sync_rst_state
& SYNC_RST_STATE_MASK
;
614 switch (sync_event_rst_type
) {
615 case MLX5_SYNC_RST_STATE_RESET_REQUEST
:
616 queue_work(fw_reset
->wq
, &fw_reset
->reset_request_work
);
618 case MLX5_SYNC_RST_STATE_RESET_UNLOAD
:
619 queue_work(fw_reset
->wq
, &fw_reset
->reset_unload_work
);
621 case MLX5_SYNC_RST_STATE_RESET_NOW
:
622 queue_work(fw_reset
->wq
, &fw_reset
->reset_now_work
);
624 case MLX5_SYNC_RST_STATE_RESET_ABORT
:
625 queue_work(fw_reset
->wq
, &fw_reset
->reset_abort_work
);
630 static int fw_reset_event_notifier(struct notifier_block
*nb
, unsigned long action
, void *data
)
632 struct mlx5_fw_reset
*fw_reset
= mlx5_nb_cof(nb
, struct mlx5_fw_reset
, nb
);
633 struct mlx5_eqe
*eqe
= data
;
635 if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
, &fw_reset
->reset_flags
))
638 switch (eqe
->sub_type
) {
639 case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT
:
640 queue_work(fw_reset
->wq
, &fw_reset
->fw_live_patch_work
);
642 case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT
:
643 mlx5_sync_reset_events_handle(fw_reset
, eqe
);
652 int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev
*dev
)
654 unsigned long pci_sync_update_timeout
= mlx5_tout_ms(dev
, PCI_SYNC_UPDATE
);
655 struct mlx5_fw_reset
*fw_reset
= dev
->priv
.fw_reset
;
656 unsigned long timeout
;
659 if (MLX5_CAP_GEN(dev
, pci_sync_for_fw_update_with_driver_unload
))
660 pci_sync_update_timeout
+= mlx5_tout_ms(dev
, RESET_UNLOAD
);
661 timeout
= msecs_to_jiffies(pci_sync_update_timeout
);
662 if (!wait_for_completion_timeout(&fw_reset
->done
, timeout
)) {
663 mlx5_core_warn(dev
, "FW sync reset timeout after %lu seconds\n",
664 pci_sync_update_timeout
/ 1000);
669 if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED
, &fw_reset
->reset_flags
)) {
670 mlx5_unload_one_devl_locked(dev
, false);
671 mlx5_load_one_devl_locked(dev
, true);
674 clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP
, &fw_reset
->reset_flags
);
678 void mlx5_fw_reset_events_start(struct mlx5_core_dev
*dev
)
680 struct mlx5_fw_reset
*fw_reset
= dev
->priv
.fw_reset
;
682 MLX5_NB_INIT(&fw_reset
->nb
, fw_reset_event_notifier
, GENERAL_EVENT
);
683 mlx5_eq_notifier_register(dev
, &fw_reset
->nb
);
686 void mlx5_fw_reset_events_stop(struct mlx5_core_dev
*dev
)
688 mlx5_eq_notifier_unregister(dev
, &dev
->priv
.fw_reset
->nb
);
691 void mlx5_drain_fw_reset(struct mlx5_core_dev
*dev
)
693 struct mlx5_fw_reset
*fw_reset
= dev
->priv
.fw_reset
;
695 set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
, &fw_reset
->reset_flags
);
696 cancel_work_sync(&fw_reset
->fw_live_patch_work
);
697 cancel_work_sync(&fw_reset
->reset_request_work
);
698 cancel_work_sync(&fw_reset
->reset_unload_work
);
699 cancel_work_sync(&fw_reset
->reset_reload_work
);
700 cancel_work_sync(&fw_reset
->reset_now_work
);
701 cancel_work_sync(&fw_reset
->reset_abort_work
);
704 static const struct devlink_param mlx5_fw_reset_devlink_params
[] = {
705 DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET
, BIT(DEVLINK_PARAM_CMODE_RUNTIME
),
706 mlx5_fw_reset_enable_remote_dev_reset_get
,
707 mlx5_fw_reset_enable_remote_dev_reset_set
, NULL
),
710 int mlx5_fw_reset_init(struct mlx5_core_dev
*dev
)
712 struct mlx5_fw_reset
*fw_reset
= kzalloc(sizeof(*fw_reset
), GFP_KERNEL
);
717 fw_reset
->wq
= create_singlethread_workqueue("mlx5_fw_reset_events");
724 dev
->priv
.fw_reset
= fw_reset
;
726 err
= devl_params_register(priv_to_devlink(dev
),
727 mlx5_fw_reset_devlink_params
,
728 ARRAY_SIZE(mlx5_fw_reset_devlink_params
));
730 destroy_workqueue(fw_reset
->wq
);
735 INIT_WORK(&fw_reset
->fw_live_patch_work
, mlx5_fw_live_patch_event
);
736 INIT_WORK(&fw_reset
->reset_request_work
, mlx5_sync_reset_request_event
);
737 INIT_WORK(&fw_reset
->reset_unload_work
, mlx5_sync_reset_unload_event
);
738 INIT_WORK(&fw_reset
->reset_reload_work
, mlx5_sync_reset_reload_work
);
739 INIT_WORK(&fw_reset
->reset_now_work
, mlx5_sync_reset_now_event
);
740 INIT_WORK(&fw_reset
->reset_abort_work
, mlx5_sync_reset_abort_event
);
742 init_completion(&fw_reset
->done
);
746 void mlx5_fw_reset_cleanup(struct mlx5_core_dev
*dev
)
748 struct mlx5_fw_reset
*fw_reset
= dev
->priv
.fw_reset
;
750 devl_params_unregister(priv_to_devlink(dev
),
751 mlx5_fw_reset_devlink_params
,
752 ARRAY_SIZE(mlx5_fw_reset_devlink_params
));
753 destroy_workqueue(fw_reset
->wq
);
754 kfree(dev
->priv
.fw_reset
);