if (r)
return r;
- if (amdgpu_sriov_vf(adev))
- return r;
-
- if (adev->gfx.cp_ecc_error_irq.funcs) {
+ if (!amdgpu_sriov_vf(adev) && adev->gfx.cp_ecc_error_irq.funcs) {
r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
if (r)
goto late_fini;
return r;
}
+/*
+ * amdgpu_gfx_ras_suspend - GFX implementation of the ras_suspend hook.
+ *
+ * Calls amdgpu_irq_put() for type 0 of gfx.cp_ecc_error_irq.  The call
+ * is skipped when the device is an SR-IOV VF or when that interrupt
+ * source has no funcs installed.  @ras_block is not used by this
+ * function.
+ */
+void amdgpu_gfx_ras_suspend(struct amdgpu_device *adev,
+			    struct ras_common_if *ras_block)
+{
+	/* nothing to do for a VF */
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	/* interrupt source has no funcs installed */
+	if (!adev->gfx.cp_ecc_error_irq.funcs)
+		return;
+
+	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
+}
+
+/*
+ * amdgpu_gfx_ras_fini - GFX implementation of the ras_fini hook.
+ *
+ * When the device is not an SR-IOV VF and gfx.cp_ecc_error_irq has
+ * funcs installed, calls amdgpu_irq_put() for type 0 of that interrupt
+ * source.  amdgpu_ras_block_late_fini() is then called for @ras_block
+ * unconditionally.
+ */
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev,
+			 struct ras_common_if *ras_block)
+{
+	if (!amdgpu_sriov_vf(adev)) {
+		if (adev->gfx.cp_ecc_error_irq.funcs)
+			amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
+	}
+
+	/* generic block teardown runs regardless of the IRQ state */
+	amdgpu_ras_block_late_fini(adev, ras_block);
+}
+
int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
{
int err = 0;
if (!ras->ras_block.ras_late_init)
ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
+ if (!ras->ras_block.ras_suspend)
+ ras->ras_block.ras_suspend = amdgpu_gfx_ras_suspend;
+
+ if (!ras->ras_block.ras_fini)
+ ras->ras_block.ras_fini = amdgpu_gfx_ras_fini;
+
/* If not defined special ras_cb function, use default ras_cb */
if (!ras->ras_block.ras_cb)
ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable);
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
-void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_ras_suspend(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block);
int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value);
int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *residency);
int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value);
struct list_head node;
struct amdgpu_ras_block_object *ras_obj;
+
+ /* set by ras_late_init, cleared by ras_suspend/ras_fini */
+ bool active;
};
const char *get_ras_block_str(struct ras_common_if *ras_block)
void amdgpu_ras_suspend(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_block_list *node;
+ struct amdgpu_ras_block_object *obj;
if (!adev->ras_enabled || !con)
return;
+ /* run per-block ras_suspend before tearing down the RAS context */
+ list_for_each_entry(node, &adev->ras_list, node) {
+ if (!node->active)
+ continue;
+
+ obj = node->ras_obj;
+ if (obj && obj->ras_suspend)
+ obj->ras_suspend(adev, &obj->ras_comm);
+ node->active = false;
+ }
+
amdgpu_ras_disable_all_features(adev, 0);
/* Make sure all ras objects are disabled. */
if (AMDGPU_RAS_GET_FEATURES(con->features))
obj->ras_comm.name, r);
return r;
}
- } else
- amdgpu_ras_block_late_init_default(adev, &obj->ras_comm);
+ } else {
+ r = amdgpu_ras_block_late_init_default(adev, &obj->ras_comm);
+ if (r) {
+ dev_err(adev->dev, "%s failed to execute ras_block_late_init_default! ret:%d\n",
+ obj->ras_comm.name, r);
+ return r;
+ }
+ }
+ node->active = true;
}
amdgpu_ras_check_bad_page_status(adev);
list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
if (ras_node->ras_obj) {
obj = ras_node->ras_obj;
- if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) &&
- obj->ras_fini)
+ /* fall back to default cleanup if ras_suspend already ran */
+ if (ras_node->active && obj->ras_fini)
obj->ras_fini(adev, &obj->ras_comm);
else
amdgpu_ras_block_late_fini_default(adev, &obj->ras_comm);
+ ras_node->active = false;
}
/* Clear ras blocks from ras_list and free ras block list node */
int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj,
enum amdgpu_ras_block block, uint32_t sub_block_index);
int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+ void (*ras_suspend)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
void (*ras_fini)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
ras_ih_cb ras_cb;
const struct amdgpu_ras_block_hw_ops *hw_ops;
{
struct amdgpu_device *adev = ip_block->adev;
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
- amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);