In case of unexpected errors, check if device is alive on the bus.
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Asad Kamal <asad.kamal@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
void amdgpu_set_init_level(struct amdgpu_device *adev,
enum amdgpu_init_lvl_id lvl);
+
+static inline int amdgpu_device_bus_status_check(struct amdgpu_device *adev)
+{
+ u32 status;
+ int r;
+
+ r = pci_read_config_dword(adev->pdev, PCI_COMMAND, &status);
+ if (r || PCI_POSSIBLE_ERROR(status)) {
+ dev_err(adev->dev, "device lost from bus!");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
#endif
{
struct amdgpu_device *tmp_adev;
int ret = 0;
- u32 status;
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
- if (PCI_POSSIBLE_ERROR(status)) {
- dev_err(tmp_adev->dev, "device lost from bus!");
- ret = -ENODEV;
- }
+ ret |= amdgpu_device_bus_status_check(tmp_adev);
}
return ret;
if (adev->nbio.funcs->get_compute_partition_mode) {
mode = adev->nbio.funcs->get_compute_partition_mode(adev);
- if (mode != derv_mode)
+ if (mode != derv_mode) {
dev_warn(
adev->dev,
"Mismatch in compute partition mode - reported : %d derived : %d",
mode, derv_mode);
+ if (derv_mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ amdgpu_device_bus_status_check(adev);
+ }
}
return mode;
#define SMU_RESP_BUSY_OTHER 0xFC
#define SMU_RESP_DEBUG_END 0xFB
+#define SMU_RESP_UNEXP (~0U)
/**
* __smu_cmn_poll_stat -- poll for a status from the SMU
* @smu: a pointer to SMU context
dev_err_ratelimited(adev->dev,
"SMU: I'm debugging!");
break;
+ case SMU_RESP_UNEXP:
+ if (amdgpu_device_bus_status_check(smu->adev)) {
+ /* print error immediately if device is off the bus */
+ dev_err(adev->dev,
+ "SMU: response:0x%08X for index:%d param:0x%08X message:%s?",
+ reg_c2pmsg_90, msg_index, param, message);
+ break;
+ }
+ fallthrough;
default:
dev_err_ratelimited(adev->dev,
"SMU: response:0x%08X for index:%d param:0x%08X message:%s?",