struct amdgpu_reset_context reset_context;
u32 memsize;
struct list_head device_list;
- struct amdgpu_hive_info *hive;
- int hive_ras_recovery = 0;
- struct amdgpu_ras *ras;
/* PCI error slot reset should be skipped During RAS recovery */
- hive = amdgpu_get_xgmi_hive(adev);
- if (hive) {
- hive_ras_recovery = atomic_read(&hive->ras_recovery);
- amdgpu_put_xgmi_hive(hive);
- }
- ras = amdgpu_ras_get_context(adev);
if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
- ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
+ amdgpu_ras_in_recovery(adev))
return PCI_ERS_RESULT_RECOVERED;
DRM_INFO("PCI error: slot reset callback!!\n");
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *kiq_ring = &kiq->ring;
- struct amdgpu_hive_info *hive;
- struct amdgpu_ras *ras;
- int hive_ras_recovery = 0;
int i, r = 0;
int j;
* This is workaround: only skip kiq_ring test
* during ras recovery in suspend stage for gfx9.4.3
*/
- hive = amdgpu_get_xgmi_hive(adev);
- if (hive) {
- hive_ras_recovery = atomic_read(&hive->ras_recovery);
- amdgpu_put_xgmi_hive(hive);
- }
-
- ras = amdgpu_ras_get_context(adev);
if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
- ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) {
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
+ amdgpu_ras_in_recovery(adev)) {
spin_unlock(&kiq->ring_lock);
return 0;
}
enum amdgpu_ras_block block)
{
struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
- struct amdgpu_hive_info *hive;
- int hive_ras_recovery = 0;
if (!block_obj || !block_obj->hw_ops) {
dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
!amdgpu_ras_get_aca_debug_mode(adev))
return -EOPNOTSUPP;
- hive = amdgpu_get_xgmi_hive(adev);
- if (hive) {
- hive_ras_recovery = atomic_read(&hive->ras_recovery);
- amdgpu_put_xgmi_hive(hive);
- }
-
/* skip ras error reset in gpu reset */
- if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) ||
- hive_ras_recovery) &&
+ if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) &&
((smu_funcs && smu_funcs->set_debug_mode) ||
(mca_funcs && mca_funcs->mca_set_debug_mode)))
return -EOPNOTSUPP;
}
}
+bool amdgpu_ras_in_recovery(struct amdgpu_device *adev)
+{
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int hive_ras_recovery = 0;
+
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+
+ if (ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
+ return true;
+
+ return false;
+}
+
static void amdgpu_ras_do_recovery(struct work_struct *work)
{
struct amdgpu_ras *ras =
struct ras_err_data err_data;
unsigned long err_cnt;
- if (amdgpu_in_reset(adev) || atomic_read(&con->in_recovery))
+ if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev))
return;
amdgpu_ras_error_data_init(&err_data);
enum amdgpu_ras_block block, uint16_t pasid,
pasid_notify pasid_fn, void *data, uint32_t reset);
+bool amdgpu_ras_in_recovery(struct amdgpu_device *adev);
+
__printf(3, 4)
void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
const char *fmt, ...);
u32 fatal_err, param;
int ret = 0;
struct amdgpu_device *adev = smu->adev;
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
fatal_err = 0;
param = SMU_RESET_MODE_1;
} else {
/* fatal error triggered by ras, PMFW supports the flag
from 68.44.0 */
- if ((smu->smc_fw_version >= 0x00442c00) && ras &&
- atomic_read(&ras->in_recovery))
+ if ((smu->smc_fw_version >= 0x00442c00) &&
+ amdgpu_ras_in_recovery(adev))
fatal_err = 1;
param |= (fatal_err << 16);
uint32_t *param)
{
struct amdgpu_device *adev = smu->adev;
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
if ((smu->smc_fw_version >= supported_version) &&
- ras && atomic_read(&ras->in_recovery))
+ amdgpu_ras_in_recovery(adev))
/* Set RAS fatal error reset flag */
*param = 1 << 16;
else
static int smu_v13_0_6_mode1_reset(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- struct amdgpu_hive_info *hive = NULL;
- u32 hive_ras_recovery = 0;
- struct amdgpu_ras *ras;
u32 fatal_err, param;
int ret = 0;
- hive = amdgpu_get_xgmi_hive(adev);
- ras = amdgpu_ras_get_context(adev);
fatal_err = 0;
param = SMU_RESET_MODE_1;
- if (hive) {
- hive_ras_recovery = atomic_read(&hive->ras_recovery);
- amdgpu_put_xgmi_hive(hive);
- }
-
/* fatal error triggered by ras, PMFW supports the flag */
- if (ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
+ if (amdgpu_ras_in_recovery(adev))
fatal_err = 1;
param |= (fatal_err << 16);