PMFW may return invalid values due to internal calculation errors.
so, the kmd driver must validate and sanitize the returned values to
prevent issues caused by firmware calculation errors.
For example, values 0xfffe (-2) and 0xffff (-1) are treated
as invalid and clamped to 0.
this applies to devices with CAB (Cache As Buffer) functionality.
Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/4905
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
smu_feature_list_clear_all(smu, SMU_FEATURE_LIST_ALLOWED);
}
+/*
+ * smu_safe_u16_nn - Make u16 safe by filtering negative overflow errors
+ * @val: Input u16 value, may contain invalid negative overflows
+ *
+ * Convert u16 to non-negative value. Cast to s16 to detect negative values
+ * caused by calculation errors. Return 0 for negative errors, return
+ * original value if valid.
+ *
+ * Return: Valid u16 value or 0
+ */
+static inline u16 smu_safe_u16_nn(u16 val)
+{
+ s16 tmp = (s16)val;
+
+ return tmp < 0 ? 0 : val;
+}
+
#endif
*value = metrics->AverageGfxclkFrequencyPreDs;
break;
case METRICS_AVERAGE_FCLK:
- if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_0_BUSY_THRESHOLD)
*value = metrics->AverageFclkFrequencyPostDs;
else
*value = metrics->AverageFclkFrequencyPreDs;
break;
case METRICS_AVERAGE_UCLK:
- if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_0_BUSY_THRESHOLD)
*value = metrics->AverageMemclkFrequencyPostDs;
else
*value = metrics->AverageMemclkFrequencyPreDs;
*value = metrics->AverageGfxActivity;
break;
case METRICS_AVERAGE_MEMACTIVITY:
- *value = metrics->AverageUclkActivity;
+ *value = smu_safe_u16_nn(metrics->AverageUclkActivity);
break;
case METRICS_AVERAGE_VCNACTIVITY:
*value = max(metrics->Vcn0ActivityPercentage,
metrics->AvgTemperature[TEMP_VR_MEM1]);
gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity;
- gpu_metrics->average_umc_activity = metrics->AverageUclkActivity;
+ gpu_metrics->average_umc_activity = smu_safe_u16_nn(metrics->AverageUclkActivity);
gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage,
metrics->Vcn1ActivityPercentage);
else
gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs;
- if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_0_BUSY_THRESHOLD)
gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs;
else
gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs;
*value = metrics->AverageGfxclkFrequencyPreDs;
break;
case METRICS_AVERAGE_FCLK:
- if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_7_BUSY_THRESHOLD)
*value = metrics->AverageFclkFrequencyPostDs;
else
*value = metrics->AverageFclkFrequencyPreDs;
break;
case METRICS_AVERAGE_UCLK:
- if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_7_BUSY_THRESHOLD)
*value = metrics->AverageMemclkFrequencyPostDs;
else
*value = metrics->AverageMemclkFrequencyPreDs;
*value = metrics->AverageGfxActivity;
break;
case METRICS_AVERAGE_MEMACTIVITY:
- *value = metrics->AverageUclkActivity;
+ *value = smu_safe_u16_nn(metrics->AverageUclkActivity);
break;
case METRICS_AVERAGE_SOCKETPOWER:
*value = metrics->AverageSocketPower << 8;
metrics->AvgTemperature[TEMP_VR_MEM1]);
gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity;
- gpu_metrics->average_umc_activity = metrics->AverageUclkActivity;
+ gpu_metrics->average_umc_activity = smu_safe_u16_nn(metrics->AverageUclkActivity);
gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage,
metrics->Vcn1ActivityPercentage);
else
gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs;
- if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_13_0_7_BUSY_THRESHOLD)
gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs;
else
gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs;
*value = metrics->AverageGfxclkFrequencyPreDs;
break;
case METRICS_AVERAGE_FCLK:
- if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_14_0_2_BUSY_THRESHOLD)
*value = metrics->AverageFclkFrequencyPostDs;
else
*value = metrics->AverageFclkFrequencyPreDs;
break;
case METRICS_AVERAGE_UCLK:
- if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_14_0_2_BUSY_THRESHOLD)
*value = metrics->AverageMemclkFrequencyPostDs;
else
*value = metrics->AverageMemclkFrequencyPreDs;
*value = metrics->AverageGfxActivity;
break;
case METRICS_AVERAGE_MEMACTIVITY:
- *value = metrics->AverageUclkActivity;
+ *value = smu_safe_u16_nn(metrics->AverageUclkActivity);
break;
case METRICS_AVERAGE_VCNACTIVITY:
*value = max(metrics->AverageVcn0ActivityPercentage,
metrics->AvgTemperature[TEMP_VR_MEM1]);
gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity;
- gpu_metrics->average_umc_activity = metrics->AverageUclkActivity;
+ gpu_metrics->average_umc_activity = smu_safe_u16_nn(metrics->AverageUclkActivity);
gpu_metrics->average_mm_activity = max(metrics->AverageVcn0ActivityPercentage,
metrics->Vcn1ActivityPercentage);
else
gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs;
- if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= SMU_14_0_2_BUSY_THRESHOLD)
gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs;
else
gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs;