From: Eero Tamminen Date: Wed, 23 Feb 2022 14:42:37 +0000 (+0200) Subject: gpu_sysman: Fine-tune RAS error counter descriptions X-Git-Tag: 6.0.0-rc0~117 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cfb9c913b355b8d6fde1c9a008d795caa4a95d1c;p=thirdparty%2Fcollectd.git gpu_sysman: Fine-tune RAS error counter descriptions * "number" -> "count" (as they are counters) * "occurred" -> "that have occurred" (consistency with Sysman spec) --- diff --git a/src/gpu_sysman.c b/src/gpu_sysman.c index e98ff7a87..ea9653e12 100644 --- a/src/gpu_sysman.c +++ b/src/gpu_sysman.c @@ -806,43 +806,46 @@ static bool gpu_ras(gpu_device_t *gpu) { // categories which are not correctable, see: // https://spec.oneapi.io/level-zero/latest/sysman/PROG.html#querying-ras-errors case ZES_RAS_ERROR_CAT_RESET: - help = "Total number of GPU reset attempts by the driver"; + help = "Total count of HW accelerator resets attempted by the driver"; catname = METRIC_PREFIX "resets_total"; correctable = false; break; case ZES_RAS_ERROR_CAT_PROGRAMMING_ERRORS: - help = "Total number of non-correctable HW exceptions generated by the " - "way workloads have programmed the HW"; + help = + "Total count of (non-correctable) HW exceptions generated by the " + "way workloads program the HW"; catname = METRIC_PREFIX "programming_errors_total"; correctable = false; break; case ZES_RAS_ERROR_CAT_DRIVER_ERRORS: - help = "total number of non-correctable low level driver communication " - "errors"; + help = + "total count of (non-correctable) low-level driver communication " + "errors"; catname = METRIC_PREFIX "driver_errors_total"; correctable = false; break; // categories which can have both correctable and uncorrectable errors case ZES_RAS_ERROR_CAT_COMPUTE_ERRORS: - help = "Total number of errors occurrend in the accelerator HW"; + help = "Total count of errors that have occurred in the (shader) " + "accelerator HW"; catname = METRIC_PREFIX "compute_errors_total"; break; case ZES_RAS_ERROR_CAT_NON_COMPUTE_ERRORS: - help = "Total number of errors occurred in the fixed-function " + help = "Total count of errors that have occurred in the fixed-function " "accelerator HW"; catname = METRIC_PREFIX "fixed_function_errors_total"; break; case ZES_RAS_ERROR_CAT_CACHE_ERRORS: - help = "Total number of ECC errors that have occurred in the on-chip " + help = "Total count of ECC errors that have occurred in the on-chip " "caches"; catname = METRIC_PREFIX "cache_errors_total"; break; case ZES_RAS_ERROR_CAT_DISPLAY_ERRORS: - help = "Total number of ECC errors that have occurred in the display"; + help = "Total count of ECC errors that have occurred in the display"; catname = METRIC_PREFIX "display_errors_total"; break; default: - help = "Total number of errors in unsupported categories"; + help = "Total count of errors in unsupported categories"; catname = METRIC_PREFIX "unknown_errors_total"; } if (correctable) { @@ -852,7 +855,7 @@ static bool gpu_ras(gpu_device_t *gpu) { } } catname = METRIC_PREFIX "all_errors_total"; - help = "Total number of errors in all categories"; + help = "Total count of errors in all categories"; ras_submit(gpu, catname, help, type, subdev, total); ok = true; }