From: Eero Tamminen Date: Thu, 22 Sep 2022 18:44:34 +0000 (+0300) Subject: gpu_sysman: Output device ECC state with other GPU info at start X-Git-Tag: 6.0.0-rc0~51 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ace9bb3c3a149651f194321e3a952683b10e47cf;p=thirdparty%2Fcollectd.git gpu_sysman: Output device ECC state with other GPU info at start Added in L0 spec v1.4. Requires loader version 1.8.0, released in May 2022. (With minor cleanup comments from Alexey applied.) Signed-off-by: Eero Tamminen --- diff --git a/src/gpu_sysman.c b/src/gpu_sysman.c index 69ec4b8af..1910fc68a 100644 --- a/src/gpu_sysman.c +++ b/src/gpu_sysman.c @@ -411,6 +411,22 @@ static bool gpu_info(zes_device_handle_t dev, char **pci_bdf, char **pci_dev) { WARNING(PLUGIN_NAME ": failed to get GPU device state => 0x%x", ret); } + const char *eccstate = "unavailable"; + zes_device_ecc_properties_t ecc = {.pNext = NULL}; + if (zesDeviceGetEccState(dev, &ecc) == ZE_RESULT_SUCCESS) { + switch (ecc.currentState) { + case ZES_DEVICE_ECC_STATE_ENABLED: + eccstate = "enabled"; + break; + case ZES_DEVICE_ECC_STATE_DISABLED: + eccstate = "disabled"; + break; + default: + break; + } + } + INFO("- ECC state: %s", eccstate); + INFO("HW identification:"); zes_device_properties_t props = {.pNext = NULL}; if (ret = zesDeviceGetProperties(dev, &props), ret == ZE_RESULT_SUCCESS) { diff --git a/src/gpu_sysman_test.c b/src/gpu_sysman_test.c index 8d2887fb3..4b9389b62 100644 --- a/src/gpu_sysman_test.c +++ b/src/gpu_sysman_test.c @@ -250,21 +250,26 @@ ze_result_t zeDeviceGetMemoryProperties(ze_device_handle_t dev, uint32_t *count, /* mock up level-zero sysman device handling API, called during gpu_init() */ -#define DEV_GET_ZEROED_STRUCT(callbit, getname, structtype) \ +#define DEV_GET_SET_STRUCT(callbit, getname, structtype, setval) \ ze_result_t getname(zes_device_handle_t dev, structtype *to_zero) { \ ze_result_t ret = dev_args_check(callbit, #getname, dev, to_zero); \ if (ret == 
ZE_RESULT_SUCCESS) { \ assert(!to_zero->pNext); \ memset(to_zero, 0, sizeof(*to_zero)); \ + setval; \ } \ return ret; \ } -DEV_GET_ZEROED_STRUCT(5, zesDeviceGetProperties, zes_device_properties_t) -DEV_GET_ZEROED_STRUCT(6, zesDevicePciGetProperties, zes_pci_properties_t) -DEV_GET_ZEROED_STRUCT(7, zesDeviceGetState, zes_device_state_t) +DEV_GET_SET_STRUCT(5, zesDeviceGetProperties, zes_device_properties_t, ) +DEV_GET_SET_STRUCT(6, zesDevicePciGetProperties, zes_pci_properties_t, ) +DEV_GET_SET_STRUCT(7, zesDeviceGetState, zes_device_state_t, + to_zero->reset = (ZES_RESET_REASON_FLAG_WEDGED | + ZES_RESET_REASON_FLAG_REPAIR)) +DEV_GET_SET_STRUCT(8, zesDeviceGetEccState, zes_device_ecc_properties_t, + to_zero->currentState = ZES_DEVICE_ECC_STATE_ENABLED) -#define INIT_CALL_FUNCS 8 +#define INIT_CALL_FUNCS 9 #define INIT_CALL_BITS (((uint64_t)1 << INIT_CALL_FUNCS) - 1) /* ------------------------------------------------------------------------- */