From: Eero Tamminen Date: Thu, 17 Feb 2022 18:02:50 +0000 (+0200) Subject: gpu_sysman: Add fabric port metric type and related metrics support X-Git-Tag: 6.0.0-rc0~77 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8afca6b2ee7dd0de5396c98092ecafd40bce5168;p=thirdparty%2Fcollectd.git gpu_sysman: Add fabric port metric type and related metrics support Already in L0 spec v1.0. Fabric ports have a lot of properties which means lot of labels and extra functions. This required increasing some things in the test code too. --- diff --git a/src/collectd.conf.in b/src/collectd.conf.in index badccd899..13e4a798d 100644 --- a/src/collectd.conf.in +++ b/src/collectd.conf.in @@ -799,6 +799,7 @@ # MetricsOutput "counter:rate:ratio" # DisableMemory false # DisableMemoryBandwidth false +# DisableFabric false # DisableFrequency false # DisableThrottleTime false # DisableTemperature false diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod index 9a0a81dee..f7d0183ae 100644 --- a/src/collectd.conf.pod +++ b/src/collectd.conf.pod @@ -3765,6 +3765,10 @@ Disable memory usage metrics collection. Disable memory bandwidth metrics collection. +=item B<DisableFabric> + +Disable fabric port metrics collection. + =item B<DisableFrequency> Disable actual / requested frequency metrics collection. 
diff --git a/src/gpu_sysman.c b/src/gpu_sysman.c index dc6e101e6..0e79e2b7d 100644 --- a/src/gpu_sysman.c +++ b/src/gpu_sysman.c @@ -78,6 +78,7 @@ typedef struct { bool all; /* no metrics from whole GPU */ bool engine; bool engine_single; + bool fabric; bool freq; bool mem; bool membw; @@ -100,14 +101,16 @@ typedef struct { uint32_t temp_count; /* number of types for each counter metric */ uint32_t engine_count; + uint32_t fabric_count; uint32_t membw_count; uint32_t power_count; uint32_t throttle_count; /* number of types for each sampled metric */ uint32_t frequency_count; uint32_t memory_count; - /* previous values for counters */ + /* previous values for counters, must have matching _count */ zes_engine_stats_t *engine; + zes_fabric_port_throughput_t *fabric; zes_mem_bandwidth_t *membw; zes_power_energy_counter_t *power; zes_freq_throttle_time_t *throttle; @@ -147,6 +150,7 @@ static struct { /* Sysman GPU plugin config options (defines to ease catching typos) */ #define KEY_DISABLE_ENGINE "DisableEngine" #define KEY_DISABLE_ENGINE_SINGLE "DisableEngineSingle" +#define KEY_DISABLE_FABRIC "DisableFabric" #define KEY_DISABLE_FREQ "DisableFrequency" #define KEY_DISABLE_MEM "DisableMemory" #define KEY_DISABLE_MEMBW "DisableMemoryBandwidth" @@ -221,6 +225,7 @@ static int gpu_config_free(void) { for (uint32_t i = 0; i < gpu_count; i++) { /* free previous values for counters & zero their counts */ FREE_GPU_ARRAY(i, engine); + FREE_GPU_ARRAY(i, fabric); FREE_GPU_ARRAY(i, membw); FREE_GPU_ARRAY(i, power); FREE_GPU_ARRAY(i, throttle); @@ -275,6 +280,7 @@ static int gpu_config_check(void) { bool value; } options[] = {{KEY_DISABLE_ENGINE, config.disabled.engine}, {KEY_DISABLE_ENGINE_SINGLE, config.disabled.engine_single}, + {KEY_DISABLE_FABRIC, config.disabled.fabric}, {KEY_DISABLE_FREQ, config.disabled.freq}, {KEY_DISABLE_MEM, config.disabled.mem}, {KEY_DISABLE_MEMBW, config.disabled.membw}, @@ -1708,6 +1714,241 @@ static bool gpu_temps(gpu_device_t *gpu) { return ok; } 
+/* Add fabric port status / health labels to given metric:
+ * - "status" label is always set, based on port status
+ * - "issues" label is set only when quality issues or failure reasons
+ *   are flagged, failure reasons overriding quality issues */
+static void add_fabric_state_labels(metric_t *metric,
+                                    const zes_fabric_port_state_t *state) {
+  const char *status;
+  switch (state->status) {
+  case ZES_FABRIC_PORT_STATUS_UNKNOWN:
+    status = "unknown";
+    break;
+  case ZES_FABRIC_PORT_STATUS_HEALTHY:
+    status = "healthy";
+    break;
+  case ZES_FABRIC_PORT_STATUS_DEGRADED:
+    status = "degraded";
+    break;
+  case ZES_FABRIC_PORT_STATUS_FAILED:
+    status = "failed";
+    break;
+  case ZES_FABRIC_PORT_STATUS_DISABLED:
+    status = "disabled";
+    break;
+  default:
+    status = "unsupported";
+  }
+  metric_label_set(metric, "status", status);
+
+  const char *issues = NULL;
+  switch (state->qualityIssues) {
+  case 0:
+    break;
+  case ZES_FABRIC_PORT_QUAL_ISSUE_FLAG_LINK_ERRORS:
+    issues = "link";
+    break;
+  case ZES_FABRIC_PORT_QUAL_ISSUE_FLAG_SPEED:
+    issues = "speed";
+    break;
+  default: /* any other value, e.g. both flags set */
+    issues = "link+speed";
+  }
+  switch (state->failureReasons) {
+  case 0:
+    break;
+  case ZES_FABRIC_PORT_FAILURE_FLAG_FAILED:
+    issues = "failure";
+    break;
+  case ZES_FABRIC_PORT_FAILURE_FLAG_TRAINING_TIMEOUT:
+    issues = "training";
+    break;
+  case ZES_FABRIC_PORT_FAILURE_FLAG_FLAPPING:
+    issues = "flapping";
+    break;
+  default: /* any other value, e.g. several flags set */
+    issues = "multiple";
+  }
+  if (issues) {
+    metric_label_set(metric, "issues", issues);
+  }
+}
+
+/* Report metrics for relevant fabric ports, return true for success.
+ *
+ * Counters are reported as-is, rate & ratio gauges are calculated from
+ * the difference to values stored on previous call.  False is returned
+ * also when GPU has no fabric ports, so that caller can disable query */
+static bool gpu_fabrics(gpu_device_t *gpu) {
+  uint32_t i, port_count = 0;
+  zes_device_handle_t dev = gpu->handle;
+  ze_result_t ret = zesDeviceEnumFabricPorts(dev, &port_count, NULL);
+  if (ret != ZE_RESULT_SUCCESS) {
+    ERROR(PLUGIN_NAME ": failed to get fabric port count => 0x%x", ret);
+    return false;
+  }
+  zes_fabric_port_handle_t *ports;
+  ports = scalloc(port_count, sizeof(*ports));
+  if (ret = zesDeviceEnumFabricPorts(dev, &port_count, ports),
+      ret != ZE_RESULT_SUCCESS) {
+    ERROR(PLUGIN_NAME ": failed to get %d fabric ports => 0x%x", port_count,
+          ret);
+    free(ports);
+    return false;
+  }
+  if (gpu->fabric_count != port_count) {
+    INFO(PLUGIN_NAME ": Sysman reports %d fabric ports", port_count);
+    /* port count changed => drop previous values (free(NULL) is no-op) */
+    free(gpu->fabric);
+    gpu->fabric = scalloc(port_count, sizeof(*gpu->fabric));
+    gpu->fabric_count = port_count;
+  }
+
+  metric_family_t fam_ratio = {
+      .help =
+          "Average fabric port bandwidth usage ratio (0-1) over query interval",
+      .name = METRIC_PREFIX "fabric_port_ratio",
+      .type = METRIC_TYPE_GAUGE,
+  };
+  metric_family_t fam_rate = {
+      .help = "Fabric port throughput rate (in bytes per second)",
+      .name = METRIC_PREFIX "fabric_port_bytes_per_second",
+      .type = METRIC_TYPE_GAUGE,
+  };
+  metric_family_t fam_counter = {
+      .help = "Fabric port throughput total (in bytes)",
+      .name = METRIC_PREFIX "fabric_port_bytes_total",
+      .type = METRIC_TYPE_COUNTER,
+  };
+  metric_t metric = {0};
+
+  bool reported_rate = false, reported_ratio = false, reported_counter = false;
+  bool ok = false;
+  for (i = 0; i < port_count; i++) {
+    /* fetch all information before allocing labels */
+    zes_fabric_port_state_t state = {.pNext = NULL};
+    if (ret = zesFabricPortGetState(ports[i], &state),
+        ret != ZE_RESULT_SUCCESS) {
+      ERROR(PLUGIN_NAME ": failed to get fabric port %d state => 0x%x", i, ret);
+      ok = false;
+      break;
+    }
+    zes_fabric_port_properties_t props = {.pNext = NULL};
+    if (ret = zesFabricPortGetProperties(ports[i], &props),
+        ret != ZE_RESULT_SUCCESS) {
+      ERROR(PLUGIN_NAME ": failed to get fabric port %d properties => 0x%x", i,
+            ret);
+      ok = false;
+      break;
+    }
+    zes_fabric_port_config_t conf = {.pNext = NULL};
+    if (ret = zesFabricPortGetConfig(ports[i], &conf),
+        ret != ZE_RESULT_SUCCESS) {
+      ERROR(PLUGIN_NAME ": failed to get fabric port %d config => 0x%x", i,
+            ret);
+      ok = false;
+      break;
+    }
+    zes_fabric_port_throughput_t bw;
+    if (ret = zesFabricPortGetThroughput(ports[i], &bw),
+        ret != ZE_RESULT_SUCCESS) {
+      ERROR(PLUGIN_NAME ": failed to get fabric port %d throughput => 0x%x", i,
+            ret);
+      ok = false;
+      break;
+    }
+    zes_fabric_link_type_t link;
+    if (ret = zesFabricPortGetLinkType(ports[i], &link),
+        ret != ZE_RESULT_SUCCESS) {
+      ERROR(PLUGIN_NAME ": failed to get fabric port %d link type => 0x%x", i,
+            ret);
+      ok = false;
+      break;
+    }
+
+    /* port setting / identity setting labels */
+    link.desc[sizeof(link.desc) - 1] = '\0';
+    metric_label_set(&metric, "link", link.desc);
+    metric_label_set(&metric, "enabled", conf.enabled ? "on" : "off");
+    metric_label_set(&metric, "beaconing", conf.beaconing ? "on" : "off");
+
+    props.model[sizeof(props.model) - 1] = '\0';
+    metric_label_set(&metric, "model", props.model);
+    metric_set_subdev(&metric, props.onSubdevice, props.subdeviceId);
+
+    /* topology labels */
+    char buf[32];
+    zes_fabric_port_id_t *pid = &props.portId;
+    snprintf(buf, sizeof(buf), "%08x.%08x.%02x", pid->fabricId, pid->attachId,
+             pid->portNumber);
+    metric_label_set(&metric, "port", buf);
+
+    pid = &state.remotePortId;
+    snprintf(buf, sizeof(buf), "%08x.%08x.%02x", pid->fabricId, pid->attachId,
+             pid->portNumber);
+    metric_label_set(&metric, "remote", buf);
+
+    /* status / health labels */
+    add_fabric_state_labels(&metric, &state);
+
+    /* add counters with direction labels */
+    if (config.output & OUTPUT_COUNTER) {
+      metric.value.counter = bw.txCounter;
+      metric_label_set(&metric, "direction", "write");
+      metric_family_metric_append(&fam_counter, metric);
+
+      metric.value.counter = bw.rxCounter;
+      metric_label_set(&metric, "direction", "read");
+      metric_family_metric_append(&fam_counter, metric);
+      reported_counter = true;
+    }
+
+    /* add rate + ratio gauges with direction labels */
+    zes_fabric_port_throughput_t *old = &gpu->fabric[i];
+    if (old->timestamp && bw.timestamp > old->timestamp &&
+        (config.output & (OUTPUT_RATIO | OUTPUT_RATE))) {
+      /* https://spec.oneapi.io/level-zero/latest/sysman/api.html#zes-fabric-port-throughput-t
+       */
+      uint64_t writes = bw.txCounter - old->txCounter;
+      uint64_t reads = bw.rxCounter - old->rxCounter;
+      uint64_t timediff = bw.timestamp - old->timestamp;
+
+      if (config.output & OUTPUT_RATE) {
+        double factor = 1.0e6 / timediff;
+        add_bw_gauges(&metric, &fam_rate, factor * reads, factor * writes);
+        reported_rate = true;
+      }
+      if (config.output & OUTPUT_RATIO) {
+        int64_t maxr = props.maxRxSpeed.bitRate * props.maxRxSpeed.width / 8;
+        int64_t maxw = props.maxTxSpeed.bitRate * props.maxTxSpeed.width / 8;
+        if (maxr > 0 && maxw > 0) {
+          /* multiply as doubles, 64-bit integer product of max speed
+           * and time difference could wrap around */
+          double rfactor = 1.0e6 / ((double)maxr * timediff);
+          double wfactor = 1.0e6 / ((double)maxw * timediff);
+          add_bw_gauges(&metric, &fam_ratio, rfactor * reads, wfactor * writes);
+          reported_ratio = true;
+        }
+      }
+    }
+    metric_reset(&metric);
+    *old = bw;
+    ok = true;
+  }
+  if (reported_ratio) {
+    gpu_submit(gpu, &fam_ratio);
+  }
+  if (reported_rate) {
+    gpu_submit(gpu, &fam_rate);
+  }
+  if (reported_counter) {
+    gpu_submit(gpu, &fam_counter);
+  }
+  free(ports);
+  return ok;
+}
+
 /* Report power usage for relevant domains, return true for success */
 static bool gpu_powers(gpu_device_t *gpu) {
   uint32_t i, power_count = 0;
@@ -2088,6 +2329,12 @@ static int gpu_read(void) {
               i);
       disabled->engine = true;
     }
+    if (!disabled->fabric && !gpu_fabrics(gpu)) {
+      WARNING(PLUGIN_NAME
+              ": GPU-%d fabric query fail / no fabric ports => disabled",
+              i);
+      disabled->fabric = true;
+    }
     if (!disabled->membw && !gpu_mems_bw(gpu)) {
       WARNING(PLUGIN_NAME ": GPU-%d mem BW query fail / no modules => disabled",
               i);
@@ -2115,9 +2362,9 @@ static int gpu_read(void) {
               i);
       gpu->disabled.throttle = true;
     }
-    if (disabled->engine && disabled->mem && disabled->freq &&
-        disabled->membw && disabled->power && disabled->ras && disabled->temp &&
-        disabled->throttle) {
+    if (disabled->engine && disabled->fabric && disabled->freq &&
+        disabled->mem && disabled->membw && disabled->power && disabled->ras &&
+        disabled->temp && disabled->throttle) {
       /* all metrics missing -> disable use of that GPU */
       ERROR(PLUGIN_NAME ": No metrics from GPU-%d, disabling its querying", i);
       disabled->all = true;
@@ -2134,6 +2381,8 @@ static int
gpu_config_parse(const char *key, const char *value) { config.disabled.engine = IS_TRUE(value); } else if (strcasecmp(key, KEY_DISABLE_ENGINE_SINGLE) == 0) { config.disabled.engine_single = IS_TRUE(value); + } else if (strcasecmp(key, KEY_DISABLE_FABRIC) == 0) { + config.disabled.fabric = IS_TRUE(value); } else if (strcasecmp(key, KEY_DISABLE_FREQ) == 0) { config.disabled.freq = IS_TRUE(value); } else if (strcasecmp(key, KEY_DISABLE_MEM) == 0) { @@ -2205,11 +2454,13 @@ static int gpu_config_parse(const char *key, const char *value) { void module_register(void) { /* NOTE: key strings *must* be static */ static const char *config_keys[] = { - KEY_DISABLE_ENGINE, KEY_DISABLE_ENGINE_SINGLE, KEY_DISABLE_FREQ, - KEY_DISABLE_MEM, KEY_DISABLE_MEMBW, KEY_DISABLE_POWER, - KEY_DISABLE_RAS, KEY_DISABLE_RAS_SEPARATE, KEY_DISABLE_TEMP, - KEY_DISABLE_THROTTLE, KEY_METRICS_OUTPUT, KEY_LOG_GPU_INFO, - KEY_SAMPLES}; + KEY_DISABLE_ENGINE, KEY_DISABLE_ENGINE_SINGLE, + KEY_DISABLE_FABRIC, KEY_DISABLE_FREQ, + KEY_DISABLE_MEM, KEY_DISABLE_MEMBW, + KEY_DISABLE_POWER, KEY_DISABLE_RAS, + KEY_DISABLE_RAS_SEPARATE, KEY_DISABLE_TEMP, + KEY_DISABLE_THROTTLE, KEY_METRICS_OUTPUT, + KEY_LOG_GPU_INFO, KEY_SAMPLES}; const int config_keys_num = STATIC_ARRAY_SIZE(config_keys); plugin_register_config(PLUGIN_NAME, gpu_config_parse, config_keys, diff --git a/src/gpu_sysman_test.c b/src/gpu_sysman_test.c index 2cb697731..8d2887fb3 100644 --- a/src/gpu_sysman_test.c +++ b/src/gpu_sysman_test.c @@ -90,7 +90,7 @@ static struct { unsigned int api_calls, api_limit; /* to verify that all mocked Level-Zero/Sysman functions get called */ - unsigned int callbits; + unsigned long callbits; /* how many errors & warnings have been logged */ unsigned int warnings; @@ -122,7 +122,7 @@ static void set_verbose(unsigned int callmask, unsigned int metricmask) { * return true if given call should be failed (call=limit) */ static bool call_limit(int callbit, const char *name) { - globs.callbits |= 1u << callbit; + 
globs.callbits |= 1ul << callbit;
   globs.api_calls++;
 
   if (globs.verbose & VERBOSE_CALLS) {
@@ -497,7 +497,60 @@ ze_result_t zesPowerGetLimits(zes_pwr_handle_t handle,
   return metric_args_check(20, "zesPowerGetLimits", handle, check);
 }
 
-#define QUERY_CALL_FUNCS 21
+static zes_fabric_port_properties_t fabric_props = {
+    .maxRxSpeed = {.width = 8, .bitRate = COUNTER_MAX},
+    .maxTxSpeed = {.width = 8, .bitRate = COUNTER_MAX}};
+static zes_fabric_port_state_t port_state = {
+    .status = ZES_FABRIC_PORT_STATUS_HEALTHY};
+
+/* .quality should be set only on degraded, .reasons on failed .status, this
+ * increases them without changing status to increase coverage */
+ADD_METRIC(21, zesDeviceEnumFabricPorts, zes_fabric_port_handle_t,
+           zesFabricPortGetProperties, zes_fabric_port_properties_t,
+           fabric_props, zesFabricPortGetState, zes_fabric_port_state_t,
+           port_state, port_state.qualityIssues += 1,
+           port_state.failureReasons += 1)
+
+/* fabric ports need mocks for more functions than the other metrics do */
+ze_result_t zesFabricPortGetLinkType(zes_fabric_port_handle_t handle,
+                                     zes_fabric_link_type_t *state) {
+  static zes_fabric_link_type_t dummy_link = {.desc = "DummyLink"};
+  ze_result_t res =
+      metric_args_check(24, "zesFabricPortGetLinkType", handle, state);
+  if (res == ZE_RESULT_SUCCESS) {
+    *state = dummy_link;
+  }
+  return res;
+}
+
+ze_result_t zesFabricPortGetConfig(zes_fabric_port_handle_t handle,
+                                   zes_fabric_port_config_t *config) {
+  ze_result_t res =
+      metric_args_check(25, "zesFabricPortGetConfig", handle, config);
+  if (res == ZE_RESULT_SUCCESS) {
+    assert(!config->pNext);
+    memset(config, 0, sizeof(*config));
+  }
+  return res;
+}
+
+ze_result_t zesFabricPortGetThroughput(zes_fabric_port_handle_t handle,
+                                       zes_fabric_port_throughput_t *state) {
+  static zes_fabric_port_throughput_t next = {.rxCounter = 2 * COUNTER_START,
+                                              .txCounter = COUNTER_START,
+                                              .timestamp = TIME_START};
+  ze_result_t res =
+      metric_args_check(26, "zesFabricPortGetThroughput", handle, state);
+  if (res == ZE_RESULT_SUCCESS) {
+    *state = next;
+    next.rxCounter += 2 * COUNTER_INC;
+    next.txCounter += COUNTER_INC;
+    next.timestamp += TIME_INC;
+  }
+  return res;
+}
+
+#define QUERY_CALL_FUNCS 27
 #define QUERY_CALL_BITS (((uint64_t)1 << QUERY_CALL_FUNCS) - 1)
 
 /* ------------------------------------------------------------------------- */
@@ -578,6 +631,18 @@ static metrics_validation_t valid_metrics[] = {
     {"engine_ratio/all", true, false, COUNTER_RATIO, 0, 0, 0.0},
     {"engine_use_usecs_total/all", true, false, COUNTER_START, COUNTER_INC, 0,
      0.0},
+    {"fabric_port_bytes_total/healthy/off/read", true, false, 2 * COUNTER_START,
+     2 * COUNTER_INC, 0, 0.0},
+    {"fabric_port_bytes_total/healthy/off/write", true, false, COUNTER_START,
+     COUNTER_INC, 0, 0.0},
+    {"fabric_port_bytes_per_second/healthy/off/read", true, false,
+     2 * COUNTER_RATE, 0, 0, 0.0},
+    {"fabric_port_bytes_per_second/healthy/off/write", true, false,
+     COUNTER_RATE, 0, 0, 0.0},
+    {"fabric_port_ratio/healthy/off/read", true, false, 2 * COUNTER_MAX_RATIO,
+     0, 0, 0.0},
+    {"fabric_port_ratio/healthy/off/write", true, false, COUNTER_MAX_RATIO, 0,
+     0, 0.0},
     {"memory_bw_bytes_total/HBM/system/read", true, false, 2 * COUNTER_START,
      2 * COUNTER_INC, 0, 0.0},
     {"memory_bw_bytes_total/HBM/system/write", true, false, COUNTER_START,
@@ -709,8 +774,11 @@ static void compose_name(char *buf, size_t bufsize, const char *name,
     const char *name = label[i].name;
     const char *value = label[i].value;
     assert(name && value);
-    if (strcmp(name, "pci_bdf") == 0 || strcmp(name, "sub_dev") == 0) {
-      /* do not add device PCI ID / sub device IDs to metric name */
+    if (strcmp(name, "pci_bdf") == 0 || strcmp(name, "sub_dev") == 0 ||
+        strcmp(name, "remote") == 0 || strcmp(name, "port") == 0 ||
+        strcmp(name, "link") == 0 || strcmp(name, "model") == 0 ||
+        strcmp(name, "issues") == 0) {
+      /* do not add numeric IDs, HW labels, or issues to metric name */
       continue;
     }
     len += snprintf(buf + len, bufsize - len, "/%s", value);
@@ -769,7 +837,7 @@ int
plugin_dispatch_metric_family(metric_family_t const *fam) { return 0; } -#define MAX_LABELS 8 +#define MAX_LABELS 16 /* mock function uses just one large enough metrics array (for testing) * instead of increasing it one-by-one, like the real collectd metrics @@ -1093,12 +1161,16 @@ static int get_reset_disabled(gpu_disable_t *disabled, bool value, int *mask, struct { const char *name; bool *flag; - } flags[] = { - {"engine", &disabled->engine}, {"frequency", &disabled->freq}, - {"memory", &disabled->mem}, {"membw", &disabled->membw}, - {"power", &disabled->power}, {"power_ratio", &disabled->power_ratio}, - {"errors", &disabled->ras}, {"temperature", &disabled->temp}, - {"throttle", &disabled->throttle}}; + } flags[] = {{"engine", &disabled->engine}, + {"fabric", &disabled->fabric}, + {"frequency", &disabled->freq}, + {"memory", &disabled->mem}, + {"membw", &disabled->membw}, + {"power", &disabled->power}, + {"power_ratio", &disabled->power_ratio}, + {"errors", &disabled->ras}, + {"temperature", &disabled->temp}, + {"throttle", &disabled->throttle}}; *all = 0; int count = 0; for (int i = 0; i < (int)STATIC_ARRAY_SIZE(flags); i++) {