From: Eero Tamminen
Date: Mon, 12 Sep 2022 11:02:03 +0000 (+0300)
Subject: gpu_sysman: Add ratio variant for temperature metric type
X-Git-Tag: 6.0.0-rc0~107
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=edf21dcfa86ebcdaa0693681d2e68b9504408ada;p=thirdparty%2Fcollectd.git

gpu_sysman: Add ratio variant for temperature metric type

Signed-off-by: Eero Tamminen
---

diff --git a/src/gpu_sysman.c b/src/gpu_sysman.c
index c0f41ee37..a86107d8e 100644
--- a/src/gpu_sysman.c
+++ b/src/gpu_sysman.c
@@ -1469,14 +1469,19 @@ static bool gpu_temps(gpu_device_t *gpu) {
     gpu->temp_count = temp_count;
   }
 
-  metric_family_t fam = {
+  metric_family_t fam_temp = {
       .help = "Temperature sensor value (in Celsius) when queried",
       .name = METRIC_PREFIX "temperature_celsius",
       .type = METRIC_TYPE_GAUGE,
   };
+  metric_family_t fam_ratio = {
+      .help = "Temperature sensor value ratio to its max value when queried",
+      .name = METRIC_PREFIX "temperature_ratio",
+      .type = METRIC_TYPE_GAUGE,
+  };
   metric_t metric = {0};
 
-  bool ok = false;
+  bool reported_ratio = false, ok = false;
   for (i = 0; i < temp_count; i++) {
     zes_temp_properties_t props;
     if (zesTemperatureGetProperties(temps[i], &props) != ZE_RESULT_SUCCESS) {
@@ -1523,12 +1528,21 @@ static bool gpu_temps(gpu_device_t *gpu) {
     metric.value.gauge = value;
     metric_label_set(&metric, "location", type);
     metric_set_subdev(&metric, props.onSubdevice, props.subdeviceId);
-    metric_family_metric_append(&fam, metric);
+    metric_family_metric_append(&fam_temp, metric);
+
+    if (props.maxTemperature > 0 && (config.output & OUTPUT_RATIO)) {
+      metric.value.gauge = value / props.maxTemperature;
+      metric_family_metric_append(&fam_ratio, metric);
+      reported_ratio = true;
+    }
     ok = true;
   }
   if (ok) {
     metric_reset(&metric);
-    gpu_submit(gpu, &fam);
+    gpu_submit(gpu, &fam_temp);
+    if (reported_ratio) {
+      gpu_submit(gpu, &fam_ratio);
+    }
   }
   free(temps);
   return ok;
diff --git a/src/gpu_sysman_test.c b/src/gpu_sysman_test.c
index 325c0a5f5..960f73bfc 100644
--- a/src/gpu_sysman_test.c
+++ b/src/gpu_sysman_test.c
@@ -304,6 +304,7 @@ static ze_result_t metric_args_check(int callbit, const char *name,
 #define RAS_INIT 0
 #define RAS_INC 1
 
+#define TEMP_LIMIT 95
 #define TEMP_INIT 10
 #define TEMP_INC 5
 
@@ -393,7 +394,7 @@ ADD_METRIC(9, zesDeviceEnumPowerDomains, zes_pwr_handle_t,
            power_counter.energy += COUNTER_INC,
            power_counter.timestamp += TIME_INC)
 
-static zes_temp_properties_t temp_props;
+static zes_temp_properties_t temp_props = {.maxTemperature = TEMP_LIMIT};
 static double temperature = TEMP_INIT;
 static int dummy;
 
@@ -480,6 +481,9 @@ typedef struct {
   double last;
 } metrics_validation_t;
 
+#define TEMP_RATIO_INIT ((double)(TEMP_INIT) / (TEMP_LIMIT))
+#define TEMP_RATIO_INC ((double)(TEMP_INC) / (TEMP_LIMIT))
+
 #define MEM_RATIO_INIT ((double)MEMORY_INIT / MEMORY_SIZE)
 #define MEM_RATIO_INC ((double)MEMORY_INC / MEMORY_SIZE)
 
@@ -508,6 +512,7 @@ static metrics_validation_t valid_metrics[] = {
     {"memory_usage_ratio/HBM/system", false, false, MEM_RATIO_INIT,
      MEM_RATIO_INC, 0, 0.0},
     {"temperature_celsius", true, false, TEMP_INIT, TEMP_INC, 0, 0.0},
+    {"temperature_ratio", true, false, TEMP_RATIO_INIT, TEMP_RATIO_INC, 0, 0.0},
     /* while counters increase, per-time incremented value should stay same */
     {"energy_ujoules_total", true, false, COUNTER_START, COUNTER_INC, 0, 0.0},