From: Eero Tamminen Date: Tue, 8 Nov 2022 16:34:31 +0000 (+0200) Subject: gpu_sysman: do metric reset on every loop round X-Git-Tag: 6.0.0-rc0~91 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ccbd648d12c0e0724b7581459e5426d7f9486bcb;p=thirdparty%2Fcollectd.git gpu_sysman: do metric reset on every loop round Not doing metric reset between loop rounds could result in extra incorrect metric label being reported for a metric, when earlier metric in the loop had a conditional label, but latter metric does not satisfy that condition (Sysman call for the info failed, but fail is ignored, or Sysman struct value used for given label is not set). This can happen e.g. with the conditional memory "health", frequency "throttled_by" and power "limit" labels. Other alternative would be either setting or removing (= using NULL) values for each of the possible labels on every round. Just reseting metric labels on every round seemed more robust (easier to review), and allowed simplifying the code slightly. Looking at collectd metric implementation, it causes more allocs / deallocs for the label array & label names though. Signed-off-by: Eero Tamminen --- diff --git a/src/gpu_sysman.c b/src/gpu_sysman.c index 7c37cbb0c..9da5f78fc 100644 --- a/src/gpu_sysman.c +++ b/src/gpu_sysman.c @@ -1122,9 +1122,9 @@ static bool gpu_mems(gpu_device_t *gpu, unsigned int cache_idx) { } reported = true; } + metric_reset(&metric); } if (reported) { - metric_reset(&metric); gpu_submit(gpu, &fam_bytes); if (reported_ratio) { gpu_submit(gpu, &fam_ratio); @@ -1238,20 +1238,18 @@ static bool gpu_mems_bw(gpu_device_t *gpu) { reported_ratio = true; } } + metric_reset(&metric); *old = bw; ok = true; } - if (ok) { - metric_reset(&metric); - if (reported_ratio) { - gpu_submit(gpu, &fam_ratio); - } - if (reported_rate) { - gpu_submit(gpu, &fam_rate); - } - if (reported_counter) { - gpu_submit(gpu, &fam_counter); - } + if (reported_ratio) { + gpu_submit(gpu, &fam_ratio); + } + if (reported_rate) { + gpu_submit(gpu, &fam_rate); + } + if (reported_counter) { + gpu_submit(gpu, &fam_counter); } free(mems); return ok; @@ -1484,6 +1482,7 @@ static bool gpu_freqs(gpu_device_t *gpu, unsigned int cache_idx) { reported = true; } } + metric_reset(&metric); if (!reported) { ERROR(PLUGIN_NAME ": neither requests nor actual frequencies supported " "for domain %d", @@ -1493,7 +1492,6 @@ static bool gpu_freqs(gpu_device_t *gpu, unsigned int cache_idx) { } } if (reported) { - metric_reset(&metric); gpu_submit(gpu, &fam_freq); if (reported_ratio) { gpu_submit(gpu, &fam_ratio); @@ -1588,17 +1586,15 @@ static bool gpu_freqs_throttle(gpu_device_t *gpu) { metric_family_metric_append(&fam_ratio, metric); reported_ratio = true; } + metric_reset(&metric); *old = throttle; ok = true; } - if (ok) { - metric_reset(&metric); - if (reported_ratio) { - gpu_submit(gpu, &fam_ratio); - } - if (reported_counter) { - gpu_submit(gpu, &fam_counter); - } + if (reported_ratio) { + gpu_submit(gpu, &fam_ratio); + } + if (reported_counter) { + gpu_submit(gpu, &fam_counter); } free(freqs); return ok; @@ -1698,10 +1694,10 @@ static bool gpu_temps(gpu_device_t *gpu) { metric_family_metric_append(&fam_ratio, metric); reported_ratio = true; } + metric_reset(&metric); ok = true; } if (ok) { - metric_reset(&metric); gpu_submit(gpu, &fam_temp); if (reported_ratio) { gpu_submit(gpu, &fam_ratio); @@ -1833,20 +1829,18 @@ static bool gpu_powers(gpu_device_t *gpu) { } } } + metric_reset(&metric); *old = counter; ok = true; } - if (ok) { - metric_reset(&metric); - if (reported_energy) { - gpu_submit(gpu, &fam_energy); - } - if (reported_power) { - gpu_submit(gpu, &fam_power); - } - if (reported_ratio) { - gpu_submit(gpu, &fam_ratio); - } + if (reported_energy) { + gpu_submit(gpu, &fam_energy); + } + if (reported_power) { + gpu_submit(gpu, &fam_power); + } + if (reported_ratio) { + gpu_submit(gpu, &fam_ratio); } free(powers); return ok; @@ -2007,17 +2001,15 @@ static bool gpu_engines(gpu_device_t *gpu) { metric_family_metric_append(&fam_ratio, metric); reported_ratio = true; } + metric_reset(&metric); *old = stats; ok = true; } - if (ok) { - metric_reset(&metric); - if (reported_ratio) { - gpu_submit(gpu, &fam_ratio); - } - if (reported_counter) { - gpu_submit(gpu, &fam_counter); - } + if (reported_ratio) { + gpu_submit(gpu, &fam_ratio); + } + if (reported_counter) { + gpu_submit(gpu, &fam_counter); } free(engines); return ok;