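-/* set frequency metric labels based on its properties, return true for success
- */
+/* set frequency metric labels based on its properties and store the max
+ * frequency from those properties (props.max) to maxfreq, return true for
+ * success
+ */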
-static bool set_freq_labels(zes_freq_handle_t freq, metric_t *metric) {
+static bool set_freq_labels(zes_freq_handle_t freq, metric_t *metric,
+ double *maxfreq) {
zes_freq_properties_t props;
if (zesFrequencyGetProperties(freq, &props) != ZE_RESULT_SUCCESS) {
return false;
}
+ *maxfreq = props.max;
const char *type;
switch (props.type) {
case ZES_FREQ_DOMAIN_GPU:
assert(gpu->frequency);
}
- metric_family_t fam = {
+ metric_family_t fam_freq = {
.help = "Sampled HW frequency (in MHz)",
.name = METRIC_PREFIX "frequency_mhz",
.type = METRIC_TYPE_GAUGE,
};
+ metric_family_t fam_ratio = {
+ .help = "Sampled HW frequency ratio vs (non-overclocked) max frequency",
+ .name = METRIC_PREFIX "frequency_ratio",
+ .type = METRIC_TYPE_GAUGE,
+ };
metric_t metric = {0};
- bool reported = false, ok = false;
+ bool reported_ratio = false, reported = false, ok = false;
for (i = 0; i < freq_count; i++) {
/* fetch freq samples */
if (zesFrequencyGetState(freqs[i], &(gpu->frequency[cache_idx][i])) !=
continue;
}
/* process samples */
- if (!set_freq_labels(freqs[i], &metric)) {
+ double maxfreq;
+ if (!set_freq_labels(freqs[i], &metric, &maxfreq)) {
ERROR(PLUGIN_NAME ": failed to get frequency domain %d properties", i);
ok = false;
break;
if (value >= 0) {
metric.value.gauge = value;
metric_label_set(&metric, "type", "request");
- metric_family_metric_append(&fam, metric);
+ metric_family_metric_append(&fam_freq, metric);
+ if ((config.output & OUTPUT_RATIO) && maxfreq > 0) {
+ metric.value.gauge = value / maxfreq;
+ metric_family_metric_append(&fam_ratio, metric);
+ reported_ratio = true;
+ }
freq_ok = true;
}
value = gpu->frequency[0][i].actual;
if (value >= 0) {
metric.value.gauge = value;
metric_label_set(&metric, "type", "actual");
- metric_family_metric_append(&fam, metric);
+ metric_family_metric_append(&fam_freq, metric);
+ if ((config.output & OUTPUT_RATIO) && maxfreq > 0) {
+ metric.value.gauge = value / maxfreq;
+ metric_family_metric_append(&fam_ratio, metric);
+ reported_ratio = true;
+ }
freq_ok = true;
}
} else {
metric.value.gauge = req_min;
metric_label_set(&metric, "type", "request");
metric_label_set(&metric, "function", "min");
- metric_family_metric_append(&fam, metric);
-
+ metric_family_metric_append(&fam_freq, metric);
+ if ((config.output & OUTPUT_RATIO) && maxfreq > 0) {
+ metric.value.gauge = req_min / maxfreq;
+ metric_family_metric_append(&fam_ratio, metric);
+ reported_ratio = true;
+ }
metric.value.gauge = req_max;
metric_label_set(&metric, "function", "max");
- metric_family_metric_append(&fam, metric);
+ metric_family_metric_append(&fam_freq, metric);
+ if ((config.output & OUTPUT_RATIO) && maxfreq > 0) {
+ metric.value.gauge = req_max / maxfreq;
+ metric_family_metric_append(&fam_ratio, metric);
+ reported_ratio = true;
+ }
freq_ok = true;
}
if (act_max >= 0.0) {
metric.value.gauge = act_min;
metric_label_set(&metric, "type", "actual");
metric_label_set(&metric, "function", "min");
- metric_family_metric_append(&fam, metric);
-
+ metric_family_metric_append(&fam_freq, metric);
+ if ((config.output & OUTPUT_RATIO) && maxfreq > 0) {
+ metric.value.gauge = act_min / maxfreq;
+ metric_family_metric_append(&fam_ratio, metric);
+ reported_ratio = true;
+ }
metric.value.gauge = act_max;
metric_label_set(&metric, "function", "max");
- metric_family_metric_append(&fam, metric);
+ metric_family_metric_append(&fam_freq, metric);
+ if ((config.output & OUTPUT_RATIO) && maxfreq > 0) {
+ metric.value.gauge = act_max / maxfreq;
+ metric_family_metric_append(&fam_ratio, metric);
+ reported_ratio = true;
+ }
freq_ok = true;
}
}
}
if (reported) {
metric_reset(&metric);
- gpu_submit(gpu, &fam);
+ gpu_submit(gpu, &fam_freq);
+ if (reported_ratio) {
+ gpu_submit(gpu, &fam_ratio);
+ }
}
free(freqs);
return ok;
ok = false;
break;
}
- if (!set_freq_labels(freqs[i], &metric)) {
+ double dummy;
+ if (!set_freq_labels(freqs[i], &metric, &dummy)) {
ERROR(PLUGIN_NAME ": failed to get frequency domain %d properties", i);
ok = false;
break;
#define COUNTER_MAX_RATIO \
(1.0e6 * COUNTER_INC / ((double)COUNTER_MAX * TIME_INC))
+#define FREQ_LIMIT 1600 /* freq_props.max, divisor for FREQ_RATIO_* values */
#define FREQ_INIT 300
#define FREQ_INC 50
engine_stats.activeTime += COUNTER_INC,
engine_stats.timestamp += TIME_INC)
-static zes_freq_properties_t freq_props;
+static zes_freq_properties_t freq_props = {.max = FREQ_LIMIT};
static zes_freq_state_t freq_state = {.request = FREQ_INIT,
.actual = FREQ_INIT};
double last;
} metrics_validation_t;
+#define FREQ_RATIO_INIT ((double)(FREQ_INIT) / (FREQ_LIMIT))
+#define FREQ_RATIO_INC ((double)(FREQ_INC) / (FREQ_LIMIT))
+
#define TEMP_RATIO_INIT ((double)(TEMP_INIT) / (TEMP_LIMIT))
#define TEMP_RATIO_INC ((double)(TEMP_INC) / (TEMP_LIMIT))
0.0},
{"frequency_mhz/request/gpu", false, false, FREQ_INIT, 2 * FREQ_INC, 0,
0.0},
+ {"frequency_ratio/actual/gpu/min", true, true, FREQ_RATIO_INIT,
+ FREQ_RATIO_INC, 0, 0.0},
+ {"frequency_ratio/actual/gpu/max", true, true, FREQ_RATIO_INIT,
+ FREQ_RATIO_INC, 0, 0.0},
+ {"frequency_ratio/actual/gpu", false, false, FREQ_RATIO_INIT,
+ FREQ_RATIO_INC, 0, 0.0},
+ {"frequency_ratio/request/gpu/min", true, true, FREQ_RATIO_INIT,
+ 2 * FREQ_RATIO_INC, 0, 0.0},
+ {"frequency_ratio/request/gpu/max", true, true, FREQ_RATIO_INIT,
+ 2 * FREQ_RATIO_INC, 0, 0.0},
+ {"frequency_ratio/request/gpu", false, false, FREQ_RATIO_INIT,
+ 2 * FREQ_RATIO_INC, 0, 0.0},
{"memory_used_bytes/HBM/system/min", true, true, MEMORY_INIT, MEMORY_INC, 0,
0.0},
{"memory_used_bytes/HBM/system/max", true, true, MEMORY_INIT, MEMORY_INC, 0,