#include <stdint.h>
#include <stdio.h>
-#define MAX_DEVNAME_LEN 256
#define PLUGIN_NAME "gpu_nvidia"
static nvmlReturn_t nv_status = NVML_SUCCESS;
#define KEY_GPUINDEX "GPUIndex"
#define KEY_IGNORESELECTED "IgnoreSelected"
+#define KEY_INSTANCE_BY_GPUINDEX "InstanceByGPUIndex"
+#define KEY_INSTANCE_BY_GPUNAME "InstanceByGPUName"
static const char *config_keys[] = {
KEY_GPUINDEX,
KEY_IGNORESELECTED,
+ KEY_INSTANCE_BY_GPUINDEX,
+ KEY_INSTANCE_BY_GPUNAME
};
static const unsigned int n_config_keys = STATIC_ARRAY_SIZE(config_keys);
static uint64_t conf_match_mask = 0;
static bool conf_mask_is_exclude = 0;
+// use GPU index and device name by default
+static uint8_t instance_by = 0x02;
+
static int nvml_config(const char *key, const char *value) {
if (strcasecmp(key, KEY_GPUINDEX) == 0) {
conf_match_mask |= (1 << device_ix);
} else if (strcasecmp(key, KEY_IGNORESELECTED)) {
conf_mask_is_exclude = IS_TRUE(value);
+ } else if (strcasecmp(key, KEY_INSTANCE_BY_GPUINDEX)) {
+ if (IS_FALSE(value)) {
+ instance_by &= ~(1 << 0);
+ }
+ } else if (strcasecmp(key, KEY_INSTANCE_BY_GPUNAME)) {
+ if (IS_FALSE(value)) {
+ instance_by &= ~(1 << 1);
+ }
} else {
ERROR(PLUGIN_NAME ": Unrecognized config option %s", key);
return -10;
return -1;
}
-static void nvml_submit_gauge(const char *plugin_instance, const char *type,
- const char *type_instance, gauge_t nvml) {
+static void nvml_submit_gauge(int device_idx, const char *device_name,
+ const char *type, const char *type_instance,
+ gauge_t nvml) {
value_list_t vl = VALUE_LIST_INIT;
vl.values_len = 1;
sstrncpy(vl.plugin, PLUGIN_NAME, sizeof(vl.plugin));
- sstrncpy(vl.plugin_instance, plugin_instance, sizeof(vl.plugin_instance));
+
+ if (instance_by & 0x02) {
+ snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%i-%s",
+ device_idx, device_name);
+ } else if (instance_by & (1 << 0)) {
+ snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%i", device_idx);
+ } else if (instance_by & (1 << 1)) {
+ sstrncpy(vl.plugin_instance, device_name, sizeof(vl.plugin_instance));
+ }
sstrncpy(vl.type, type, sizeof(vl.type));
nvmlDevice_t dev;
TRY(nvmlDeviceGetHandleByIndex(ix, &dev));
- char dev_name[MAX_DEVNAME_LEN + 1] = {0};
- TRY(nvmlDeviceGetName(dev, dev_name, sizeof(dev_name) - 1));
-
+ char dev_name[NVML_DEVICE_NAME_BUFFER_SIZE] = {0};
+ if( instance_by & (1 << 1) ) {
+ TRY(nvmlDeviceGetName(dev, dev_name, sizeof(dev_name) - 1));
+ }
+
// Try to be as lenient as possible with the variety of devices that are
// out there, ignoring any NOT_SUPPORTED errors gently.
nvmlMemory_t meminfo;
TRYOPT(nvmlDeviceGetMemoryInfo(dev, &meminfo))
if (nv_status == NVML_SUCCESS) {
- nvml_submit_gauge(dev_name, "memory", "used", meminfo.used);
- nvml_submit_gauge(dev_name, "memory", "free", meminfo.free);
+ nvml_submit_gauge(ix, dev_name, "memory", "used", meminfo.used);
+ nvml_submit_gauge(ix, dev_name, "memory", "free", meminfo.free);
}
nvmlUtilization_t utilization;
TRYOPT(nvmlDeviceGetUtilizationRates(dev, &utilization))
if (nv_status == NVML_SUCCESS)
- nvml_submit_gauge(dev_name, "percent", "gpu_used", utilization.gpu);
+ nvml_submit_gauge(ix, dev_name, "percent", "gpu_used", utilization.gpu);
unsigned int fan_speed;
TRYOPT(nvmlDeviceGetFanSpeed(dev, &fan_speed))
if (nv_status == NVML_SUCCESS)
- nvml_submit_gauge(dev_name, "fanspeed", NULL, fan_speed);
+ nvml_submit_gauge(ix, dev_name, "fanspeed", NULL, fan_speed);
unsigned int core_temp;
TRYOPT(nvmlDeviceGetTemperature(dev, NVML_TEMPERATURE_GPU, &core_temp))
if (nv_status == NVML_SUCCESS)
- nvml_submit_gauge(dev_name, "temperature", "core", core_temp);
+ nvml_submit_gauge(ix, dev_name, "temperature", "core", core_temp);
unsigned int sm_clk_mhz;
TRYOPT(nvmlDeviceGetClockInfo(dev, NVML_CLOCK_SM, &sm_clk_mhz))
if (nv_status == NVML_SUCCESS)
- nvml_submit_gauge(dev_name, "frequency", "multiprocessor",
+ nvml_submit_gauge(ix, dev_name, "frequency", "multiprocessor",
1e6 * sm_clk_mhz);
unsigned int mem_clk_mhz;
TRYOPT(nvmlDeviceGetClockInfo(dev, NVML_CLOCK_MEM, &mem_clk_mhz))
if (nv_status == NVML_SUCCESS)
- nvml_submit_gauge(dev_name, "frequency", "memory", 1e6 * mem_clk_mhz);
+ nvml_submit_gauge(ix, dev_name, "frequency", "memory", 1e6 * mem_clk_mhz);
unsigned int power_mW;
TRYOPT(nvmlDeviceGetPowerUsage(dev, &power_mW))
if (nv_status == NVML_SUCCESS)
- nvml_submit_gauge(dev_name, "power", NULL, 1e-3 * power_mW);
+ nvml_submit_gauge(ix, dev_name, "power", NULL, 1e-3 * power_mW);
continue;