From: Michael Tremer
Date: Tue, 21 Oct 2025 19:25:52 +0000 (+0000)
Subject: sources: disk: Read SMART data if available
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e2d788399b83ee1acf3f42e864bd76942f8a678f;p=telemetry.git

sources: disk: Read SMART data if available

Signed-off-by: Michael Tremer
---

Review notes (this section is commentary and not part of the change itself):

 * struct td_disk no longer uses SkBool for "awake" and "smart_status". The
   header that provides SkBool is only included if HAVE_LIBATASMART is
   defined, but the struct is compiled unconditionally, so a build without
   libatasmart could not compile it. Both fields are plain ints now and
   disk_read_smart() copies the values over from local SkBool variables.
 * disk_read_smart() stores -errno before it calls ERROR(), so that the
   return code no longer depends on what the logging does to errno.
 * disk_read_smart() resets r when sk_disk_smart_get_bad() fails with ENOENT,
   so that the tolerated failure cannot leak out as the return code.
 * The hunk headers of src/daemon/sources/disk.c have been adjusted for the
   15 lines that were added to the second hunk. The index lines are unchanged.
 * NOTE(review): This text was reconstructed from a copy that had lost its
   line breaks, its indentation and everything in angle brackets. Tabs are
   assumed for indentation, the placement of blank context lines is derived
   from the hunk headers, <atasmart.h> has been restored as the header of
   libatasmart, and the header name of the context line "#include" in the
   first disk.c hunk is still missing. Confirm all of this against the
   repository before applying.

diff --git a/Makefile.am b/Makefile.am
index 4d84ac8..93c1622 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -193,17 +193,20 @@ telemetryd_CPPFLAGS = \
 
 telemetryd_CFLAGS = \
 	$(AM_CFLAGS) \
+	$(LIBATASMART_CFLAGS) \
 	$(RRD_CFLAGS) \
 	$(SYSTEMD_CFLAGS) \
 	$(UDEV_CFLAGS)
 
 telemetryd_LDFLAGS = \
 	$(AM_LDFLAGS) \
+	$(LIBATASMART_LDFLAGS) \
 	$(RRD_LDFLAGS) \
 	$(SYSTEMD_LDFLAGS) \
 	$(UDEV_LDFLAGS)
 
 telemetryd_LDADD = \
+	$(LIBATASMART_LIBS) \
 	$(RRD_LIBS) \
 	$(SYSTEMD_LIBS) \
 	$(UDEV_LIBS)
diff --git a/src/daemon/sources/disk.c b/src/daemon/sources/disk.c
index 75e901c..da0ab21 100644
--- a/src/daemon/sources/disk.c
+++ b/src/daemon/sources/disk.c
@@ -23,6 +23,10 @@
 
 #include
 
+#ifdef HAVE_LIBATASMART
+# include <atasmart.h>
+#endif /* HAVE_LIBATASMART */
+
 #include "../ctx.h"
 #include "../file.h"
 #include "../parse.h"
@@ -63,8 +67,152 @@ typedef struct td_disk {
 	uint64_t discard_merges;
 	uint64_t discard_sectors;
 	uint64_t discard_ticks;
+
+	// Is the disk awake?
+	// (a plain int, because SkBool is only available when building with libatasmart)
+	int awake;
+
+	// Power On Time (in msec)
+	uint64_t power_on_time;
+
+	// SMART Status
+	int smart_status;
+
+	// Power Cycles
+	uint64_t power_cycles;
+
+	// Bad Sectors
+	uint64_t bad_sectors;
+
+	// Temperature (in mK)
+	uint64_t temperature;
 } td_disk;
 
+#ifdef HAVE_LIBATASMART
+// Reads the SMART data of the disk at "node" and stores it in "stat".
+//
+// Returns zero on success and a negative errno value on failure. Disks that do not
+// support SMART or that are asleep are skipped without an error.
+static int disk_read_smart(td_ctx* ctx, const char* node, td_disk* stat) {
+	SkBool available = 0;
+	SkBool awake = 0;
+	SkBool good = 0;
+	SkDisk* disk = NULL;
+	int r;
+
+	// Open the disk
+	r = sk_disk_open(node, &disk);
+	if (r < 0) {
+		// Store errno first, because logging might overwrite it
+		r = -errno;
+		ERROR(ctx, "Failed to open disk %s for SMART data: %m\n", node);
+		goto ERROR;
+	}
+
+	// Check if SMART is available
+	r = sk_disk_smart_is_available(disk, &available);
+	if (r < 0) {
+		switch (errno) {
+			// Receiving ENOTSUP here means that SMART is not available, too
+			case ENOTSUP:
+				r = 0;
+				break;
+
+			// Handle any other errors
+			default:
+				r = -errno;
+				ERROR(ctx, "Failed to check if %s supports SMART: %m\n", node);
+				goto ERROR;
+		}
+	}
+
+	// Skip if this disk does not support SMART
+	if (!available)
+		goto ERROR;
+
+	// Check if the disk is asleep
+	r = sk_disk_check_sleep_mode(disk, &awake);
+	if (r < 0) {
+		r = -errno;
+		ERROR(ctx, "Failed to check if %s is asleep: %m\n", node);
+		goto ERROR;
+	}
+
+	// Store the result
+	stat->awake = awake;
+
+	// Skip if the disk is asleep (because fetching SMART data might wake up the device)
+	if (!awake)
+		goto ERROR;
+
+	// Read the SMART data
+	r = sk_disk_smart_read_data(disk);
+	if (r < 0) {
+		r = -errno;
+		ERROR(ctx, "Failed to read SMART data from %s: %m\n", node);
+		goto ERROR;
+	}
+
+	// Read SMART status
+	r = sk_disk_smart_status(disk, &good);
+	if (r < 0) {
+		r = -errno;
+		ERROR(ctx, "Failed to read SMART status from %s: %m\n", node);
+		goto ERROR;
+	}
+
+	// Store the result
+	stat->smart_status = good;
+
+	// Read power on time
+	r = sk_disk_smart_get_power_on(disk, &stat->power_on_time);
+	if (r < 0) {
+		r = -errno;
+		ERROR(ctx, "Failed to read power on time from %s: %m\n", node);
+		goto ERROR;
+	}
+
+	// Read power cycles
+	r = sk_disk_smart_get_power_cycle(disk, &stat->power_cycles);
+	if (r < 0) {
+		r = -errno;
+		ERROR(ctx, "Failed to read power cycles from %s: %m\n", node);
+		goto ERROR;
+	}
+
+	// Read bad sectors
+	r = sk_disk_smart_get_bad(disk, &stat->bad_sectors);
+	if (r < 0) {
+		switch (errno) {
+			// ENOENT means that there are no bad sectors
+			case ENOENT:
+				r = 0;
+				break;
+
+			// Handle any other errors
+			default:
+				r = -errno;
+				ERROR(ctx, "Failed to read bad sectors from %s: %m\n", node);
+				goto ERROR;
+		}
+	}
+
+	// Read temperature
+	r = sk_disk_smart_get_temperature(disk, &stat->temperature);
+	if (r < 0) {
+		r = -errno;
+		ERROR(ctx, "Failed to read temperature from %s: %m\n", node);
+		goto ERROR;
+	}
+
+ERROR:
+	if (disk)
+		sk_disk_free(disk);
+
+	return r;
+}
+#endif /* HAVE_LIBATASMART */
+
 static int __disk_read_stat(td_ctx* ctx, td_file* file,
 		unsigned long lineno, char* line, size_t length, void* data) {
 	unsigned int token = 0;
@@ -232,13 +380,22 @@ static int disk_heartbeat_device(td_ctx* ctx, td_source* source, struct udev_dev
 	if (r < 0)
 		return r;
 
+#ifdef HAVE_LIBATASMART
+	// Fetch SMART information
+	r = disk_read_smart(ctx, node, &disk);
+	if (r < 0)
+		return r;
+#endif /* HAVE_LIBATASMART */
+
 	// Submit stats
 	return td_source_submit(source, serial,
-		"%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu",
+		"%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%d:%lu:%d:%lu:%lu:%lf",
 		disk.read_ios, disk.read_merges, disk.read_sectors, disk.read_ticks,
 		disk.write_ios, disk.write_merges, disk.write_sectors, disk.write_ticks,
 		disk.in_flight, disk.io_ticks, disk.time_in_queue,
-		disk.discard_ios, disk.discard_merges, disk.discard_sectors, disk.discard_ticks
+		disk.discard_ios, disk.discard_merges, disk.discard_sectors, disk.discard_ticks,
+		(disk.awake) ? 1 : 0, disk.power_on_time, (disk.smart_status) ? 1 : 0,
+		disk.power_cycles, disk.bad_sectors, (double)disk.temperature / 1000
 	);
 }
 
@@ -315,21 +472,31 @@ const td_source_impl disk_source = {
 
 	// RRD Data Sources
 	.rrd_dss = {
-		{ "read_ios", "DERIVE", 0, -1 },
-		{ "read_merges", "DERIVE", 0, -1 },
-		{ "read_sectors", "DERIVE", 0, -1 },
-		{ "read_ticks", "DERIVE", 0, -1 },
-		{ "write_ios", "DERIVE", 0, -1 },
-		{ "write_merges", "DERIVE", 0, -1 },
-		{ "write_sectors", "DERIVE", 0, -1 },
-		{ "write_ticks", "DERIVE", 0, -1 },
-		{ "in_flight", "DERIVE", 0, -1 },
-		{ "io_ticks", "DERIVE", 0, -1 },
-		{ "time_in_queue", "DERIVE", 0, -1 },
-		{ "discard_ios", "DERIVE", 0, -1 },
-		{ "discard_merges", "DERIVE", 0, -1 },
-		{ "discard_sectors", "DERIVE", 0, -1 },
-		{ "discard_ticks", "DERIVE", 0, -1 },
+		// stats
+		{ "read_ios",        "DERIVE",  0, -1 },
+		{ "read_merges",     "DERIVE",  0, -1 },
+		{ "read_sectors",    "DERIVE",  0, -1 },
+		{ "read_ticks",      "DERIVE",  0, -1 },
+		{ "write_ios",       "DERIVE",  0, -1 },
+		{ "write_merges",    "DERIVE",  0, -1 },
+		{ "write_sectors",   "DERIVE",  0, -1 },
+		{ "write_ticks",     "DERIVE",  0, -1 },
+		{ "in_flight",       "DERIVE",  0, -1 },
+		{ "io_ticks",        "DERIVE",  0, -1 },
+		{ "time_in_queue",   "DERIVE",  0, -1 },
+		{ "discard_ios",     "DERIVE",  0, -1 },
+		{ "discard_merges",  "DERIVE",  0, -1 },
+		{ "discard_sectors", "DERIVE",  0, -1 },
+		{ "discard_ticks",   "DERIVE",  0, -1 },
+
+		// SMART
+		{ "awake",           "GAUGE",   0,  1 },
+		{ "power_on_time",   "COUNTER", 0, -1 },
+		{ "smart_status",    "GAUGE",   0,  1 },
+		{ "power_cycle",     "COUNTER", 0, -1 },
+		{ "bad_sectors",     "COUNTER", 0, -1 },
+		{ "temperature",     "GAUGE",   0, -1 },
+
 		{ NULL },
 	},
 