]> git.ipfire.org Git - collecty.git/commitdiff
sources: disk: Read SMART data if available
author Michael Tremer <michael.tremer@ipfire.org>
Tue, 21 Oct 2025 19:25:52 +0000 (19:25 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Tue, 21 Oct 2025 19:25:52 +0000 (19:25 +0000)
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
Makefile.am
src/daemon/sources/disk.c

index 4d84ac878e3ccd004df2d0112ee942ce0a4da93f..93c1622f14afb620c55f1877d138f200e9599c07 100644 (file)
@@ -193,17 +193,20 @@ telemetryd_CPPFLAGS = \
 
 telemetryd_CFLAGS = \
        $(AM_CFLAGS) \
+       $(LIBATASMART_CFLAGS) \
        $(RRD_CFLAGS) \
        $(SYSTEMD_CFLAGS) \
        $(UDEV_CFLAGS)
 
 telemetryd_LDFLAGS = \
        $(AM_LDFLAGS) \
+       $(LIBATASMART_LDFLAGS) \
        $(RRD_LDFLAGS) \
        $(SYSTEMD_LDFLAGS) \
        $(UDEV_LDFLAGS)
 
 telemetryd_LDADD = \
+       $(LIBATASMART_LIBS) \
        $(RRD_LIBS) \
        $(SYSTEMD_LIBS) \
        $(UDEV_LIBS)
index 75e901cc7229d213bf517b89fd1a3f2c44099e8b..da0ab21e5c11a1dd6c66eaeafdfcc8bb688c7f30 100644 (file)
 
 #include <libudev.h>
 
+#ifdef HAVE_LIBATASMART
+# include <atasmart.h>
+#endif /* HAVE_LIBATASMART */
+
 #include "../ctx.h"
 #include "../file.h"
 #include "../parse.h"
@@ -63,8 +67,137 @@ typedef struct td_disk {
        uint64_t discard_merges;
        uint64_t discard_sectors;
        uint64_t discard_ticks;
+
+       // Is the disk awake?
+       SkBool awake;
+
+       // Power On Time (in msec)
+       uint64_t power_on_time;
+
+       // SMART Status
+       SkBool smart_status;
+
+       // Power Cycles
+       uint64_t power_cycles;
+
+       // Bad Sectors
+       uint64_t bad_sectors;
+
+       // Temperature (in mK)
+       uint64_t temperature;
 } td_disk;
 
+#ifdef HAVE_LIBATASMART
+// Reads the SMART data of the disk at device node "node" into "stat".
+//
+// Nothing is written to stat if the disk does not support SMART. Otherwise
+// stat->awake is set first, and the remaining fields (smart_status,
+// power_on_time, power_cycles, bad_sectors, temperature) are only fetched
+// if the disk is awake, because fetching SMART data might wake up the device.
+//
+// Returns 0 on success (this includes disks without SMART support and disks
+// that are asleep) or a negative errno value on failure.
+static int disk_read_smart(td_ctx* ctx, const char* node, td_disk* stat) {
+       SkBool available = 0;
+       SkDisk* disk = NULL;
+       int r;
+
+       // Open the disk
+       r = sk_disk_open(node, &disk);
+       if (r < 0) {
+               // Store errno before logging, because logging might overwrite it
+               r = -errno;
+               ERROR(ctx, "Failed to open disk %s for SMART data: %m\n", node);
+               goto ERROR;
+       }
+
+       // Check if SMART is available
+       r = sk_disk_smart_is_available(disk, &available);
+       if (r < 0) {
+               switch (errno) {
+                       // Receiving ENOTSUP here means that SMART is not available, too
+                       case ENOTSUP:
+                               r = 0;
+                               break;
+
+                       // Handle any other errors
+                       default:
+                               r = -errno;
+                               ERROR(ctx, "Failed to check if %s supports SMART: %m\n", node);
+                               goto ERROR;
+               }
+       }
+
+       // Skip if this disk does not support SMART (r is zero at this point)
+       if (!available)
+               goto ERROR;
+
+       // Check if the disk is asleep
+       r = sk_disk_check_sleep_mode(disk, &stat->awake);
+       if (r < 0) {
+               r = -errno;
+               ERROR(ctx, "Failed to check if %s is asleep: %m\n", node);
+               goto ERROR;
+       }
+
+       // Skip if the disk is asleep (because fetching SMART data might wake up the device)
+       if (!stat->awake)
+               goto ERROR;
+
+       // Read the SMART data
+       r = sk_disk_smart_read_data(disk);
+       if (r < 0) {
+               r = -errno;
+               ERROR(ctx, "Failed to read SMART data from %s: %m\n", node);
+               goto ERROR;
+       }
+
+       // Read SMART status
+       r = sk_disk_smart_status(disk, &stat->smart_status);
+       if (r < 0) {
+               r = -errno;
+               ERROR(ctx, "Failed to read SMART status from %s: %m\n", node);
+               goto ERROR;
+       }
+
+       // Read power on time
+       r = sk_disk_smart_get_power_on(disk, &stat->power_on_time);
+       if (r < 0) {
+               r = -errno;
+               ERROR(ctx, "Failed to read power on time from %s: %m\n", node);
+               goto ERROR;
+       }
+
+       // Read power cycles
+       r = sk_disk_smart_get_power_cycle(disk, &stat->power_cycles);
+       if (r < 0) {
+               r = -errno;
+               ERROR(ctx, "Failed to read power cycles from %s: %m\n", node);
+               goto ERROR;
+       }
+
+       // Read bad sectors
+       r = sk_disk_smart_get_bad(disk, &stat->bad_sectors);
+       if (r < 0) {
+               switch (errno) {
+                       // ENOENT means that there are no bad sectors
+                       case ENOENT:
+                               // Report zero explicitly instead of relying on
+                               // the caller having initialized the field
+                               stat->bad_sectors = 0;
+                               r = 0;
+                               break;
+
+                       // Handle any other errors
+                       default:
+                               r = -errno;
+                               ERROR(ctx, "Failed to read bad sectors from %s: %m\n", node);
+                               goto ERROR;
+               }
+       }
+
+       // Read temperature
+       r = sk_disk_smart_get_temperature(disk, &stat->temperature);
+       if (r < 0) {
+               r = -errno;
+               ERROR(ctx, "Failed to read temperature from %s: %m\n", node);
+               goto ERROR;
+       }
+
+ERROR:
+       // Common exit path for success and failure; disk is NULL if opening failed
+       if (disk)
+               sk_disk_free(disk);
+
+       return r;
+}
+#endif /* HAVE_LIBATASMART */
+
 static int __disk_read_stat(td_ctx* ctx, td_file* file, unsigned long lineno,
                char* line, size_t length, void* data) {
        unsigned int token = 0;
@@ -232,13 +365,22 @@ static int disk_heartbeat_device(td_ctx* ctx, td_source* source, struct udev_dev
        if (r < 0)
                return r;
 
+#ifdef HAVE_LIBATASMART
+       // Fetch SMART information
+       r = disk_read_smart(ctx, node, &disk);
+       if (r < 0)
+               return r;
+#endif /* HAVE_LIBATASMART */
+
        // Submit stats
        return td_source_submit(source, serial,
-               "%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu",
+               "%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%lu:%d:%lu:%d:%lu:%lu:%lf",
                disk.read_ios, disk.read_merges, disk.read_sectors, disk.read_ticks,
                disk.write_ios, disk.write_merges, disk.write_sectors, disk.write_ticks,
                disk.in_flight, disk.io_ticks, disk.time_in_queue,
-               disk.discard_ios, disk.discard_merges, disk.discard_sectors, disk.discard_ticks
+               disk.discard_ios, disk.discard_merges, disk.discard_sectors, disk.discard_ticks,
+               (disk.awake) ? 1 : 0, disk.power_on_time, (disk.smart_status) ? 1 : 0,
+               disk.power_cycles, disk.bad_sectors, (double)disk.temperature / 1000
        );
 }
 
@@ -315,21 +457,31 @@ const td_source_impl disk_source = {
 
        // RRD Data Sources
        .rrd_dss = {
-               { "read_ios",        "DERIVE", 0, -1 },
-               { "read_merges",     "DERIVE", 0, -1 },
-               { "read_sectors",    "DERIVE", 0, -1 },
-               { "read_ticks",      "DERIVE", 0, -1 },
-               { "write_ios",       "DERIVE", 0, -1 },
-               { "write_merges",    "DERIVE", 0, -1 },
-               { "write_sectors",   "DERIVE", 0, -1 },
-               { "write_ticks",     "DERIVE", 0, -1 },
-               { "in_flight",       "DERIVE", 0, -1 },
-               { "io_ticks",        "DERIVE", 0, -1 },
-               { "time_in_queue",   "DERIVE", 0, -1 },
-               { "discard_ios",     "DERIVE", 0, -1 },
-               { "discard_merges",  "DERIVE", 0, -1 },
-               { "discard_sectors", "DERIVE", 0, -1 },
-               { "discard_ticks",   "DERIVE", 0, -1 },
+               // stats
+               { "read_ios",        "DERIVE",  0, -1 },
+               { "read_merges",     "DERIVE",  0, -1 },
+               { "read_sectors",    "DERIVE",  0, -1 },
+               { "read_ticks",      "DERIVE",  0, -1 },
+               { "write_ios",       "DERIVE",  0, -1 },
+               { "write_merges",    "DERIVE",  0, -1 },
+               { "write_sectors",   "DERIVE",  0, -1 },
+               { "write_ticks",     "DERIVE",  0, -1 },
+               { "in_flight",       "DERIVE",  0, -1 },
+               { "io_ticks",        "DERIVE",  0, -1 },
+               { "time_in_queue",   "DERIVE",  0, -1 },
+               { "discard_ios",     "DERIVE",  0, -1 },
+               { "discard_merges",  "DERIVE",  0, -1 },
+               { "discard_sectors", "DERIVE",  0, -1 },
+               { "discard_ticks",   "DERIVE",  0, -1 },
+
+               // SMART
+               { "awake",           "GAUGE",   0,  1 },
+               { "power_on_time",   "COUNTER", 0, -1 },
+               { "smart_status",    "GAUGE",   0,  1 },
+               { "power_cycle",     "COUNTER", 0, -1 },
+               { "bad_sectors",     "COUNTER", 0, -1 },
+               { "temperature",     "GAUGE",   0, -1 },
+
                { NULL },
        },