]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
xfs: start creating infrastructure for health monitoring
author Darrick J. Wong <djwong@kernel.org>
Wed, 21 Jan 2026 02:06:45 +0000 (18:06 -0800)
committer Darrick J. Wong <djwong@kernel.org>
Wed, 21 Jan 2026 02:06:45 +0000 (18:06 -0800)
Start creating helper functions and infrastructure to pass filesystem
health events to a health monitoring file.  Since this is an
administrative interface, we only support a single health monitor
process per filesystem, so we don't need to use anything fancy such as
notifier chains (== tons of indirect calls).

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
fs/xfs/Makefile
fs/xfs/libxfs/xfs_fs.h
fs/xfs/xfs_health.c
fs/xfs/xfs_healthmon.c [new file with mode: 0644]
fs/xfs/xfs_healthmon.h [new file with mode: 0644]
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h

index 5bf501cf827172f836f718faafc1695ec8ecb979..1b7385e23b3463a15dfb97a88517b476a6cca730 100644 (file)
@@ -88,6 +88,7 @@ xfs-y                         += xfs_aops.o \
                                   xfs_globals.o \
                                   xfs_handle.o \
                                   xfs_health.o \
+                                  xfs_healthmon.o \
                                   xfs_icache.o \
                                   xfs_ioctl.o \
                                   xfs_iomap.o \
index 12463ba766da053530e19b34c6a8344c67da3cd8..c58e55b3df409960310c32d4378e7d228d6c27e7 100644 (file)
@@ -1003,6 +1003,12 @@ struct xfs_rtgroup_geometry {
 #define XFS_RTGROUP_GEOM_SICK_RMAPBT   (1U << 3)  /* reverse mappings */
 #define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4)  /* reference counts */
 
+/*
+ * Argument to XFS_IOC_HEALTH_MONITOR.  The kernel currently rejects the call
+ * with EINVAL unless flags, format, and every pad byte are all zero.
+ */
+struct xfs_health_monitor {
+       __u64   flags;          /* flags; must be zero for now */
+       __u8    format;         /* output format; must be zero for now */
+       __u8    pad[23];        /* zeroes */
+};
+
 /*
  * ioctl commands that are used by Linux filesystems
  */
@@ -1042,6 +1048,7 @@ struct xfs_rtgroup_geometry {
 #define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle)
 #define XFS_IOC_SCRUBV_METADATA        _IOWR('X', 64, struct xfs_scrub_vec_head)
 #define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry)
+#define XFS_IOC_HEALTH_MONITOR _IOW ('X', 68, struct xfs_health_monitor)
 
 /*
  * ioctl commands that replace IRIX syssgi()'s
index fbb8886c72fe5e3a14e58ee7a948352dc1651ebe..3d50397f8f7c00aa81e722442f33fa9659fa1415 100644 (file)
@@ -19,6 +19,7 @@
 #include "xfs_da_btree.h"
 #include "xfs_quota_defs.h"
 #include "xfs_rtgroup.h"
+#include "xfs_healthmon.h"
 
 #include <linux/fserror.h>
 
diff --git a/fs/xfs/xfs_healthmon.c b/fs/xfs/xfs_healthmon.c
new file mode 100644 (file)
index 0000000..b7095ea
--- /dev/null
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2024-2026 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trace.h"
+#include "xfs_ag.h"
+#include "xfs_btree.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_quota_defs.h"
+#include "xfs_rtgroup.h"
+#include "xfs_healthmon.h"
+
+#include <linux/anon_inodes.h>
+#include <linux/eventpoll.h>
+#include <linux/poll.h>
+
+/*
+ * Live Health Monitoring
+ * ======================
+ *
+ * Autonomous self-healing of XFS filesystems requires a means for the kernel
+ * to send filesystem health events to a monitoring daemon in userspace.  To
+ * accomplish this, we create an anonymous file (anon_inode_getfd) to handle
+ * translating internal events about filesystem health into a format that can
+ * be parsed easily by userspace.  When those internal events occur, the core
+ * filesystem code calls this health monitor to convey the events to userspace.
+ * Userspace reads events from the file descriptor returned by the ioctl.
+ *
+ * The healthmon abstraction has a weak reference to the host filesystem mount
+ * so that the queueing and processing of the events do not pin the mount and
+ * cannot slow down the main filesystem.  The healthmon object can exist past
+ * the end of the filesystem mount.
+ */
+
+/*
+ * Sign of a detached health monitor: xfs_healthmon::mount_cookie holds this
+ * value whenever the monitor is not attached to a mounted filesystem.
+ */
+#define DETACHED_MOUNT_COOKIE          ((uintptr_t)0)
+
+/*
+ * Spinlock for atomically updating xfs_mount <-> xfs_healthmon pointers.  This
+ * is a single global lock rather than a per-mount one; both the attach and
+ * the detach paths take it.
+ */
+static DEFINE_SPINLOCK(xfs_healthmon_lock);
+
+/*
+ * Grab a reference to the healthmon object for a given mount, if any.
+ *
+ * Returns the monitor with an extra reference that the caller must drop with
+ * xfs_healthmon_put, or NULL if no monitor is attached or if the attached
+ * monitor has already lost its last reference.
+ */
+static struct xfs_healthmon *
+xfs_healthmon_get(
+       struct xfs_mount                *mp)
+{
+       struct xfs_healthmon            *hm;
+
+       /*
+        * We do not take xfs_healthmon_lock here, so attach and detach can
+        * change m_healthmon underneath us.  Mark the lockless load with
+        * READ_ONCE so that the compiler fetches the pointer exactly once and
+        * the NULL check and the refcount bump operate on the same value.
+        *
+        * The object is freed via kfree_rcu, so holding the RCU read lock
+        * keeps the memory valid while we try to take a reference.  A
+        * refcount that has already hit zero must not be resurrected.
+        */
+       rcu_read_lock();
+       hm = READ_ONCE(mp->m_healthmon);
+       if (hm && !refcount_inc_not_zero(&hm->ref))
+               hm = NULL;
+       rcu_read_unlock();
+
+       return hm;
+}
+
+/*
+ * Drop one reference to a healthmon object.  Whoever drops the last one frees
+ * the monitor, but only after an RCU grace period has elapsed, so that a
+ * concurrent xfs_healthmon_get never touches freed memory.
+ */
+static void
+xfs_healthmon_put(
+       struct xfs_healthmon            *hm)
+{
+       /* Somebody else still holds a reference; nothing more to do. */
+       if (!refcount_dec_and_test(&hm->ref))
+               return;
+
+       kfree_rcu_mightsleep(hm);
+}
+
+/*
+ * Attach a health monitor to an xfs_mount.  Only one allowed at a time.
+ *
+ * On success the mount holds its own reference to @hm and 0 is returned.
+ * Returns -EEXIST, leaving @hm untouched, if the mount already has a monitor.
+ */
+STATIC int
+xfs_healthmon_attach(
+       struct xfs_mount        *mp,
+       struct xfs_healthmon    *hm)
+{
+       spin_lock(&xfs_healthmon_lock);
+       if (mp->m_healthmon != NULL) {
+               spin_unlock(&xfs_healthmon_lock);
+               return -EEXIST;
+       }
+
+       /* This reference belongs to the mount; detaching drops it again. */
+       refcount_inc(&hm->ref);
+
+       /*
+        * xfs_healthmon_get samples m_healthmon without taking
+        * xfs_healthmon_lock, so mark the store to keep the compiler from
+        * tearing or duplicating it.
+        */
+       WRITE_ONCE(mp->m_healthmon, hm);
+       hm->mount_cookie = (uintptr_t)mp->m_super;
+       spin_unlock(&xfs_healthmon_lock);
+
+       return 0;
+}
+
+/*
+ * Detach a xfs mount from a specific healthmon instance.
+ *
+ * This can be reached both when the monitor file is closed and when the
+ * filesystem unmounts.  Whichever caller gets here first clears the mount's
+ * pointer and drops the reference that attach took on the mount's behalf;
+ * any later call sees the detached cookie and does nothing.
+ */
+STATIC void
+xfs_healthmon_detach(
+       struct xfs_healthmon    *hm)
+{
+       struct super_block      *sb;
+
+       spin_lock(&xfs_healthmon_lock);
+       if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) {
+               spin_unlock(&xfs_healthmon_lock);
+               return;
+       }
+
+       /*
+        * The cookie is only turned back into a pointer here, with the lock
+        * held and after checking that it has not been zeroed.
+        */
+       sb = (struct super_block *)hm->mount_cookie;
+
+       /*
+        * xfs_healthmon_get samples m_healthmon without taking
+        * xfs_healthmon_lock, so mark the store to keep the compiler from
+        * tearing or duplicating it.
+        */
+       WRITE_ONCE(XFS_M(sb)->m_healthmon, NULL);
+       hm->mount_cookie = DETACHED_MOUNT_COOKIE;
+       spin_unlock(&xfs_healthmon_lock);
+
+       /* Drop the reference that was held on behalf of the mount. */
+       xfs_healthmon_put(hm);
+}
+
+/*
+ * Called while unmounting: sever the link between this mount and its health
+ * monitor, if it has one.  The monitor object itself lives on for as long as
+ * anybody else still holds a reference to it.
+ */
+void
+xfs_healthmon_unmount(
+       struct xfs_mount                *mp)
+{
+       struct xfs_healthmon            *hm;
+
+       hm = xfs_healthmon_get(mp);
+       if (hm) {
+               xfs_healthmon_detach(hm);
+
+               /* Give back the temporary reference taken just above. */
+               xfs_healthmon_put(hm);
+       }
+}
+
+/*
+ * Read handler for the health monitoring file.  No event delivery is
+ * implemented here yet, so every read fails with -EIO.
+ */
+STATIC ssize_t
+xfs_healthmon_read_iter(
+       struct kiocb            *iocb,
+       struct iov_iter         *to)
+{
+       return -EIO;
+}
+
+/*
+ * The ->release handler of the health monitoring file.  Disconnects the
+ * monitor from the filesystem and gives up the file's reference to it.
+ * Always returns 0.
+ */
+STATIC int
+xfs_healthmon_release(
+       struct inode            *inode,
+       struct file             *file)
+{
+       struct xfs_healthmon    *hm;
+
+       hm = file->private_data;
+
+       /*
+        * Userspace processes can terminate at any time and for any reason,
+        * so the file may well be closed while the filesystem is still
+        * mounted.  Clear xfs_mount::m_healthmon in that case so that a new
+        * health monitor file can be created by another process.
+        */
+       xfs_healthmon_detach(hm);
+
+       /* Drop the reference that the file itself was holding. */
+       xfs_healthmon_put(hm);
+       return 0;
+}
+
+/*
+ * Check the ioctl arguments supplied by userspace.  Only an all-zeroes request
+ * passes: no flags set, format zero, and nothing but zeroes in the padding.
+ */
+static inline bool
+xfs_healthmon_validate(
+       const struct xfs_health_monitor *hmo)
+{
+       return hmo->flags == 0 &&
+              hmo->format == 0 &&
+              memchr_inv(&hmo->pad, 0, sizeof(hmo->pad)) == NULL;
+}
+
+/*
+ * Report through fdinfo whether the monitor is still attached to a
+ * filesystem, along with the device number of that filesystem.
+ */
+static void
+xfs_healthmon_show_fdinfo(
+       struct seq_file         *m,
+       struct file             *file)
+{
+       struct xfs_healthmon    *hm = file->private_data;
+       const char              *state = "alive";
+
+       /* A zeroed cookie means the monitor has been detached. */
+       if (hm->mount_cookie == DETACHED_MOUNT_COOKIE)
+               state = "dead";
+
+       seq_printf(m, "state:\t%s\ndev:\t%d:%d\n", state,
+                       MAJOR(hm->dev), MINOR(hm->dev));
+}
+
+/*
+ * File operations for the anonymous health monitoring file that
+ * xfs_ioc_health_monitor hands back to userspace.
+ */
+static const struct file_operations xfs_healthmon_fops = {
+       .owner          = THIS_MODULE,
+       .show_fdinfo    = xfs_healthmon_show_fdinfo,
+       .read_iter      = xfs_healthmon_read_iter,
+       .release        = xfs_healthmon_release,
+};
+
+/*
+ * Handle XFS_IOC_HEALTH_MONITOR: set up a health monitor for the filesystem
+ * that @file belongs to and hand it to the caller as a new file descriptor.
+ *
+ * Returns the new fd (an index to the fd table) on success.  Otherwise returns
+ * a negative errno: -EPERM if the caller is not allowed to do this, -EFAULT if
+ * @arg cannot be copied in, -EINVAL if the arguments fail validation, -ENOMEM
+ * if the monitor cannot be allocated, -EEXIST if the filesystem already has a
+ * monitor, or the error from anon_inode_getfd.
+ */
+long
+xfs_ioc_health_monitor(
+       struct file                     *file,
+       struct xfs_health_monitor __user *arg)
+{
+       struct xfs_health_monitor       hmo;
+       struct xfs_inode                *ip = XFS_I(file_inode(file));
+       struct xfs_mount                *mp = ip->i_mount;
+       struct xfs_healthmon            *hm;
+       int                             error;
+       int                             fd;
+
+       /*
+        * Health monitoring is meant for the xfs_healer daemon only, running
+        * on behalf of the whole filesystem in the initial user namespace.
+        * Hence the caller must be privileged, must issue the ioctl against
+        * the root directory, and must not be inside a container.
+        * Unprivileged userspace can use fsnotify instead.
+        */
+       if (!capable(CAP_SYS_ADMIN) ||
+           ip->i_ino != mp->m_sb.sb_rootino ||
+           current_user_ns() != &init_user_ns)
+               return -EPERM;
+
+       if (copy_from_user(&hmo, arg, sizeof(hmo)))
+               return -EFAULT;
+       if (!xfs_healthmon_validate(&hmo))
+               return -EINVAL;
+
+       hm = kzalloc(sizeof(*hm), GFP_KERNEL);
+       if (!hm)
+               return -ENOMEM;
+
+       /* This initial reference is the one the file will end up owning. */
+       refcount_set(&hm->ref, 1);
+       hm->dev = mp->m_super->s_dev;
+
+       /*
+        * Hook the monitor up to the xfs_mount.  Once this succeeds, the
+        * monitor is considered live and will receive events.
+        */
+       error = xfs_healthmon_attach(mp, hm);
+       if (error)
+               goto out_free;
+
+       /*
+        * Creating the anonymous file and installing its fd has to be the
+        * very last step, because a fd table installation cannot be undone.
+        * If it works, hm belongs to the file, which can go away at any
+        * time, so hm must not be touched again.
+        */
+       fd = anon_inode_getfd("xfs_healthmon", &xfs_healthmon_fops, hm,
+                       O_CLOEXEC | O_RDONLY);
+       if (fd >= 0)
+               return fd;
+
+       /* No file was created, so undo the attachment ourselves. */
+       error = fd;
+       xfs_healthmon_detach(hm);
+out_free:
+       ASSERT(refcount_read(&hm->ref) == 1);
+       xfs_healthmon_put(hm);
+       return error;
+}
diff --git a/fs/xfs/xfs_healthmon.h b/fs/xfs/xfs_healthmon.h
new file mode 100644 (file)
index 0000000..218d5aa
--- /dev/null
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2024-2026 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_HEALTHMON_H__
+#define __XFS_HEALTHMON_H__
+
+/*
+ * In-kernel state of a health monitor.  Allocated by xfs_ioc_health_monitor
+ * and freed once the last reference has been dropped.
+ */
+struct xfs_healthmon {
+       /*
+        * Weak reference to the xfs filesystem that is being monitored.  This
+        * is the address of the super_block stored as an integer, and it will
+        * be set to zero when the filesystem detaches from the monitor.
+        * Do not dereference this pointer.  The one place that converts it
+        * back is the detach path in xfs_healthmon.c, which does so only
+        * while holding the lock that guards attach and detach.
+        */
+       uintptr_t                       mount_cookie;
+
+       /*
+        * Device number of the filesystem being monitored.  This is for
+        * consistent tracing even after unmount.  It is also what the fdinfo
+        * output of the monitor file reports.
+        */
+       dev_t                           dev;
+
+       /*
+        * Reference count of this structure.  The open healthmon fd holds one
+        * ref, the xfs_mount holds another ref if it points to this object,
+        * and running event handlers hold their own refs.
+        */
+       refcount_t                      ref;
+};
+
+void xfs_healthmon_unmount(struct xfs_mount *mp);
+
+long xfs_ioc_health_monitor(struct file *file,
+               struct xfs_health_monitor __user *arg);
+
+#endif /* __XFS_HEALTHMON_H__ */
index 59eaad77437181dde21d67e86c8a0a24fa2a5c32..c04c41ca924e37214c656729c76cae23e65603b1 100644 (file)
@@ -41,6 +41,7 @@
 #include "xfs_exchrange.h"
 #include "xfs_handle.h"
 #include "xfs_rtgroup.h"
+#include "xfs_healthmon.h"
 
 #include <linux/mount.h>
 #include <linux/fileattr.h>
@@ -1419,6 +1420,9 @@ xfs_file_ioctl(
        case XFS_IOC_COMMIT_RANGE:
                return xfs_ioc_commit_range(filp, arg);
 
+       case XFS_IOC_HEALTH_MONITOR:
+               return xfs_ioc_health_monitor(filp, arg);
+
        default:
                return -ENOTTY;
        }
index 0953f6ae94abc83eab5ce7a0064c30eba1d6ca51..ab67c91915384c5f9b2f8d0135b32807a127fb63 100644 (file)
@@ -41,6 +41,7 @@
 #include "xfs_rtrefcount_btree.h"
 #include "scrub/stats.h"
 #include "xfs_zone_alloc.h"
+#include "xfs_healthmon.h"
 
 static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
@@ -625,6 +626,7 @@ xfs_unmount_flush_inodes(
        cancel_delayed_work_sync(&mp->m_reclaim_work);
        xfs_reclaim_inodes(mp);
        xfs_health_unmount(mp);
+       xfs_healthmon_unmount(mp);
 }
 
 static void
index b871dfde372b526366f307651d9958e0375abdf7..61c71128d171cbf028166d305436f20b9d5feba9 100644 (file)
@@ -13,6 +13,7 @@ struct xfs_ail;
 struct xfs_quotainfo;
 struct xfs_da_geometry;
 struct xfs_perag;
+struct xfs_healthmon;
 
 /* dynamic preallocation free space thresholds, 5% down to 1% */
 enum {
@@ -342,6 +343,9 @@ typedef struct xfs_mount {
 
        /* Hook to feed dirent updates to an active online repair. */
        struct xfs_hooks        m_dir_update_hooks;
+
+       /* Private data referring to a health monitor object. */
+       struct xfs_healthmon    *m_healthmon;
 } xfs_mount_t;
 
 #define M_IGEO(mp)             (&(mp)->m_ino_geo)