git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
xfs: convey file I/O errors to the health monitor
author Darrick J. Wong <djwong@kernel.org>
Wed, 21 Jan 2026 02:06:50 +0000 (18:06 -0800)
committer Darrick J. Wong <djwong@kernel.org>
Wed, 21 Jan 2026 02:06:50 +0000 (18:06 -0800)
Connect the fserror reporting to the health monitor so that xfs can send
events about file I/O errors to the xfs_healer daemon.  These events are
entirely informational because xfs cannot regenerate user data, so
hopefully the fsnotify I/O error event gets noticed by the relevant
management systems.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
fs/xfs/libxfs/xfs_fs.h
fs/xfs/xfs_healthmon.c
fs/xfs/xfs_healthmon.h
fs/xfs/xfs_super.c
fs/xfs/xfs_trace.c
fs/xfs/xfs_trace.h

index 38aeb1b0d87b5e63e6bb28ef5a39d553ec309ad5..4ec1b2aede976f6ddd72a2f23c8f2ba2d73a17ab 100644 (file)
@@ -1019,6 +1019,9 @@ struct xfs_rtgroup_geometry {
 #define XFS_HEALTH_MONITOR_DOMAIN_RTDEV                (6)
 #define XFS_HEALTH_MONITOR_DOMAIN_LOGDEV       (7)
 
+/* file range events */
+#define XFS_HEALTH_MONITOR_DOMAIN_FILERANGE    (8)
+
 /* Health monitor event types */
 
 /* status of the monitor itself */
@@ -1039,6 +1042,17 @@ struct xfs_rtgroup_geometry {
 /* media errors */
 #define XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR    (7)
 
+/* pagecache I/O to a file range failed */
+#define XFS_HEALTH_MONITOR_TYPE_BUFREAD                (8)
+#define XFS_HEALTH_MONITOR_TYPE_BUFWRITE       (9)
+
+/* direct I/O to a file range failed */
+#define XFS_HEALTH_MONITOR_TYPE_DIOREAD                (10)
+#define XFS_HEALTH_MONITOR_TYPE_DIOWRITE       (11)
+
+/* out of band media error reported for a file range */
+#define XFS_HEALTH_MONITOR_TYPE_DATALOST       (12)
+
 /* lost events */
 struct xfs_health_monitor_lost {
        __u64   count;
@@ -1079,6 +1093,15 @@ struct xfs_health_monitor_shutdown {
        __u32   reasons;
 };
 
+/* file range events: identifies the file and the range an error affected */
+struct xfs_health_monitor_filerange {
+       __u64   pos;    /* start of the affected range within the file */
+       __u64   len;    /* length of the affected range */
+       __u64   ino;    /* inode number of the file */
+       __u32   gen;    /* inode generation of the file */
+       __u32   error;  /* error code, reported as a non-negative number */
+};
+
 /* disk media errors */
 struct xfs_health_monitor_media {
        __u64   daddr;
@@ -1107,6 +1130,7 @@ struct xfs_health_monitor_event {
                struct xfs_health_monitor_inode inode;
                struct xfs_health_monitor_shutdown shutdown;
                struct xfs_health_monitor_media media;
+               struct xfs_health_monitor_filerange filerange;
        } e;
 
        /* zeroes */
index 773bd4414d947ae6d878576de2380fb52805b5c1..1bb4b0adf2470e75dc618a05129834fef08c36e7 100644 (file)
 #include "xfs_healthmon.h"
 #include "xfs_fsops.h"
 #include "xfs_notify_failure.h"
+#include "xfs_file.h"
 
 #include <linux/anon_inodes.h>
 #include <linux/eventpoll.h>
 #include <linux/poll.h>
+#include <linux/fserror.h>
 
 /*
  * Live Health Monitoring
@@ -222,6 +224,27 @@ xfs_healthmon_merge_events(
                        return true;
                }
                return false;
+
+       case XFS_HEALTHMON_BUFREAD:
+       case XFS_HEALTHMON_BUFWRITE:
+       case XFS_HEALTHMON_DIOREAD:
+       case XFS_HEALTHMON_DIOWRITE:
+       case XFS_HEALTHMON_DATALOST:
+               /* logically adjacent file ranges can merge */
+               if (existing->fino != new->fino || existing->fgen != new->fgen)
+                       return false;
+
+               if (existing->fpos + existing->flen == new->fpos) {
+                       existing->flen += new->flen;
+                       return true;
+               }
+
+               if (new->fpos + new->flen == existing->fpos) {
+                       existing->fpos = new->fpos;
+                       existing->flen += new->flen;
+                       return true;
+               }
+               return false;
        }
 
        return false;
@@ -578,6 +601,55 @@ xfs_healthmon_report_media(
        xfs_healthmon_put(hm);
 }
 
+static inline enum xfs_healthmon_type file_ioerr_type(enum fserror_type action)
+{
+       switch (action) {       /* map fserror type to healthmon event type */
+       case FSERR_BUFFERED_READ:
+               return XFS_HEALTHMON_BUFREAD;
+       case FSERR_BUFFERED_WRITE:
+               return XFS_HEALTHMON_BUFWRITE;
+       case FSERR_DIRECTIO_READ:
+               return XFS_HEALTHMON_DIOREAD;
+       case FSERR_DIRECTIO_WRITE:
+               return XFS_HEALTHMON_DIOWRITE;
+       case FSERR_DATA_LOST:
+               return XFS_HEALTHMON_DATALOST;
+       case FSERR_METADATA:
+               /* filtered out by xfs_fs_report_error; drops to the ASSERT */
+               break;
+       }
+
+       ASSERT(0);      /* only reached for FSERR_METADATA or an unlisted value */
+       return -1;      /* placeholder result for the should-not-happen path */
+}
+
+/* Add a file io error event (@p, on inode @ip) to the reporting queue. */
+void
+xfs_healthmon_report_file_ioerror(
+       struct xfs_inode                *ip,
+       const struct fserror_event      *p)
+{
+       struct xfs_healthmon_event      event = {
+               .type                   = file_ioerr_type(p->type),
+               .domain                 = XFS_HEALTHMON_FILERANGE,
+               .fino                   = ip->i_ino,
+               .fgen                   = VFS_I(ip)->i_generation,
+               .fpos                   = p->pos,
+               .flen                   = p->len,
+               /* send positive errno to userspace; presumably p->error <= 0 */
+               .error                  = -p->error,
+       };
+       struct xfs_healthmon            *hm = xfs_healthmon_get(ip->i_mount);
+
+       if (!hm)
+               return;         /* no monitor returned: drop the event */
+
+       trace_xfs_healthmon_report_file_ioerror(hm, p);
+
+       xfs_healthmon_push(hm, &event);
+       xfs_healthmon_put(hm);  /* presumably pairs with xfs_healthmon_get */
+}
+
 static inline void
 xfs_healthmon_reset_outbuf(
        struct xfs_healthmon            *hm)
@@ -633,6 +705,7 @@ static const unsigned int domain_map[] = {
        [XFS_HEALTHMON_DATADEV]         = XFS_HEALTH_MONITOR_DOMAIN_DATADEV,
        [XFS_HEALTHMON_RTDEV]           = XFS_HEALTH_MONITOR_DOMAIN_RTDEV,
        [XFS_HEALTHMON_LOGDEV]          = XFS_HEALTH_MONITOR_DOMAIN_LOGDEV,
+       [XFS_HEALTHMON_FILERANGE]       = XFS_HEALTH_MONITOR_DOMAIN_FILERANGE,
 };
 
 static const unsigned int type_map[] = {
@@ -644,6 +717,11 @@ static const unsigned int type_map[] = {
        [XFS_HEALTHMON_UNMOUNT]         = XFS_HEALTH_MONITOR_TYPE_UNMOUNT,
        [XFS_HEALTHMON_SHUTDOWN]        = XFS_HEALTH_MONITOR_TYPE_SHUTDOWN,
        [XFS_HEALTHMON_MEDIA_ERROR]     = XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR,
+       [XFS_HEALTHMON_BUFREAD]         = XFS_HEALTH_MONITOR_TYPE_BUFREAD,
+       [XFS_HEALTHMON_BUFWRITE]        = XFS_HEALTH_MONITOR_TYPE_BUFWRITE,
+       [XFS_HEALTHMON_DIOREAD]         = XFS_HEALTH_MONITOR_TYPE_DIOREAD,
+       [XFS_HEALTHMON_DIOWRITE]        = XFS_HEALTH_MONITOR_TYPE_DIOWRITE,
+       [XFS_HEALTHMON_DATALOST]        = XFS_HEALTH_MONITOR_TYPE_DATALOST,
 };
 
 /* Render event as a V0 structure */
@@ -701,6 +779,13 @@ xfs_healthmon_format_v0(
                hme.e.media.daddr = event->daddr;
                hme.e.media.bbcount = event->bbcount;
                break;
+       case XFS_HEALTHMON_FILERANGE:
+               hme.e.filerange.ino = event->fino;
+               hme.e.filerange.gen = event->fgen;
+               hme.e.filerange.pos = event->fpos;
+               hme.e.filerange.len = event->flen;
+               hme.e.filerange.error = abs(event->error);
+               break;
        default:
                break;
        }
index 54536aac427813093915bbe2c30f10ed425f5f38..0e936507037fdad75e35cd7f1a17c2a6777160d7 100644 (file)
@@ -82,6 +82,13 @@ enum xfs_healthmon_type {
 
        /* media errors */
        XFS_HEALTHMON_MEDIA_ERROR,
+
+       /* file range events */
+       XFS_HEALTHMON_BUFREAD,
+       XFS_HEALTHMON_BUFWRITE,
+       XFS_HEALTHMON_DIOREAD,
+       XFS_HEALTHMON_DIOWRITE,
+       XFS_HEALTHMON_DATALOST,
 };
 
 enum xfs_healthmon_domain {
@@ -97,6 +104,9 @@ enum xfs_healthmon_domain {
        XFS_HEALTHMON_DATADEV,
        XFS_HEALTHMON_RTDEV,
        XFS_HEALTHMON_LOGDEV,
+
+       /* file range events */
+       XFS_HEALTHMON_FILERANGE,
 };
 
 struct xfs_healthmon_event {
@@ -139,6 +149,14 @@ struct xfs_healthmon_event {
                        xfs_daddr_t     daddr;
                        uint64_t        bbcount;
                };
+               /* file range events */
+               struct {
+                       xfs_ino_t       fino;
+                       loff_t          fpos;
+                       uint64_t        flen;
+                       uint32_t        fgen;
+                       int             error;
+               };
        };
 };
 
@@ -157,6 +175,9 @@ void xfs_healthmon_report_shutdown(struct xfs_mount *mp, uint32_t flags);
 void xfs_healthmon_report_media(struct xfs_mount *mp, enum xfs_device fdev,
                xfs_daddr_t daddr, uint64_t bbcount);
 
+void xfs_healthmon_report_file_ioerror(struct xfs_inode *ip,
+               const struct fserror_event *p);
+
 long xfs_ioc_health_monitor(struct file *file,
                struct xfs_health_monitor __user *arg);
 
index bc71aa9dcee8d6afbf641b0606895da49fe00dbf..d0cef9ce6b8992bc6d03b2157666962a69e74d95 100644 (file)
 #include "xfs_parent.h"
 #include "xfs_rtalloc.h"
 #include "xfs_zone_alloc.h"
+#include "xfs_healthmon.h"
 #include "scrub/stats.h"
 #include "scrub/rcbag_btree.h"
 
 #include <linux/magic.h>
 #include <linux/fs_context.h>
 #include <linux/fs_parser.h>
+#include <linux/fserror.h>
 
 static const struct super_operations xfs_super_operations;
 
@@ -1301,6 +1303,15 @@ xfs_fs_show_stats(
        return 0;
 }
 
+static void
+xfs_fs_report_error(
+       const struct fserror_event      *event)
+{       /* installed as .report_error in xfs_super_operations */
+       /* healthmon already knows about non-inode and metadata errors; skip them */
+       if (event->inode && event->type != FSERR_METADATA)
+               xfs_healthmon_report_file_ioerror(XFS_I(event->inode), event);
+}
+
 static const struct super_operations xfs_super_operations = {
        .alloc_inode            = xfs_fs_alloc_inode,
        .destroy_inode          = xfs_fs_destroy_inode,
@@ -1317,6 +1328,7 @@ static const struct super_operations xfs_super_operations = {
        .free_cached_objects    = xfs_fs_free_cached_objects,
        .shutdown               = xfs_fs_shutdown,
        .show_stats             = xfs_fs_show_stats,
+       .report_error           = xfs_fs_report_error,
 };
 
 static int
index 08ddab700a6cd3ff61e7e912e7f7e706a8e9c3e7..3ae449646eb9b237308f5b62746a19cdefbd705a 100644 (file)
@@ -54,6 +54,8 @@
 #include "xfs_health.h"
 #include "xfs_healthmon.h"
 #include "xfs_notify_failure.h"
+#include "xfs_file.h"
+#include <linux/fserror.h>
 
 /*
  * We include this last to have the helpers above available for the trace
index fe7295a4e917ee7f375d72fcef6a278359768c42..0cf4877753584fc2eed099993fa16281490ed116 100644 (file)
@@ -105,6 +105,7 @@ struct xfs_rtgroup;
 struct xfs_open_zone;
 struct xfs_healthmon_event;
 struct xfs_healthmon;
+struct fserror_event;
 
 #define XFS_ATTR_FILTER_FLAGS \
        { XFS_ATTR_ROOT,        "ROOT" }, \
@@ -6092,6 +6093,12 @@ DECLARE_EVENT_CLASS(xfs_healthmon_event_class,
                        __entry->offset = event->daddr;
                        __entry->length = event->bbcount;
                        break;
+               case XFS_HEALTHMON_FILERANGE:
+                       __entry->ino = event->fino;
+                       __entry->gen = event->fgen;
+                       __entry->offset = event->fpos;
+                       __entry->length = event->flen;
+                       break;
                }
        ),
        TP_printk("dev %d:%d type %s domain %s mask 0x%x ino 0x%llx gen 0x%x offset 0x%llx len 0x%llx group 0x%x lost %llu",
@@ -6266,6 +6273,53 @@ TRACE_EVENT(xfs_healthmon_report_media,
                  __entry->bbcount)
 );
 
+#define FS_ERROR_STRINGS /* fserror type names for __print_symbolic */ \
+       { FSERR_BUFFERED_READ,          "buffered_read" }, \
+       { FSERR_BUFFERED_WRITE,         "buffered_write" }, \
+       { FSERR_DIRECTIO_READ,          "directio_read" }, \
+       { FSERR_DIRECTIO_WRITE,         "directio_write" }, \
+       { FSERR_DATA_LOST,              "data_lost" }, \
+       { FSERR_METADATA,               "metadata" }
+
+TRACE_DEFINE_ENUM(FSERR_BUFFERED_READ); /* NOTE(review): presumably for userspace enum decoding -- confirm */
+TRACE_DEFINE_ENUM(FSERR_BUFFERED_WRITE);
+TRACE_DEFINE_ENUM(FSERR_DIRECTIO_READ);
+TRACE_DEFINE_ENUM(FSERR_DIRECTIO_WRITE);
+TRACE_DEFINE_ENUM(FSERR_DATA_LOST);
+TRACE_DEFINE_ENUM(FSERR_METADATA);
+
+TRACE_EVENT(xfs_healthmon_report_file_ioerror, /* logs the fserror event given to healthmon */
+       TP_PROTO(const struct xfs_healthmon *hm,
+                const struct fserror_event *p),
+       TP_ARGS(hm, p),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned int, type)
+               __field(unsigned long long, ino)
+               __field(unsigned int, gen)
+               __field(long long, pos)
+               __field(unsigned long long, len)
+               __field(int, error)
+       ),
+       TP_fast_assign(
+               __entry->dev = hm->dev;
+               __entry->type = p->type;        /* printed by name via FS_ERROR_STRINGS */
+               __entry->ino = XFS_I(p->inode)->i_ino;
+               __entry->gen = p->inode->i_generation;
+               __entry->pos = p->pos;
+               __entry->len = p->len;
+               __entry->error = p->error;      /* as supplied in @p, not sign-flipped */
+       ),
+       TP_printk("dev %d:%d ino 0x%llx gen 0x%x op %s pos 0x%llx bytecount 0x%llx error %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->gen,
+                 __print_symbolic(__entry->type, FS_ERROR_STRINGS),
+                 __entry->pos,
+                 __entry->len,
+                 __entry->error)
+);
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH