]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs_spaceman: report health problems
authorDarrick J. Wong <darrick.wong@oracle.com>
Thu, 26 Sep 2019 17:41:34 +0000 (13:41 -0400)
committerEric Sandeen <sandeen@redhat.com>
Thu, 26 Sep 2019 17:41:34 +0000 (13:41 -0400)
Use the fs and ag geometry ioctls to report health problems to users.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
libfrog/fsgeom.c
libfrog/fsgeom.h
man/man8/xfs_spaceman.8
spaceman/Makefile
spaceman/health.c [new file with mode: 0644]
spaceman/init.c
spaceman/space.h

index 631286cda6bc1e0983a415359ecf462aa318b49e..3ea91e3fa2f2a2a250a3697131fb104777f47455 100644 (file)
@@ -159,3 +159,19 @@ xfd_close(
 
        return 0;
 }
+
+/*
+ * Fetch the geometry of allocation group @agno through the
+ * XFS_IOC_AG_GEOMETRY ioctl on @fd, storing the result in @ageo.
+ * Returns zero on success or the positive errno left by the failed ioctl.
+ */
+int
+xfrog_ag_geometry(
+       int                     fd,
+       unsigned int            agno,
+       struct xfs_ag_geometry  *ageo)
+{
+       /* The AG to query is passed in through the structure itself. */
+       ageo->ag_number = agno;
+
+       if (ioctl(fd, XFS_IOC_AG_GEOMETRY, ageo) != 0)
+               return errno;
+       return 0;
+}
index 5dcfc1bbc3bacb1cc24128587c2550f632894129..55b14c2bc1f10ea34105b1ad1f71e973db7fdc1d 100644 (file)
@@ -8,6 +8,7 @@
 void xfs_report_geom(struct xfs_fsop_geom *geo, const char *mntpoint,
                const char *logname, const char *rtname);
 int xfrog_geometry(int fd, struct xfs_fsop_geom *fsgeo);
+int xfrog_ag_geometry(int fd, unsigned int agno, struct xfs_ag_geometry *ageo);
 
 /*
  * Structure for recording whatever observations we want about the level of
index 12dd04e4961965c9328b5cf7bf49d8ebeab1ff0d..ece840d7300a575a6603a873aebf2ed1dfdc5b6e 100644 (file)
@@ -91,6 +91,34 @@ The output will have the same format that
 .BR "xfs_info" "(8)"
 prints when querying a filesystem.
 .TP
+.BI "health [ \-a agno] [ \-c ] [ \-f ] [ \-i inum ] [ \-q ] [ paths ]"
+Reports the health of the given group of filesystem metadata.
+.RS 1.0i
+.PD 0
+.TP 0.4i
+.B \-a agno
+Report on the health of the given allocation group.
+.TP
+.B \-c
+Scan all inodes in the filesystem and report each file's health status.
+If the
+.B \-a
+option is given, scan only the inodes in that AG.
+.TP
+.B \-f
+Report on the health of metadata that affect the entire filesystem.
+.TP
+.B \-i inum
+Report on the health of a specific inode.
+.TP
+.B \-q
+Report only unhealthy metadata.
+.TP
+.B paths
+Report on the health of the files at the given paths.
+.PD
+.RE
+.TP
 .BR "help [ " command " ]"
 Display a brief description of one or all commands.
 .TP
index b1c1b16d51a0896547e4b8b646de8ce52e9129ef..d01aa74a30d1ad3bae67db2d060f0a7110cb72ef 100644 (file)
@@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs
 
 LTCOMMAND = xfs_spaceman
 HFILES = init.h space.h
-CFILES = info.c init.c file.c prealloc.c trim.c
+CFILES = info.c init.c file.c health.c prealloc.c trim.c
 LSRCFILES = xfs_info.sh
 
 LLDLIBS = $(LIBXCMD) $(LIBFROG)
diff --git a/spaceman/health.c b/spaceman/health.c
new file mode 100644 (file)
index 0000000..a8bd3f3
--- /dev/null
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2019 Oracle.
+ * All Rights Reserved.
+ */
+#include "platform_defs.h"
+#include "libxfs.h"
+#include "command.h"
+#include "init.h"
+#include "input.h"
+#include "libfrog/paths.h"
+#include "libfrog/fsgeom.h"
+#include "libfrog/bulkstat.h"
+#include "space.h"
+
+/* Forward declaration; the initializer appears near the end of this file. */
+static cmdinfo_t health_cmd;
+/* Number of checked health flags seen by report_sick(); reset in health_f(). */
+static unsigned long long reported;
+/* Set by the -c option: also report on inodes via bulkstat. */
+static bool comprehensive;
+/* Set by the -q option: suppress lines for metadata that is not sick. */
+static bool quiet;
+
+/* True when the geometry reports a nonzero number of realtime blocks. */
+static bool
+has_realtime(
+       const struct xfs_fsop_geom      *g)
+{
+       return g->rtblocks > 0;
+}
+
+/* True when the geometry flags include XFS_FSOP_GEOM_FLAGS_FINOBT. */
+static bool
+has_finobt(
+       const struct xfs_fsop_geom      *g)
+{
+       return (g->flags & XFS_FSOP_GEOM_FLAGS_FINOBT) != 0;
+}
+
+/* True when the geometry flags include XFS_FSOP_GEOM_FLAGS_RMAPBT. */
+static bool
+has_rmapbt(
+       const struct xfs_fsop_geom      *g)
+{
+       return (g->flags & XFS_FSOP_GEOM_FLAGS_RMAPBT) != 0;
+}
+
+/* True when the geometry flags include XFS_FSOP_GEOM_FLAGS_REFLINK. */
+static bool
+has_reflink(
+       const struct xfs_fsop_geom      *g)
+{
+       return (g->flags & XFS_FSOP_GEOM_FLAGS_REFLINK) != 0;
+}
+
+/* Associates one health flag bit with the text printed for it. */
+struct flag_map {
+       /* Bit tested against the "sick" and "checked" masks in report_sick(). */
+       unsigned int            mask;
+       /* Optional predicate; if set and it returns false, the entry is skipped. */
+       bool                    (*has_fn)(const struct xfs_fsop_geom *g);
+       /* Label printed for this flag (passed through _() when printed). */
+       const char              *descr;
+};
+
+/*
+ * Health flags describing whole-filesystem metadata.  health_f() tests these
+ * against the sick/checked fields of the filesystem geometry.
+ */
+static const struct flag_map fs_flags[] = {
+       {
+               .mask = XFS_FSOP_GEOM_SICK_COUNTERS,
+               .descr = "summary counters",
+       },
+       {
+               .mask = XFS_FSOP_GEOM_SICK_UQUOTA,
+               .descr = "user quota",
+       },
+       {
+               .mask = XFS_FSOP_GEOM_SICK_GQUOTA,
+               .descr = "group quota",
+       },
+       {
+               .mask = XFS_FSOP_GEOM_SICK_PQUOTA,
+               .descr = "project quota",
+       },
+       {
+               .mask = XFS_FSOP_GEOM_SICK_RT_BITMAP,
+               .descr = "realtime bitmap",
+               .has_fn = has_realtime,
+       },
+       {
+               .mask = XFS_FSOP_GEOM_SICK_RT_SUMMARY,
+               .descr = "realtime summary",
+               .has_fn = has_realtime,
+       },
+       /* Zero mask terminates the table; report_sick() stops here. */
+       {0},
+};
+
+/*
+ * Health flags describing per-AG metadata.  report_ag_sick() tests these
+ * against the ag_sick/ag_checked fields of the AG geometry.
+ */
+static const struct flag_map ag_flags[] = {
+       {
+               .mask = XFS_AG_GEOM_SICK_SB,
+               .descr = "superblock",
+       },
+       {
+               .mask = XFS_AG_GEOM_SICK_AGF,
+               .descr = "AGF header",
+       },
+       {
+               .mask = XFS_AG_GEOM_SICK_AGFL,
+               .descr = "AGFL header",
+       },
+       {
+               .mask = XFS_AG_GEOM_SICK_AGI,
+               .descr = "AGI header",
+       },
+       {
+               .mask = XFS_AG_GEOM_SICK_BNOBT,
+               .descr = "free space by block btree",
+       },
+       {
+               .mask = XFS_AG_GEOM_SICK_CNTBT,
+               .descr = "free space by length btree",
+       },
+       {
+               .mask = XFS_AG_GEOM_SICK_INOBT,
+               .descr = "inode btree",
+       },
+       {
+               .mask = XFS_AG_GEOM_SICK_FINOBT,
+               .descr = "free inode btree",
+               .has_fn = has_finobt,
+       },
+       {
+               .mask = XFS_AG_GEOM_SICK_RMAPBT,
+               .descr = "reverse mappings btree",
+               .has_fn = has_rmapbt,
+       },
+       {
+               .mask = XFS_AG_GEOM_SICK_REFCNTBT,
+               .descr = "reference count btree",
+               .has_fn = has_reflink,
+       },
+       /* Zero mask terminates the table; report_sick() stops here. */
+       {0},
+};
+
+/*
+ * Health flags describing per-inode metadata.  These are tested against the
+ * bs_sick/bs_checked fields of a bulkstat record.
+ */
+static const struct flag_map inode_flags[] = {
+       {
+               .mask = XFS_BS_SICK_INODE,
+               .descr = "inode core",
+       },
+       {
+               .mask = XFS_BS_SICK_BMBTD,
+               .descr = "data fork",
+       },
+       {
+               .mask = XFS_BS_SICK_BMBTA,
+               .descr = "extended attribute fork",
+       },
+       {
+               .mask = XFS_BS_SICK_BMBTC,
+               .descr = "copy on write fork",
+       },
+       {
+               .mask = XFS_BS_SICK_DIR,
+               .descr = "directory",
+       },
+       {
+               .mask = XFS_BS_SICK_XATTR,
+               .descr = "extended attributes",
+       },
+       {
+               .mask = XFS_BS_SICK_SYMLINK,
+               .descr = "symbolic link target",
+       },
+       {
+               .mask = XFS_BS_SICK_PARENT,
+               .descr = "parent pointers",
+       },
+       /* Zero mask terminates the table; report_sick() stops here. */
+       {0},
+};
+
+/*
+ * Turn a pair of flag masks into a printed report.
+ *
+ * Walks @maps up to its zero-mask terminator.  For every entry whose bit is
+ * set in @checked (and whose has_fn predicate, if any, accepts the open
+ * filesystem's geometry), bumps the global "reported" counter and prints
+ * "@descr <flag description>: unhealthy|ok" depending on whether the bit is
+ * also set in @sick.  In quiet mode the "ok" lines are suppressed.
+ */
+static void
+report_sick(
+       const char                      *descr,
+       const struct flag_map           *maps,
+       unsigned int                    sick,
+       unsigned int                    checked)
+{
+       const struct flag_map           *map;
+
+       for (map = maps; map->mask != 0; map++) {
+               const char              *status;
+               bool                    unhealthy;
+
+               /* Entry does not apply to this filesystem's feature set. */
+               if (map->has_fn != NULL && !map->has_fn(&file->xfd.fsgeom))
+                       continue;
+               /* Nothing is known about metadata that was never checked. */
+               if ((checked & map->mask) == 0)
+                       continue;
+
+               reported++;
+               unhealthy = (sick & map->mask) != 0;
+               if (quiet && !unhealthy)
+                       continue;
+
+               status = unhealthy ? _("unhealthy") : _("ok");
+               printf("%s %s: %s\n", descr, _(map->descr), status);
+       }
+}
+
+/*
+ * Query the geometry of allocation group @agno on the open filesystem and
+ * print its health flags, labelled "AG <agno>".
+ * Returns 0 on success, or 1 after reporting a query failure via perror().
+ */
+static int
+report_ag_sick(
+       xfs_agnumber_t          agno)
+{
+       struct xfs_ag_geometry  ageo = { 0 };
+       char                    label[256];
+       int                     error;
+
+       error = xfrog_ag_geometry(file->xfd.fd, agno, &ageo);
+       if (error != 0) {
+               /* The helper returns the error code; perror() wants errno. */
+               errno = error;
+               perror("ag_geometry");
+               return 1;
+       }
+
+       snprintf(label, sizeof(label) - 1, _("AG %u"), agno);
+       report_sick(label, ag_flags, ageo.ag_sick, ageo.ag_checked);
+       return 0;
+}
+
+/*
+ * Look up inode @ino with a single-inode bulkstat on the open filesystem and
+ * print its health flags.  @descr labels the output; when it is NULL a label
+ * of the form "inode <ino>" is generated instead.
+ * Returns 0 on success, or 1 after reporting a lookup failure via perror().
+ */
+static int
+report_inode_health(
+       unsigned long long      ino,
+       const char              *descr)
+{
+       struct xfs_bstat        bstat;
+       char                    label[256];
+       int                     error;
+
+       if (descr == NULL) {
+               snprintf(label, sizeof(label) - 1, _("inode %llu"), ino);
+               descr = label;
+       }
+
+       error = xfrog_bulkstat_single(&file->xfd, ino, &bstat);
+       if (error != 0) {
+               /* The helper returns the error code; perror() wants errno. */
+               errno = error;
+               perror(descr);
+               return 1;
+       }
+
+       report_sick(descr, inode_flags, bstat.bs_sick, bstat.bs_checked);
+       return 0;
+}
+
+/*
+ * Report on a file's health.
+ *
+ * @path is examined with lstat() and the open file with fstat(); the two
+ * must have the same st_dev, since the inode number taken from @path is
+ * looked up through the open filesystem.  The report is labelled with @path.
+ * Returns 0 on success, or 1 after printing an error message.
+ */
+static int
+report_file_health(
+       const char      *path)
+{
+       struct stat     stata, statb;
+       int             ret;
+
+       ret = lstat(path, &statb);
+       if (ret) {
+               perror(path);
+               return 1;
+       }
+
+       ret = fstat(file->xfd.fd, &stata);
+       if (ret) {
+               perror(file->name);
+               return 1;
+       }
+
+       if (stata.st_dev != statb.st_dev) {
+               /* Terminate the line so later output does not run into it. */
+               fprintf(stderr, _("%s: not on the open filesystem\n"), path);
+               return 1;
+       }
+
+       return report_inode_health(statb.st_ino, path);
+}
+
+#define BULKSTAT_NR            (128)
+
+/*
+ * Report on all files' health for a given @agno.  If @agno is NULLAGNUMBER,
+ * report on all files in the filesystem.
+ *
+ * Inodes are fetched with xfrog_bulkstat() in batches of up to BULKSTAT_NR
+ * records until a batch comes back empty.  Returns zero on success, or the
+ * error code from xfrog_bulkstat() after reporting it via perror().
+ */
+static int
+report_bulkstat_health(
+       xfs_agnumber_t          agno)
+{
+       struct xfs_bstat        bstat[BULKSTAT_NR];
+       char                    descr[256];
+       uint64_t                startino = 0;
+       uint64_t                lastino = -1ULL;
+       uint32_t                ocount;
+       uint32_t                i;
+       int                     error;
+
+       /* Restrict the scan to the inode number range of a single AG. */
+       if (agno != NULLAGNUMBER) {
+               startino = cvt_agino_to_ino(&file->xfd, agno, 0);
+               lastino = cvt_agino_to_ino(&file->xfd, agno + 1, 0) - 1;
+       }
+
+       do {
+               error = xfrog_bulkstat(&file->xfd, &startino, BULKSTAT_NR,
+                               bstat, &ocount);
+               if (error)
+                       break;
+               for (i = 0; i < ocount; i++) {
+                       /* Stop at the first inode beyond the requested range. */
+                       if (bstat[i].bs_ino > lastino)
+                               goto out;
+                       /*
+                        * bs_ino's underlying type may not be unsigned long
+                        * long on every platform, so cast it to match %llu.
+                        */
+                       snprintf(descr, sizeof(descr) - 1, _("inode %llu"),
+                                       (unsigned long long)bstat[i].bs_ino);
+                       report_sick(descr, inode_flags, bstat[i].bs_sick,
+                                       bstat[i].bs_checked);
+               }
+       } while (ocount > 0);
+
+       if (error) {
+               errno = error;
+               perror("bulkstat");
+       }
+out:
+       return error;
+}
+
+#define OPT_STRING ("a:cfi:q")
+
+/*
+ * Report on health problems in XFS filesystem.
+ *
+ * The arguments are parsed twice.  The first pass validates numeric option
+ * arguments and records the reporting modifiers (-c, -q) so that they apply
+ * no matter where they appear on the command line; the second pass performs
+ * the -a, -f and -i reports in command-line order.  Any remaining arguments
+ * are treated as file paths.  With no -a, -f, -i or path arguments, the
+ * filesystem-wide flags and every AG are reported.
+ *
+ * Returns 0 on success and 1 on error; an unrecognized option returns
+ * whatever command_usage() returns.
+ */
+static int
+health_f(
+       int                     argc,
+       char                    **argv)
+{
+       unsigned long long      x;
+       xfs_agnumber_t          agno;
+       char                    *endp;
+       bool                    default_report = true;
+       int                     c;
+       int                     ret;
+
+       /*
+        * These live at file scope, so they would otherwise keep whatever a
+        * previous call to this function in the same process left in them.
+        */
+       reported = 0;
+       comprehensive = false;
+       quiet = false;
+
+       if (file->xfd.fsgeom.version != XFS_FSOP_GEOM_VERSION_V5) {
+               /*
+                * Nothing in this function has set errno yet, so give
+                * perror() a definite value to print.
+                * NOTE(review): presumably a non-V5 geometry means the
+                * sick/checked fields were not supplied -- confirm.
+                */
+               errno = EOPNOTSUPP;
+               perror("health");
+               return 1;
+       }
+
+       /* Set our reporting options appropriately in the first pass. */
+       while ((c = getopt(argc, argv, OPT_STRING)) != EOF) {
+               switch (c) {
+               case 'a':
+                       default_report = false;
+                       errno = 0;
+                       x = strtoll(optarg, &endp, 10);
+                       /* Reject empty input and trailing garbage. */
+                       if (!errno && (endp == optarg || *endp != '\0'))
+                               errno = EINVAL;
+                       if (!errno && x >= NULLAGNUMBER)
+                               errno = ERANGE;
+                       if (errno) {
+                               perror("ag health");
+                               return 1;
+                       }
+                       break;
+               case 'c':
+                       comprehensive = true;
+                       break;
+               case 'f':
+                       default_report = false;
+                       break;
+               case 'i':
+                       default_report = false;
+                       errno = 0;
+                       x = strtoll(optarg, &endp, 10);
+                       /* Reject empty input and trailing garbage. */
+                       if (!errno && (endp == optarg || *endp != '\0'))
+                               errno = EINVAL;
+                       if (errno) {
+                               perror("inode health");
+                               return 1;
+                       }
+                       break;
+               case 'q':
+                       quiet = true;
+                       break;
+               default:
+                       return command_usage(&health_cmd);
+               }
+       }
+       if (optind < argc)
+               default_report = false;
+
+       /* Reparse arguments, this time for reporting actions. */
+       optind = 1;
+       while ((c = getopt(argc, argv, OPT_STRING)) != EOF) {
+               switch (c) {
+               case 'a':
+                       /* Already validated in the first pass. */
+                       agno = strtoll(optarg, NULL, 10);
+                       ret = report_ag_sick(agno);
+                       if (!ret && comprehensive)
+                               ret = report_bulkstat_health(agno);
+                       if (ret)
+                               return 1;
+                       break;
+               case 'f':
+                       report_sick(_("filesystem"), fs_flags,
+                                       file->xfd.fsgeom.sick,
+                                       file->xfd.fsgeom.checked);
+                       if (comprehensive) {
+                               ret = report_bulkstat_health(NULLAGNUMBER);
+                               if (ret)
+                                       return 1;
+                       }
+                       break;
+               case 'i':
+                       /* Already validated in the first pass. */
+                       x = strtoll(optarg, NULL, 10);
+                       ret = report_inode_health(x, NULL);
+                       if (ret)
+                               return 1;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       for (c = optind; c < argc; c++) {
+               ret = report_file_health(argv[c]);
+               if (ret)
+                       return 1;
+       }
+
+       /* No arguments gets us a summary of fs state. */
+       if (default_report) {
+               report_sick(_("filesystem"), fs_flags, file->xfd.fsgeom.sick,
+                               file->xfd.fsgeom.checked);
+
+               for (agno = 0; agno < file->xfd.fsgeom.agcount; agno++) {
+                       ret = report_ag_sick(agno);
+                       if (ret)
+                               return 1;
+               }
+               if (comprehensive) {
+                       ret = report_bulkstat_health(NULLAGNUMBER);
+                       if (ret)
+                               return 1;
+               }
+       }
+
+       /* report_sick() never saw a checked flag: nothing has been scanned. */
+       if (!reported) {
+               fprintf(stderr,
+_("Health status has not been collected for this filesystem.\n"));
+               fprintf(stderr,
+_("Please run xfs_scrub(8) to remedy this situation.\n"));
+       }
+
+       return 0;
+}
+
+/* Print the translated long help text for the "health" command. */
+static void
+health_help(void)
+{
+       printf(_(
+"\n"
+"Report all observed filesystem health problems.\n"
+"\n"
+" -a agno  -- Report health of the given allocation group.\n"
+" -c       -- Report on the health of all inodes.\n"
+" -f       -- Report health of the overall filesystem.\n"
+" -i inum  -- Report health of a given inode number.\n"
+" -q       -- Only report unhealthy metadata.\n"
+" paths    -- Report health of the given file path.\n"
+"\n"));
+
+}
+
+/*
+ * Command table entry for "health".  The oneline summary is not set here;
+ * health_init() fills it in before registering the command.
+ */
+static cmdinfo_t health_cmd = {
+       .name = "health",
+       .cfunc = health_f,
+       .argmin = 0,
+       /* NOTE(review): -1 presumably means no upper bound on arguments -- confirm. */
+       .argmax = -1,
+       .args = "[-a agno] [-c] [-f] [-i inum] [-q] [paths]",
+       .flags = CMD_FLAG_ONESHOT,
+       .help = health_help,
+};
+
+/* Fill in the translated one-line summary and register the "health" command. */
+void
+health_init(void)
+{
+       health_cmd.oneline = _("Report observed XFS health problems.");
+       add_command(&health_cmd);
+}
index 4afdb3866c35f93fd62d18b83cd57de63851961a..cf1ff3cbb0ee8d73e3e38c3c92551333f3f80ef2 100644 (file)
@@ -34,6 +34,7 @@ init_commands(void)
        quit_init();
        trim_init();
        freesp_init();
+       health_init();
 }
 
 static int
index 2c26884a024825f4e52a7bdda105da99cdf76bb1..723209edd9984b8ee558d69f08f8bd5bfe087e07 100644 (file)
@@ -32,5 +32,6 @@ extern void   freesp_init(void);
 # define freesp_init() do { } while (0)
 #endif
 extern void    info_init(void);
+extern void    health_init(void);
 
 #endif /* XFS_SPACEMAN_SPACE_H_ */