git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs_scrub: retry incomplete repairs
author: Darrick J. Wong <djwong@kernel.org>
Mon, 29 Jul 2024 23:23:06 +0000 (16:23 -0700)
committer: Darrick J. Wong <djwong@kernel.org>
Tue, 30 Jul 2024 00:01:07 +0000 (17:01 -0700)
If a repair says it didn't do anything on account of not being able to
complete a scan of the metadata, retry the repair a few times; if even
that doesn't work, we can delay it to phase 4.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
scrub/repair.c
scrub/scrub.c
scrub/scrub_private.h

diff --git a/scrub/repair.c b/scrub/repair.c
index 9b4b5d01626ed253db835902fb77f1a36d33cedf..2b863bb41958bcbf67553b027445cfe05f9b6c72 100644
@@ -58,6 +58,7 @@ xfs_repair_metadata(
        struct xfs_scrub_metadata       oldm;
        DEFINE_DESCR(dsc, ctx, format_scrub_descr);
        bool                            repair_only;
+       unsigned int                    tries = 0;
        int                             error;
 
        /*
@@ -99,6 +100,7 @@ xfs_repair_metadata(
                str_info(ctx, descr_render(&dsc),
                                _("Attempting optimization."));
 
+retry:
        error = -xfrog_scrub_metadata(xfdp, &meta);
        switch (error) {
        case 0:
@@ -179,9 +181,20 @@ _("Read-only filesystem; cannot make changes."));
                return CHECK_DONE;
        }
 
+       /*
+        * If the kernel says the repair was incomplete or that there was a
+        * cross-referencing discrepancy but no obvious corruption, we'll try
+        * the repair again, just in case the fs was busy.  Only retry so many
+        * times.
+        */
+       if (want_retry(&meta) && tries < 10) {
+               tries++;
+               goto retry;
+       }
+
        if (repair_flags & XRM_FINAL_WARNING)
                scrub_warn_incomplete_scrub(ctx, &dsc, &meta);
-       if (needs_repair(&meta)) {
+       if (needs_repair(&meta) || is_incomplete(&meta)) {
                /*
                 * Still broken; if we've been told not to complain then we
                 * just requeue this and try again later.  Otherwise we
diff --git a/scrub/scrub.c b/scrub/scrub.c
index 5c14ed2092eba725f19c57b4f9aefd41b52026eb..5fc549f97285c1d696d8f3df635fd543eaf96e6f 100644
@@ -137,8 +137,7 @@ _("Filesystem is shut down, aborting."));
         * we'll try the scan again, just in case the fs was busy.
         * Only retry so many times.
         */
-       if (tries < 10 && (is_incomplete(meta) ||
-                          (xref_disagrees(meta) && !is_corrupt(meta)))) {
+       if (want_retry(meta) && tries < 10) {
                tries++;
                goto retry;
        }
diff --git a/scrub/scrub_private.h b/scrub/scrub_private.h
index 08b9130cbc9e3790d7faa72f19813671f91e9996..53372e1f322bc945a511be968c892ac50ed5933b 100644
@@ -49,6 +49,16 @@ static inline bool needs_repair(struct xfs_scrub_metadata *sm)
        return is_corrupt(sm) || xref_disagrees(sm);
 }
 
+/*
+ * We want to retry an operation if the kernel says it couldn't complete the
+ * scan/repair; or if there were cross-referencing problems but the object was
+ * not obviously corrupt.
+ */
+static inline bool want_retry(struct xfs_scrub_metadata *sm)
+{
+       return is_incomplete(sm) || (xref_disagrees(sm) && !is_corrupt(sm));
+}
+
 void scrub_warn_incomplete_scrub(struct scrub_ctx *ctx, struct descr *dsc,
                struct xfs_scrub_metadata *meta);