* Defer all the repairs until phase 4, being careful about locking since the
* inode scrub threads are not per-AG.
*/
-static void
+static int
defer_inode_repair(
- struct scrub_inode_ctx *ictx,
- xfs_agnumber_t agno,
- struct action_list *alist)
+ struct scrub_inode_ctx *ictx,
+ const struct xfs_bulkstat *bstat,
+ struct scrub_item *sri)
{
- if (alist->nr == 0)
- return;
+ struct action_item *aitem = NULL;
+ xfs_agnumber_t agno;
+ int ret;
+
+ ret = repair_item_to_action_item(ictx->ctx, sri, &aitem); /* wraps a copy of sri if it still needs repair */
+ if (ret || !aitem) /* allocation failed, or nothing left to repair */
+ return ret;
+ agno = cvt_ino_to_agno(&ictx->ctx->mnt, bstat->bs_ino); /* pick the per-AG list from the inode number */
pthread_mutex_lock(&ictx->locks[agno]);
- action_list_defer(ictx->ctx, agno, alist);
+ action_list_add(&ictx->ctx->action_lists[agno], aitem);
pthread_mutex_unlock(&ictx->locks[agno]);
+ return 0;
}
-/* Run repair actions now and defer unfinished items for later. */
+/*
+ * Run repair actions now and leave unfinished items for later.
+ *
+ * Returns 0 without attempting anything when always_defer_repairs is set;
+ * otherwise returns whatever repair_file_corruption() returns.  Nothing is
+ * queued here; leftover work stays recorded in @sri.
+ */
static int
try_inode_repair(
- struct scrub_inode_ctx *ictx,
- int fd,
- xfs_agnumber_t agno,
- struct action_list *alist)
+ struct scrub_inode_ctx *ictx,
+ struct scrub_item *sri,
+ int fd,
+ const struct xfs_bulkstat *bstat)
{
- int ret;
-
/*
* If at the start of phase 3 we already had ag/rt metadata repairs
* queued up for phase 4, leave the action list untouched so that file
- * metadata repairs will be deferred in scan order until phase 4.
+ * metadata repairs will be deferred until phase 4.
*/
if (ictx->always_defer_repairs)
return 0;
- ret = action_list_process(ictx->ctx, fd, alist,
- XRM_REPAIR_ONLY | XRM_NOPROGRESS);
- if (ret)
- return ret;
-
- defer_inode_repair(ictx, agno, alist);
- return 0;
+ /*
+ * Try to repair the file metadata. Unfixed metadata will remain in
+ * the scrub item state to be queued as a single action item.
+ */
+ return repair_file_corruption(ictx->ctx, sri, fd);
}
/* Verify the contents, xattrs, and extent maps of an inode. */
struct scrub_item sri;
struct scrub_inode_ctx *ictx = arg;
struct ptcounter *icount = ictx->icount;
- xfs_agnumber_t agno;
int fd = -1;
int error;
scrub_item_init_file(&sri, bstat);
action_list_init(&alist);
- agno = cvt_ino_to_agno(&ctx->mnt, bstat->bs_ino);
background_sleep();
/*
if (error)
goto out;
- error = try_inode_repair(ictx, fd, agno, &alist);
+ error = try_inode_repair(ictx, &sri, fd, bstat);
if (error)
goto out;
if (error)
goto out;
- error = try_inode_repair(ictx, fd, agno, &alist);
+ error = try_inode_repair(ictx, &sri, fd, bstat);
if (error)
goto out;
goto out;
/* Try to repair the file while it's open. */
- error = try_inode_repair(ictx, fd, agno, &alist);
+ error = try_inode_repair(ictx, &sri, fd, bstat);
if (error)
goto out;
progress_add(1);
if (!error && !ictx->aborted)
- defer_inode_repair(ictx, agno, &alist);
+ error = defer_inode_repair(ictx, bstat, &sri);
if (fd >= 0) {
int err2;
xfs_repair_metadata(
struct scrub_ctx *ctx,
struct xfs_fd *xfdp,
- struct action_item *aitem,
+ unsigned int scrub_type,
+ struct scrub_item *sri,
unsigned int repair_flags)
{
struct xfs_scrub_metadata meta = { 0 };
DEFINE_DESCR(dsc, ctx, format_scrub_descr);
int error;
- assert(aitem->type < XFS_SCRUB_TYPE_NR);
+ assert(scrub_type < XFS_SCRUB_TYPE_NR);
assert(!debug_tweak_on("XFS_SCRUB_NO_KERNEL"));
- meta.sm_type = aitem->type;
- meta.sm_flags = aitem->flags | XFS_SCRUB_IFLAG_REPAIR;
+ meta.sm_type = scrub_type;
+ meta.sm_flags = XFS_SCRUB_IFLAG_REPAIR;
if (use_force_rebuild)
meta.sm_flags |= XFS_SCRUB_IFLAG_FORCE_REBUILD;
- switch (xfrog_scrubbers[aitem->type].group) {
+ switch (xfrog_scrubbers[scrub_type].group) {
case XFROG_SCRUB_GROUP_AGHEADER:
case XFROG_SCRUB_GROUP_PERAG:
- meta.sm_agno = aitem->agno;
+ meta.sm_agno = sri->sri_agno;
break;
case XFROG_SCRUB_GROUP_INODE:
- meta.sm_ino = aitem->ino;
- meta.sm_gen = aitem->gen;
+ meta.sm_ino = sri->sri_ino;
+ meta.sm_gen = sri->sri_gen;
break;
default:
break;
return CHECK_RETRY;
memcpy(&oldm, &meta, sizeof(oldm));
+ oldm.sm_flags = sri->sri_state[scrub_type] & SCRUB_ITEM_REPAIR_ANY;
descr_set(&dsc, &oldm);
- if (needs_repair(&meta))
+ if (needs_repair(&oldm))
str_info(ctx, descr_render(&dsc), _("Attempting repair."));
else if (debug || verbose)
str_info(ctx, descr_render(&dsc),
* it done and move on.
*/
if (is_unoptimized(&oldm) ||
- debug_tweak_on("XFS_SCRUB_FORCE_REPAIR"))
+ debug_tweak_on("XFS_SCRUB_FORCE_REPAIR")) {
+ scrub_item_clean_state(sri, scrub_type);
return CHECK_DONE;
+ }
/*
* If we're in no-complain mode, requeue the check for
* later. It's possible that an error in another
/* Kernel doesn't know how to repair this? */
str_corrupt(ctx, descr_render(&dsc),
_("Don't know how to fix; offline repair required."));
+ scrub_item_clean_state(sri, scrub_type);
return CHECK_DONE;
case EROFS:
/* Read-only filesystem, can't fix. */
return CHECK_ABORT;
case ENOENT:
/* Metadata not present, just skip it. */
+ scrub_item_clean_state(sri, scrub_type);
return CHECK_DONE;
case ENOMEM:
case ENOSPC:
/* Don't care if preen fails due to low resources. */
- if (is_unoptimized(&oldm) && !needs_repair(&oldm))
+ if (is_unoptimized(&oldm) && !needs_repair(&oldm)) {
+ scrub_item_clean_state(sri, scrub_type);
return CHECK_DONE;
+ }
fallthrough;
default:
/*
- * Operational error. If the caller doesn't want us
- * to complain about repair failures, tell the caller
- * to requeue the repair for later and don't say a
- * thing. Otherwise, print error and bail out.
+ * Operational error. If the caller doesn't want us to
+ * complain about repair failures, tell the caller to requeue
+ * the repair for later and don't say a thing. Otherwise,
+ * print an error, mark the item clean because we're done with
+ * trying to repair it, and bail out.
*/
if (!(repair_flags & XRM_FINAL_WARNING))
return CHECK_RETRY;
str_liberror(ctx, error, descr_render(&dsc));
+ scrub_item_clean_state(sri, scrub_type);
return CHECK_DONE;
}
record_preen(ctx, descr_render(&dsc),
_("Optimization successful."));
}
+
+ scrub_item_clean_state(sri, scrub_type);
return CHECK_DONE;
}
/*
* Prioritize action items in order of how long we can wait.
- * 0 = do it now, 10000 = do it later.
*
* To minimize the amount of repair work, we want to prioritize metadata
* objects by perceived corruptness. If CORRUPT is set, the fields are
* in order.
*/
-/* Sort action items in severity order. */
-static int
-PRIO(
- const struct action_item *aitem,
- int order)
-{
- if (aitem->flags & XFS_SCRUB_OFLAG_CORRUPT)
- return order;
- else if (aitem->flags & XFS_SCRUB_OFLAG_XCORRUPT)
- return 100 + order;
- else if (aitem->flags & XFS_SCRUB_OFLAG_XFAIL)
- return 200 + order;
- else if (aitem->flags & XFS_SCRUB_OFLAG_PREEN)
- return 300 + order;
- abort();
-}
-
-/* Sort the repair items in dependency order. */
-static int
-xfs_action_item_priority(
- const struct action_item *aitem)
-{
- switch (aitem->type) {
- case XFS_SCRUB_TYPE_SB:
- case XFS_SCRUB_TYPE_AGF:
- case XFS_SCRUB_TYPE_AGFL:
- case XFS_SCRUB_TYPE_AGI:
- case XFS_SCRUB_TYPE_BNOBT:
- case XFS_SCRUB_TYPE_CNTBT:
- case XFS_SCRUB_TYPE_INOBT:
- case XFS_SCRUB_TYPE_FINOBT:
- case XFS_SCRUB_TYPE_REFCNTBT:
- case XFS_SCRUB_TYPE_RMAPBT:
- case XFS_SCRUB_TYPE_INODE:
- case XFS_SCRUB_TYPE_BMBTD:
- case XFS_SCRUB_TYPE_BMBTA:
- case XFS_SCRUB_TYPE_BMBTC:
- return PRIO(aitem, aitem->type - 1);
- case XFS_SCRUB_TYPE_DIR:
- case XFS_SCRUB_TYPE_XATTR:
- case XFS_SCRUB_TYPE_SYMLINK:
- case XFS_SCRUB_TYPE_PARENT:
- return PRIO(aitem, XFS_SCRUB_TYPE_DIR);
- case XFS_SCRUB_TYPE_RTBITMAP:
- case XFS_SCRUB_TYPE_RTSUM:
- return PRIO(aitem, XFS_SCRUB_TYPE_RTBITMAP);
- case XFS_SCRUB_TYPE_UQUOTA:
- case XFS_SCRUB_TYPE_GQUOTA:
- case XFS_SCRUB_TYPE_PQUOTA:
- return PRIO(aitem, XFS_SCRUB_TYPE_UQUOTA);
- case XFS_SCRUB_TYPE_QUOTACHECK:
- /* This should always go after [UGP]QUOTA no matter what. */
- return PRIO(aitem, aitem->type);
- case XFS_SCRUB_TYPE_FSCOUNTERS:
- /* This should always go after AG headers no matter what. */
- return PRIO(aitem, INT_MAX);
- }
- abort();
-}
-
-/* Make sure that btrees get repaired before headers. */
-static int
-xfs_action_item_compare(
- void *priv,
- const struct list_head *a,
- const struct list_head *b)
-{
- const struct action_item *ra;
- const struct action_item *rb;
-
- ra = container_of(a, struct action_item, list);
- rb = container_of(b, struct action_item, list);
-
- return xfs_action_item_priority(ra) - xfs_action_item_priority(rb);
-}
+struct action_item { /* one queued repair work item */
+ struct list_head list; /* linkage on an action_list's list */
+ struct scrub_item sri; /* private copy of the scrub state still awaiting repair */
+};
/*
* Figure out which AG metadata must be fixed before we can move on
* to the inode scan.
*/
void
-action_list_find_mustfix(
- struct action_list *alist,
- struct action_list *immediate_alist)
+repair_item_mustfix(
+ struct scrub_item *sri,
+ struct scrub_item *fix_now)
{
- struct action_item *n;
- struct action_item *aitem;
+ unsigned int scrub_type;
- list_for_each_entry_safe(aitem, n, &alist->list, list) {
- if (!(aitem->flags & XFS_SCRUB_OFLAG_CORRUPT))
+ assert(sri->sri_agno != -1U);
+ scrub_item_init_ag(fix_now, sri->sri_agno);
+
+ foreach_scrub_type(scrub_type) {
+ if (!(sri->sri_state[scrub_type] & SCRUB_ITEM_CORRUPT))
continue;
- switch (aitem->type) {
+
+ switch (scrub_type) {
case XFS_SCRUB_TYPE_AGI:
case XFS_SCRUB_TYPE_FINOBT:
case XFS_SCRUB_TYPE_INOBT:
- alist->nr--;
- list_move_tail(&aitem->list, &immediate_alist->list);
- immediate_alist->nr++;
+ fix_now->sri_state[scrub_type] |= SCRUB_ITEM_CORRUPT;
break;
}
}
/* Determine if primary or secondary metadata are inconsistent. */
unsigned int
-action_list_difficulty(
- const struct action_list *alist)
+repair_item_difficulty(
+ const struct scrub_item *sri)
{
- struct action_item *aitem, *n;
- unsigned int ret = 0;
+ unsigned int scrub_type;
+ unsigned int ret = 0;
- list_for_each_entry_safe(aitem, n, &alist->list, list) {
- if (!(aitem->flags & (XFS_SCRUB_OFLAG_CORRUPT |
- XFS_SCRUB_OFLAG_XCORRUPT |
- XFS_SCRUB_OFLAG_XFAIL)))
+ foreach_scrub_type(scrub_type) {
+ if (!(sri->sri_state[scrub_type] & (XFS_SCRUB_OFLAG_CORRUPT |
+ XFS_SCRUB_OFLAG_XCORRUPT |
+ XFS_SCRUB_OFLAG_XFAIL)))
continue;
- switch (aitem->type) {
+ switch (scrub_type) {
case XFS_SCRUB_TYPE_RMAPBT:
ret |= REPAIR_DIFFICULTY_SECONDARY;
break;
alist->sorted = false;
}
-/* Number of repairs in this list. */
+/*
+ * Number of pending repairs in this list.  This walks every action item and
+ * sums repair_item_count_needsrepair() over them, so the cost grows with the
+ * length of the list.
+ */
unsigned long long
action_list_length(
struct action_list *alist)
{
- return alist->nr;
-};
+ struct action_item *aitem;
+ unsigned long long ret = 0;
+
+ list_for_each_entry(aitem, &alist->list, list)
+ ret += repair_item_count_needsrepair(&aitem->sri);
+
+ return ret;
+}
/* Add to the list of repairs. */
void
alist->sorted = false;
}
-/* Splice two repair lists. */
-void
-action_list_splice(
- struct action_list *dest,
- struct action_list *src)
-{
- if (src->nr == 0)
- return;
-
- list_splice_tail_init(&src->list, &dest->list);
- dest->nr += src->nr;
- src->nr = 0;
- dest->sorted = false;
-}
-
/* Repair everything on this list. */
+/*
+ * Each action item is handed to repair_item(); items with nothing left to
+ * repair afterwards are unlinked and freed, the rest stay queued.  Returns 0
+ * on success (including for an empty list), ECANCELED if too many errors
+ * have been seen, or the first nonzero return from repair_item().
+ */
int
action_list_process(
struct scrub_ctx *ctx,
- int fd,
struct action_list *alist,
unsigned int repair_flags)
{
- struct xfs_fd xfd;
- struct xfs_fd *xfdp = &ctx->mnt;
struct action_item *aitem;
struct action_item *n;
- enum check_outcome fix;
+ int ret = 0; /* must be defined even if the list is empty */
+
+ list_for_each_entry_safe(aitem, n, &alist->list, list) {
+ if (scrub_excessive_errors(ctx))
+ return ECANCELED;
+
+ ret = repair_item(ctx, &aitem->sri, repair_flags);
+ if (ret)
+ break;
+
+ if (repair_item_count_needsrepair(&aitem->sri) == 0) {
+ list_del(&aitem->list);
+ free(aitem);
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * For a given filesystem object, perform all repairs of a given class
+ * (corrupt, xcorrupt, xfail, preen) if the repair item says it's needed.
+ *
+ * @override_fd: if >= 0, repairs are issued through a copy of ctx->mnt whose
+ *               fd has been replaced by this descriptor.
+ * @repair_mask: only scrub types whose sri_state has one of these bits set
+ *               are handed to xfs_repair_metadata().
+ * @flags:       XRM_* flags passed through to xfs_repair_metadata();
+ *               XRM_NOPROGRESS suppresses progress_add() for each repair
+ *               that reports CHECK_DONE.
+ *
+ * Does nothing and returns 0 when ctx->mode is below SCRUB_MODE_REPAIR.
+ * Returns ECANCELED if scrub_excessive_errors() trips or a repair reports
+ * CHECK_ABORT, otherwise 0.
+ */
+static int
+repair_item_class(
+ struct scrub_ctx *ctx,
+ struct scrub_item *sri,
+ int override_fd,
+ uint8_t repair_mask,
+ unsigned int flags)
+{
+ struct xfs_fd xfd;
+ struct xfs_fd *xfdp = &ctx->mnt;
+ unsigned int scrub_type;
+
+ if (ctx->mode < SCRUB_MODE_REPAIR)
+ return 0;
/*
* If the caller passed us a file descriptor for a scrub, use it
* instead of scrub-by-handle because this enables the kernel to skip
* costly inode btree lookups.
*/
- if (fd >= 0) {
+ if (override_fd >= 0) {
memcpy(&xfd, xfdp, sizeof(xfd));
- xfd.fd = fd;
+ xfd.fd = override_fd;
xfdp = &xfd;
}
- if (!alist->sorted) {
- list_sort(NULL, &alist->list, xfs_action_item_compare);
- alist->sorted = true;
- }
+ foreach_scrub_type(scrub_type) {
+ enum check_outcome fix;
- list_for_each_entry_safe(aitem, n, &alist->list, list) {
- fix = xfs_repair_metadata(ctx, xfdp, aitem, repair_flags);
+ if (scrub_excessive_errors(ctx))
+ return ECANCELED;
+
+ if (!(sri->sri_state[scrub_type] & repair_mask))
+ continue;
+
+ fix = xfs_repair_metadata(ctx, xfdp, scrub_type, sri, flags);
switch (fix) {
case CHECK_DONE:
- if (!(repair_flags & XRM_NOPROGRESS))
+ if (!(flags & XRM_NOPROGRESS))
progress_add(1);
- alist->nr--;
- list_del(&aitem->list);
- free(aitem);
continue;
case CHECK_ABORT:
return ECANCELED;
}
}
- if (scrub_excessive_errors(ctx))
- return ECANCELED;
return 0;
}
-/* Defer all the repairs until phase 4. */
-void
-action_list_defer(
- struct scrub_ctx *ctx,
- xfs_agnumber_t agno,
- struct action_list *alist)
+/*
+ * Repair all parts (i.e. scrub types) of this filesystem object for which
+ * corruption has been observed directly. Other types of repair work (fixing
+ * cross referencing problems and preening) are deferred.
+ *
+ * This function should only be called to perform spot repairs of fs objects
+ * during phase 2 and 3 while we still have open handles to those objects.
+ *
+ * No override fd is supplied (-1 is passed), and the repair runs with
+ * XRM_REPAIR_ONLY | XRM_NOPROGRESS.  The return value is that of
+ * repair_item_class().
+ */
+int
+repair_item_corruption(
+ struct scrub_ctx *ctx,
+ struct scrub_item *sri)
{
- ASSERT(agno < ctx->mnt.fsgeom.agcount);
+ return repair_item_class(ctx, sri, -1, SCRUB_ITEM_CORRUPT,
+ XRM_REPAIR_ONLY | XRM_NOPROGRESS);
+}
- action_list_splice(&ctx->action_lists[agno], alist);
+/*
+ * Repair all parts of this file, similar to repair_item_corruption.  The
+ * only difference is that @override_fd is passed to repair_item_class()
+ * instead of -1; the class mask and XRM_* flags are the same.
+ */
+int
+repair_file_corruption(
+ struct scrub_ctx *ctx,
+ struct scrub_item *sri,
+ int override_fd)
+{
+ return repair_item_class(ctx, sri, override_fd, SCRUB_ITEM_CORRUPT,
+ XRM_REPAIR_ONLY | XRM_NOPROGRESS);
}
-/* Run actions now and defer unfinished items for later. */
+/*
+ * Repair everything in this filesystem object that needs it. This includes
+ * cross-referencing and preening.
+ *
+ * The classes are attempted in a fixed order: CORRUPT, XCORRUPT, XFAIL, then
+ * PREEN, each with no override fd.  The first nonzero return from
+ * repair_item_class() stops the sequence and is returned to the caller.
+ */
int
-action_list_process_or_defer(
- struct scrub_ctx *ctx,
- xfs_agnumber_t agno,
- struct action_list *alist)
+repair_item(
+ struct scrub_ctx *ctx,
+ struct scrub_item *sri,
+ unsigned int flags)
{
- int ret;
+ int ret;
- ret = action_list_process(ctx, -1, alist,
- XRM_REPAIR_ONLY | XRM_NOPROGRESS);
+ ret = repair_item_class(ctx, sri, -1, SCRUB_ITEM_CORRUPT, flags);
+ if (ret)
+ return ret;
+
+ ret = repair_item_class(ctx, sri, -1, SCRUB_ITEM_XCORRUPT, flags);
if (ret)
return ret;
- action_list_defer(ctx, agno, alist);
+ ret = repair_item_class(ctx, sri, -1, SCRUB_ITEM_XFAIL, flags);
+ if (ret)
+ return ret;
+
+ return repair_item_class(ctx, sri, -1, SCRUB_ITEM_PREEN, flags);
+}
+
+/*
+ * Create an action item around a scrub item that needs repairs.
+ *
+ * On return, *aitemp points to a newly malloc'd action item holding a copy
+ * of @sri, or is NULL if @sri has nothing left to repair or the allocation
+ * failed.  The caller owns the returned item and must either queue it or
+ * free() it.  Returns 0 on success, or the errno value from malloc() after
+ * reporting it through str_liberror().
+ */
+int
+repair_item_to_action_item(
+	struct scrub_ctx		*ctx,
+	const struct scrub_item		*sri,
+	struct action_item		**aitemp)
+{
+	struct action_item		*aitem;
+
+	/* Never leave the out parameter indeterminate for the caller. */
+	*aitemp = NULL;
+
+	if (repair_item_count_needsrepair(sri) == 0)
+		return 0;
+
+	aitem = malloc(sizeof(*aitem));
+	if (!aitem) {
+		/* Save errno before anything else can overwrite it. */
+		int			error = errno;
+
+		str_liberror(ctx, error, _("creating repair action item"));
+		return error;
+	}
+
+	INIT_LIST_HEAD(&aitem->list);
+	memcpy(&aitem->sri, sri, sizeof(aitem->sri));
+
+	*aitemp = aitem;
+	return 0;
+}
+
+/*
+ * Defer all the repairs until phase 4.
+ *
+ * The item is queued on the list for sri_agno if that is not -1U, else on
+ * the list for the AG derived from sri_ino if neither sri_ino nor sri_gen
+ * is -1, else on AG 0's list.  Returns 0 if the item was queued or had
+ * nothing to repair, or the error from repair_item_to_action_item().
+ *
+ * NOTE(review): no lock is taken around action_list_add() here, whereas
+ * defer_inode_repair() wraps the same call in a per-AG mutex -- presumably
+ * callers of this function are already serialized per AG; confirm.
+ */
+int
+repair_item_defer(
+ struct scrub_ctx *ctx,
+ const struct scrub_item *sri)
+{
+ struct action_item *aitem = NULL;
+ unsigned int agno;
+ int error;
+
+ error = repair_item_to_action_item(ctx, sri, &aitem);
+ if (error || !aitem)
+ return error;
+
+ if (sri->sri_agno != -1U)
+ agno = sri->sri_agno;
+ else if (sri->sri_ino != -1ULL && sri->sri_gen != -1U)
+ agno = cvt_ino_to_agno(&ctx->mnt, sri->sri_ino);
+ else
+ agno = 0;
+ ASSERT(agno < ctx->mnt.fsgeom.agcount);
+
+ action_list_add(&ctx->action_lists[agno], aitem);
return 0;
}