#include <sys/statvfs.h>
#include "platform_defs.h"
#include "xfs_arch.h"
-#include "xfs_format.h"
#include "handle.h"
-#include "path.h"
-#include "workqueue.h"
+#include "libfrog/paths.h"
+#include "libfrog/workqueue.h"
#include "xfs_scrub.h"
#include "common.h"
#include "inodes.h"
+#include "descr.h"
+#include "libfrog/fsgeom.h"
+#include "libfrog/bulkstat.h"
/*
* Iterate a range of inodes.
*/
/*
- * Did we get exactly the inodes we expected? If not, load them one at a
- * time (or fake it) into the bulkstat data.
+ * Run bulkstat on an entire inode allocation group, then check that we got
+ * exactly the inodes we expected. If not, load them one at a time (or fake
+ * it) into the bulkstat data.
*/
static void
-xfs_iterate_inodes_range_check(
+bulkstat_for_inumbers(
	struct scrub_ctx	*ctx,
-	struct xfs_inogrp	*inogrp,
-	struct xfs_bstat	*bstat)
+	struct descr		*dsc,
+	const struct xfs_inumbers *inumbers,
+	struct xfs_bulkstat_req	*breq)
{
-	struct xfs_fsop_bulkreq	onereq = {0};
-	struct xfs_bstat	*bs;
-	__u64			oneino;
-	__s32			onelen = 0;
+	struct xfs_bulkstat	*bstat = breq->bulkstat;
+	struct xfs_bulkstat	*bs;
	int			i;
	int			error;
-	onereq.lastip = &oneino;
-	onereq.icount = 1;
-	onereq.ocount = &onelen;
+	/* First we try regular bulkstat, for speed. */
+	breq->hdr.ino = inumbers->xi_startino;
+	breq->hdr.icount = inumbers->xi_alloccount;
+	error = -xfrog_bulkstat(&ctx->mnt, breq);
+	if (error) {
+		char errbuf[DESCR_BUFSZ];
+
+		/*
+		 * Log the failure but keep going; the loop below re-stats
+		 * each expected inode individually.
+		 * NOTE(review): "%s" requires the GNU strerror_r variant
+		 * (returns char *) — confirm the build environment.
+		 */
+		str_info(ctx, descr_render(dsc), "%s",
+				strerror_r(error, errbuf, DESCR_BUFSZ));
+	}
-	for (i = 0, bs = bstat; i < XFS_INODES_PER_CHUNK; i++) {
-		if (!(inogrp->xi_allocmask & (1ULL << i)))
+	/*
+	 * Check each of the stats we got back to make sure we got the inodes
+	 * we asked for.
+	 */
+	for (i = 0, bs = bstat; i < LIBFROG_BULKSTAT_CHUNKSIZE; i++) {
+		if (!(inumbers->xi_allocmask & (1ULL << i)))
			continue;
-		if (bs->bs_ino == inogrp->xi_startino + i) {
+		if (bs->bs_ino == inumbers->xi_startino + i) {
			bs++;
			continue;
		}
		/* Load the one inode. */
-		oneino = inogrp->xi_startino + i;
-		onereq.ubuffer = bs;
-		error = ioctl(ctx->mnt_fd, XFS_IOC_FSBULKSTAT_SINGLE,
-				&onereq);
-		if (error || bs->bs_ino != inogrp->xi_startino + i) {
-			memset(bs, 0, sizeof(struct xfs_bstat));
-			bs->bs_ino = inogrp->xi_startino + i;
+		error = -xfrog_bulkstat_single(&ctx->mnt,
+				inumbers->xi_startino + i, 0, bs);
+		if (error || bs->bs_ino != inumbers->xi_startino + i) {
+			/*
+			 * Single bulkstat failed too; synthesize a record
+			 * carrying just the inumber and block size so the
+			 * iterator still visits this inode.
+			 */
+			memset(bs, 0, sizeof(struct xfs_bulkstat));
+			bs->bs_ino = inumbers->xi_startino + i;
			bs->bs_blksize = ctx->mnt_sv.f_frsize;
		}
		bs++;
	}
}
+/* BULKSTAT wrapper routines. */
+struct scan_inodes {
+	struct workqueue	wq_bulkstat;	/* workqueue feeding bulkstat workers */
+	scrub_inode_iter_fn	fn;		/* iterator called for each inode */
+	void			*arg;		/* caller's private data, passed to fn */
+	unsigned int		nr_threads;	/* worker count; 0 => run bulkstat inline */
+	bool			aborted;	/* set (never cleared) to stop the scan */
+};
+
/*
- * Call into the filesystem for inode/bulkstat information and call our
- * iterator function.  We'll try to fill the bulkstat information in batches,
- * but we also can detect iget failures.
+ * A single unit of inode scan work.  This contains a pointer to the parent
+ * information, followed by an INUMBERS request structure, followed by a
+ * BULKSTAT request structure.  The last two are VLAs, so we can't represent
+ * them here.
 */
-static bool
-xfs_iterate_inodes_range(
+struct scan_ichunk {
+	struct scan_inodes	*si;
+};
+
+/* Return the INUMBERS request stored immediately after the ichunk header. */
+static inline struct xfs_inumbers_req *
+ichunk_to_inumbers(
+	struct scan_ichunk	*ichunk)
+{
+	char	*p = (char *)ichunk;
+
+	return (struct xfs_inumbers_req *)(p + sizeof(struct scan_ichunk));
+}
+
+/* Return the BULKSTAT request stored after the 1-record INUMBERS request. */
+static inline struct xfs_bulkstat_req *
+ichunk_to_bulkstat(
+	struct scan_ichunk	*ichunk)
+{
+	char	*p = (char *)ichunk_to_inumbers(ichunk);
+
+	return (struct xfs_bulkstat_req *)(p + XFS_INUMBERS_REQ_SIZE(1));
+}
+
+/*
+ * Allocate one unit of scan work, with the single-record INUMBERS request
+ * and the LIBFROG_BULKSTAT_CHUNKSIZE-record BULKSTAT request laid out after
+ * the header in the same allocation.  The INUMBERS request is primed to
+ * start at @startino within @agno.  Returns 0 or a negative errno.
+ */
+static inline int
+alloc_ichunk(
+	struct scan_inodes	*si,
+	uint32_t		agno,
+	uint64_t		startino,
+	struct scan_ichunk	**ichunkp)
+{
+	struct scan_ichunk	*ichunk;
+	struct xfs_inumbers_req	*ireq;
+	struct xfs_bulkstat_req	*breq;
+
+	ichunk = calloc(1, sizeof(struct scan_ichunk) +
+			   XFS_INUMBERS_REQ_SIZE(1) +
+			   XFS_BULKSTAT_REQ_SIZE(LIBFROG_BULKSTAT_CHUNKSIZE));
+	if (!ichunk)
+		return -errno;
+
+	ichunk->si = si;
+
+	ireq = ichunk_to_inumbers(ichunk);
+	ireq->hdr.icount = 1;
+	ireq->hdr.ino = startino;
+	ireq->hdr.agno = agno;
+	ireq->hdr.flags |= XFS_BULK_IREQ_AGNO;
+
+	breq = ichunk_to_bulkstat(ichunk);
+	breq->hdr.icount = LIBFROG_BULKSTAT_CHUNKSIZE;
+
+	*ichunkp = ichunk;
+	return 0;
+}
+
+/* Render an inode number/generation (from a bulkstat record) for messages. */
+static int
+render_ino_from_bulkstat(
	struct scrub_ctx	*ctx,
-	const char		*descr,
-	void			*fshandle,
-	uint64_t		first_ino,
-	uint64_t		last_ino,
-	xfs_inode_iter_fn	fn,
+	char			*buf,
+	size_t			buflen,
+	void			*data)
+{
+	struct xfs_bulkstat	*bstat = data;
+
+	return scrub_render_ino_descr(ctx, buf, buflen, bstat->bs_ino,
+			bstat->bs_gen, NULL);
+}
+
+/* Render the data device numbers and AG number for messages. */
+static int
+render_inumbers_from_agno(
+	struct scrub_ctx	*ctx,
+	char			*buf,
+	size_t			buflen,
+	void			*data)
+{
+	xfs_agnumber_t		*agno = data;
+
+	return snprintf(buf, buflen, _("dev %d:%d AG %u inodes"),
+				major(ctx->fsinfo.fs_datadev),
+				minor(ctx->fsinfo.fs_datadev),
+				*agno);
+}
+
+/*
+ * Call BULKSTAT for information on a single chunk's worth of inodes and call
+ * our iterator function.  We'll try to fill the bulkstat information in
+ * batches, but we also can detect iget failures.
+ */
+static void
+scan_ag_bulkstat(
+	struct workqueue	*wq,
+	xfs_agnumber_t		agno,
	void			*arg)
{
-	struct xfs_fsop_bulkreq	igrpreq = {0};
-	struct xfs_fsop_bulkreq	bulkreq = {0};
-	struct xfs_handle	handle;
-	struct xfs_inogrp	inogrp;
-	struct xfs_bstat	bstat[XFS_INODES_PER_CHUNK];
-	char			idescr[DESCR_BUFSZ];
-	char			buf[DESCR_BUFSZ];
-	struct xfs_bstat	*bs;
-	__u64			igrp_ino;
-	__u64			ino;
-	__s32			bulklen = 0;
-	__s32			igrplen = 0;
-	bool			moveon = true;
+	struct xfs_handle	handle = { };
+	struct scrub_ctx	*ctx = (struct scrub_ctx *)wq->wq_ctx;
+	struct scan_ichunk	*ichunk = arg;
+	struct xfs_inumbers_req	*ireq = ichunk_to_inumbers(ichunk);
+	struct xfs_bulkstat_req	*breq = ichunk_to_bulkstat(ichunk);
+	struct scan_inodes	*si = ichunk->si;
+	struct xfs_bulkstat	*bs;
+	struct xfs_inumbers	*inumbers = &ireq->inumbers[0];
+	uint64_t		last_ino = 0;
	int			i;
	int			error;
	int			stale_count = 0;
+	DEFINE_DESCR(dsc_bulkstat, ctx, render_ino_from_bulkstat);
+	DEFINE_DESCR(dsc_inumbers, ctx, render_inumbers_from_agno);
+	descr_set(&dsc_inumbers, &agno);
-	memset(bstat, 0, XFS_INODES_PER_CHUNK * sizeof(struct xfs_bstat));
-	bulkreq.lastip = &ino;
-	bulkreq.icount = XFS_INODES_PER_CHUNK;
-	bulkreq.ubuffer = &bstat;
-	bulkreq.ocount = &bulklen;
-
-	igrpreq.lastip = &igrp_ino;
-	igrpreq.icount = 1;
-	igrpreq.ubuffer = &inogrp;
-	igrpreq.ocount = &igrplen;
-
-	memcpy(&handle.ha_fsid, fshandle, sizeof(handle.ha_fsid));
+	memcpy(&handle.ha_fsid, ctx->fshandle, sizeof(handle.ha_fsid));
	handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
			sizeof(handle.ha_fid.fid_len);
	handle.ha_fid.fid_pad = 0;
-	/* Find the inode chunk & alloc mask */
-	igrp_ino = first_ino;
-	error = ioctl(ctx->mnt_fd, XFS_IOC_FSINUMBERS, &igrpreq);
-	while (!error && igrplen) {
-		/* Load the inodes. */
-		ino = inogrp.xi_startino - 1;
-		bulkreq.icount = inogrp.xi_alloccount;
-		error = ioctl(ctx->mnt_fd, XFS_IOC_FSBULKSTAT, &bulkreq);
-		if (error)
-			str_info(ctx, descr, "%s", strerror_r(errno,
-					buf, DESCR_BUFSZ));
-
-		xfs_iterate_inodes_range_check(ctx, &inogrp, bstat);
-
-		/* Iterate all the inodes. */
-		for (i = 0, bs = bstat; i < inogrp.xi_alloccount; i++, bs++) {
-			if (bs->bs_ino > last_ino)
-				goto out;
+retry:
+	bulkstat_for_inumbers(ctx, &dsc_inumbers, inumbers, breq);
-			handle.ha_fid.fid_ino = bs->bs_ino;
-			handle.ha_fid.fid_gen = bs->bs_gen;
-			error = fn(ctx, &handle, bs, arg);
-			switch (error) {
-			case 0:
-				break;
-			case ESTALE:
-				stale_count++;
-				if (stale_count < 30) {
-					igrp_ino = inogrp.xi_startino;
-					goto igrp_retry;
-				}
-				snprintf(idescr, DESCR_BUFSZ, "inode %"PRIu64,
-						(uint64_t)bs->bs_ino);
-				str_info(ctx, idescr,
-_("Changed too many times during scan; giving up."));
-				break;
-			case XFS_ITERATE_INODES_ABORT:
-				error = 0;
-				/* fall thru */
-			default:
-				moveon = false;
-				errno = error;
-				goto err;
-			}
-			if (xfs_scrub_excessive_errors(ctx)) {
-				moveon = false;
-				goto out;
+	/* Iterate all the inodes. */
+	bs = &breq->bulkstat[0];
+	for (i = 0; !si->aborted && i < inumbers->xi_alloccount; i++, bs++) {
+		uint64_t	scan_ino = bs->bs_ino;
+
+		/* ensure forward progress if we retried */
+		if (scan_ino < last_ino)
+			continue;
+
+		descr_set(&dsc_bulkstat, bs);
+		handle.ha_fid.fid_ino = scan_ino;
+		handle.ha_fid.fid_gen = bs->bs_gen;
+		error = si->fn(ctx, &handle, bs, si->arg);
+		switch (error) {
+		case 0:
+			break;
+		case ESTALE: {
+			stale_count++;
+			if (stale_count < 30) {
+				/*
+				 * Refresh the inumbers record for this chunk
+				 * and rescan it from the top; last_ino skips
+				 * the inodes we already processed.
+				 */
+				ireq->hdr.ino = inumbers->xi_startino;
+				error = -xfrog_inumbers(&ctx->mnt, ireq);
+				if (error)
+					goto err;
+				goto retry;
			}
+			str_info(ctx, descr_render(&dsc_bulkstat),
+_("Changed too many times during scan; giving up."));
+			si->aborted = true;
+			goto out;
		}
-
-		stale_count = 0;
-igrp_retry:
-		error = ioctl(ctx->mnt_fd, XFS_IOC_FSINUMBERS, &igrpreq);
+		case ECANCELED:
+			error = 0;
+			fallthrough;
+		default:
+			goto err;
+		}
+		if (scrub_excessive_errors(ctx)) {
+			si->aborted = true;
+			goto out;
+		}
+		last_ino = scan_ino;
	}
err:
	if (error) {
-		str_errno(ctx, descr);
-		moveon = false;
+		str_liberror(ctx, error, descr_render(&dsc_bulkstat));
+		si->aborted = true;
	}
out:
-	return moveon;
+	/* This worker owns the chunk and frees it on every exit path. */
+	free(ichunk);
}
-/* BULKSTAT wrapper routines. */
-struct xfs_scan_inodes {
-	xfs_inode_iter_fn	fn;
-	void			*arg;
-	bool			moveon;
-};
-
-/* Scan all the inodes in an AG. */
+/*
+ * Call INUMBERS for information about inode chunks, then queue the inumbers
+ * responses in the bulkstat workqueue.  This helps us maximize CPU parallelism
+ * if the filesystem AGs are not evenly loaded.
+ */
static void
-xfs_scan_ag_inodes(
+scan_ag_inumbers(
	struct workqueue	*wq,
	xfs_agnumber_t		agno,
	void			*arg)
{
-	struct xfs_scan_inodes	*si = arg;
+	struct scan_ichunk	*ichunk = NULL;
+	struct scan_inodes	*si = arg;
	struct scrub_ctx	*ctx = (struct scrub_ctx *)wq->wq_ctx;
-	char			descr[DESCR_BUFSZ];
-	uint64_t		ag_ino;
-	uint64_t		next_ag_ino;
-	bool			moveon;
+	struct xfs_inumbers_req	*ireq;
+	uint64_t		nextino = cvt_agino_to_ino(&ctx->mnt, agno, 0);
+	int			error;
+	DEFINE_DESCR(dsc, ctx, render_inumbers_from_agno);
-	snprintf(descr, DESCR_BUFSZ, _("dev %d:%d AG %u inodes"),
-				major(ctx->fsinfo.fs_datadev),
-				minor(ctx->fsinfo.fs_datadev),
-				agno);
+	descr_set(&dsc, &agno);
+
+	error = alloc_ichunk(si, agno, 0, &ichunk);
+	if (error)
+		goto err;
+	ireq = ichunk_to_inumbers(ichunk);
+
+	/* Find the inode chunk & alloc mask */
+	error = -xfrog_inumbers(&ctx->mnt, ireq);
+	while (!error && !si->aborted && ireq->hdr.ocount > 0) {
+		/*
+		 * Make sure that we always make forward progress while we
+		 * scan the inode btree.
+		 */
+		if (nextino > ireq->inumbers[0].xi_startino) {
+			str_corrupt(ctx, descr_render(&dsc),
+_("AG %u inode btree is corrupt near agino %lu, got %lu"), agno,
+				cvt_ino_to_agino(&ctx->mnt, nextino),
+				cvt_ino_to_agino(&ctx->mnt,
+						ireq->inumbers[0].xi_startino));
+			si->aborted = true;
+			break;
+		}
+		nextino = ireq->hdr.ino;
+
+		if (ireq->inumbers[0].xi_alloccount == 0) {
+			/*
+			 * We can have totally empty inode chunks on
+			 * filesystems where there are more than 64 inodes per
+			 * block.  Skip these.
+			 */
+			;
+		} else if (si->nr_threads > 0) {
+			/* Queue this inode chunk on the bulkstat workqueue. */
+			error = -workqueue_add(&si->wq_bulkstat,
+					scan_ag_bulkstat, agno, ichunk);
+			if (error) {
+				si->aborted = true;
+				str_liberror(ctx, error,
+						_("queueing bulkstat work"));
+				goto out;
+			}
+			/* Ownership of the chunk passed to the worker. */
+			ichunk = NULL;
+		} else {
+			/*
+			 * Only one thread, call bulkstat directly.  Remember,
+			 * ichunk is freed by the worker before returning.
+			 */
+			scan_ag_bulkstat(wq, agno, ichunk);
+			ichunk = NULL;
+			if (si->aborted)
+				break;
+		}
-	ag_ino = (__u64)agno << (ctx->inopblog + ctx->agblklog);
-	next_ag_ino = (__u64)(agno + 1) << (ctx->inopblog + ctx->agblklog);
+		if (!ichunk) {
+			error = alloc_ichunk(si, agno, nextino, &ichunk);
+			if (error)
+				goto err;
+		}
+		ireq = ichunk_to_inumbers(ichunk);
-	moveon = xfs_iterate_inodes_range(ctx, descr, ctx->fshandle, ag_ino,
-			next_ag_ino - 1, si->fn, si->arg);
-	if (!moveon)
-		si->moveon = false;
+		error = -xfrog_inumbers(&ctx->mnt, ireq);
+	}
+
+err:
+	if (error) {
+		str_liberror(ctx, error, descr_render(&dsc));
+		si->aborted = true;
+	}
+out:
+	if (ichunk)
+		free(ichunk);
}
-/* Scan all the inodes in a filesystem. */
-bool
-xfs_scan_all_inodes(
+/*
+ * Scan all the inodes in a filesystem.  On error, this function will log
+ * an error message and return -1.
+ */
+int
+scrub_scan_all_inodes(
	struct scrub_ctx	*ctx,
-	xfs_inode_iter_fn	fn,
+	scrub_inode_iter_fn	fn,
	void			*arg)
{
-	struct xfs_scan_inodes	si;
+	struct scan_inodes	si = {
+		.fn		= fn,
+		.arg		= arg,
+		.nr_threads	= scrub_nproc_workqueue(ctx),
+	};
	xfs_agnumber_t		agno;
-	struct workqueue	wq;
+	struct workqueue	wq_inumbers;
+	unsigned int		max_bulkstat;
	int			ret;
-	si.moveon = true;
-	si.fn = fn;
-	si.arg = arg;
+	/*
+	 * The bulkstat workqueue should queue at most one inobt block's worth
+	 * of inode chunk records per worker thread.  If we're running in
+	 * single thread mode (nr_threads==0) then we skip the workqueues.
+	 */
+	/* NOTE(review): 16 is presumably the on-disk inobt record size; confirm. */
+	max_bulkstat = si.nr_threads * (ctx->mnt.fsgeom.blocksize / 16);
+
+	ret = -workqueue_create_bound(&si.wq_bulkstat, (struct xfs_mount *)ctx,
+			si.nr_threads, max_bulkstat);
+	if (ret) {
+		str_liberror(ctx, ret, _("creating bulkstat workqueue"));
+		return -1;
+	}
-	ret = workqueue_create(&wq, (struct xfs_mount *)ctx,
-			scrub_nproc_workqueue(ctx));
+	ret = -workqueue_create(&wq_inumbers, (struct xfs_mount *)ctx,
+			si.nr_threads);
	if (ret) {
-		str_info(ctx, ctx->mntpoint, _("Could not create workqueue."));
-		return false;
+		str_liberror(ctx, ret, _("creating inumbers workqueue"));
+		si.aborted = true;
+		goto kill_bulkstat;
	}
-	for (agno = 0; agno < ctx->geo.agcount; agno++) {
-		ret = workqueue_add(&wq, xfs_scan_ag_inodes, agno, &si);
+	for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) {
+		ret = -workqueue_add(&wq_inumbers, scan_ag_inumbers, agno, &si);
		if (ret) {
-			si.moveon = false;
-			str_info(ctx, ctx->mntpoint,
-_("Could not queue AG %u bulkstat work."), agno);
+			si.aborted = true;
+			str_liberror(ctx, ret, _("queueing inumbers work"));
			break;
		}
	}
-	workqueue_destroy(&wq);
+	ret = -workqueue_terminate(&wq_inumbers);
+	if (ret) {
+		si.aborted = true;
+		str_liberror(ctx, ret, _("finishing inumbers work"));
+	}
+	workqueue_destroy(&wq_inumbers);
+
+	/* Drain the bulkstat workers even if inumbers setup failed. */
+kill_bulkstat:
+	ret = -workqueue_terminate(&si.wq_bulkstat);
+	if (ret) {
+		si.aborted = true;
+		str_liberror(ctx, ret, _("finishing bulkstat work"));
+	}
+	workqueue_destroy(&si.wq_bulkstat);
-	return si.moveon;
+	return si.aborted ? -1 : 0;
}
-/*
- * Open a file by handle, or return a negative error code.
- */
+/* Open a file by handle, returning either the fd or -1 on error. */
int
-xfs_open_handle(
+scrub_open_handle(
struct xfs_handle *handle)
{
return open_by_fshandle(handle, sizeof(*handle),