]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs_scrub: create infrastructure to read verify data blocks
authorDarrick J. Wong <darrick.wong@oracle.com>
Fri, 2 Feb 2018 15:32:46 +0000 (09:32 -0600)
committerEric Sandeen <sandeen@redhat.com>
Fri, 2 Feb 2018 15:32:46 +0000 (09:32 -0600)
Manage the scheduling, issuance, and reporting of data block
verification reads.  This enables us to combine adjacent (or nearly
adjacent) read requests, and to take advantage of high-IOPS devices by
issuing IO from multiple threads.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
scrub/Makefile
scrub/read_verify.c [new file with mode: 0644]
scrub/read_verify.h [new file with mode: 0644]
scrub/xfs_scrub.h

index a9aaa99af749e99acc546038bff3b53798ee4085..3b3eb95c3d0a89185186ddacf9f03c982c296bad 100644 (file)
@@ -23,6 +23,7 @@ disk.h \
 filemap.h \
 fscounters.h \
 inodes.h \
+read_verify.h \
 scrub.h \
 spacemap.h \
 unicrash.h \
@@ -40,6 +41,7 @@ phase1.c \
 phase2.c \
 phase3.c \
 phase5.c \
+read_verify.c \
 scrub.c \
 spacemap.c \
 xfs_scrub.c
diff --git a/scrub/read_verify.c b/scrub/read_verify.c
new file mode 100644 (file)
index 0000000..244626d
--- /dev/null
@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2018 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/statvfs.h>
+#include "workqueue.h"
+#include "path.h"
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_scrub.h"
+#include "common.h"
+#include "counter.h"
+#include "disk.h"
+#include "read_verify.h"
+
+/*
+ * Read Verify Pool
+ *
+ * Manages the data block read verification phase.  The caller schedules
+ * verification requests, which are then scheduled to be run by a thread
+ * pool worker.  Adjacent (or nearly adjacent) requests can be combined
+ * to reduce overhead when free space fragmentation is high.  The thread
+ * pool takes care of issuing multiple IOs to the device, if possible.
+ */
+
+/*
+ * Perform all IO in 32M chunks.  This cannot exceed 65536 sectors
+ * because that's the biggest SCSI VERIFY(16) we dare to send.
+ */
+#define RVP_IO_MAX_SIZE                (33554432)
+#define RVP_IO_MAX_SECTORS     (RVP_IO_MAX_SIZE >> BBSHIFT)
+
+/* Tolerate 64k holes in adjacent read verify requests. */
+#define RVP_IO_BATCH_LOCALITY  (65536)
+
+/*
+ * State shared by all work items of one read-verify pool.  A pointer to
+ * this structure is what read_verify_pool_init() hands to
+ * workqueue_create() as the workqueue context.
+ */
+struct read_verify_pool {
+       struct workqueue        wq;             /* thread pool */
+       struct scrub_ctx        *ctx;           /* scrub context */
+       void                    *readbuf;       /* read buffer */
+       /* running total of bytes stepped over by read_verify() */
+       struct ptcounter        *verified_bytes;
+       read_verify_ioerr_fn_t  ioerr_fn;       /* io error callback */
+       size_t                  miniosz;        /* minimum io size, bytes */
+};
+
+/*
+ * Build a read-verify pool: a page-aligned read buffer of RVP_IO_MAX_SIZE
+ * bytes, a byte counter sized for @nproc, and a workqueue whose context is
+ * the pool itself.  @ctx, @miniosz and @ioerr_fn are recorded for use by
+ * the verifier.  Returns the new pool, or NULL if any step fails (whatever
+ * was already set up is released first).
+ */
+struct read_verify_pool *
+read_verify_pool_init(
+       struct scrub_ctx                *ctx,
+       size_t                          miniosz,
+       read_verify_ioerr_fn_t          ioerr_fn,
+       unsigned int                    nproc)
+{
+       struct read_verify_pool         *pool;
+       unsigned int                    nr_threads;
+
+       pool = calloc(1, sizeof(*pool));
+       if (pool == NULL)
+               return NULL;
+
+       if (posix_memalign(&pool->readbuf, page_size, RVP_IO_MAX_SIZE) != 0 ||
+           pool->readbuf == NULL) {
+               free(pool);
+               return NULL;
+       }
+
+       pool->verified_bytes = ptcounter_init(nproc);
+       if (pool->verified_bytes == NULL) {
+               free(pool->readbuf);
+               free(pool);
+               return NULL;
+       }
+
+       pool->ctx = ctx;
+       pool->ioerr_fn = ioerr_fn;
+       pool->miniosz = miniosz;
+
+       /* Only one thread wanted: pass zero so we run in the main thread. */
+       nr_threads = (nproc == 1) ? 0 : nproc;
+       if (workqueue_create(&pool->wq, (struct xfs_mount *)pool, nr_threads)) {
+               ptcounter_free(pool->verified_bytes);
+               free(pool->readbuf);
+               free(pool);
+               return NULL;
+       }
+
+       return pool;
+}
+
+/*
+ * Finish outstanding read verification by destroying the pool's workqueue.
+ * The buffer and counter stay allocated; read_verify_pool_destroy()
+ * releases those.
+ */
+void
+read_verify_pool_flush(
+       struct read_verify_pool         *rvp)
+{
+       struct workqueue                *queue = &rvp->wq;
+
+       workqueue_destroy(queue);
+}
+
+/*
+ * Release the pool's read buffer, its byte counter, and the pool itself.
+ * Nothing here touches the workqueue; that is torn down by
+ * read_verify_pool_flush().
+ */
+void
+read_verify_pool_destroy(
+       struct read_verify_pool         *rvp)
+{
+       void                            *buf = rvp->readbuf;
+
+       ptcounter_free(rvp->verified_bytes);
+       free(buf);
+       free(rvp);
+}
+
+/*
+ * Workqueue callback: read-verify one queued extent in chunks of at most
+ * RVP_IO_MAX_SIZE bytes.
+ *
+ * @wq:   workqueue running this item; wq->wq_ctx is the read_verify_pool.
+ * @agno: not used by this function.
+ * @arg:  heap-allocated struct read_verify describing the extent; it is
+ *        freed before returning.
+ *
+ * When disk_read_verify() reports failure, the pool's ioerr_fn is called
+ * for the first miniosz bytes of the failed chunk (or for the whole chunk
+ * if it is shorter than that) and verification resumes right after them.
+ * Every byte stepped over, readable or not, is added to the pool's
+ * verified_bytes counter.
+ */
+static void
+read_verify(
+       struct workqueue                *wq,
+       xfs_agnumber_t                  agno,
+       void                            *arg)
+{
+       struct read_verify              *rv = arg;
+       struct read_verify_pool         *rvp;
+       unsigned long long              verified = 0;
+       ssize_t                         sz;
+       size_t                          len;
+       int                             read_error;
+
+       rvp = (struct read_verify_pool *)wq->wq_ctx;
+       while (rv->io_length > 0) {
+               len = min(rv->io_length, RVP_IO_MAX_SIZE);
+               dbg_printf("diskverify %d %"PRIu64" %zu\n", rv->io_disk->d_fd,
+                               rv->io_start, len);
+               sz = disk_read_verify(rv->io_disk, rvp->readbuf,
+                               rv->io_start, len);
+               if (sz < 0) {
+                       /* Grab errno before any other call can clobber it. */
+                       read_error = errno;
+                       dbg_printf("IOERR %d %"PRIu64" %zu\n",
+                                       rv->io_disk->d_fd,
+                                       rv->io_start, len);
+                       /*
+                        * IO error, so try the next logical block.  len is
+                        * already capped at the remaining io_length, so only
+                        * ever shrink it: stepping past the end would wrap
+                        * the unsigned io_length, and a zero miniosz would
+                        * never make progress.
+                        */
+                       if (rvp->miniosz > 0 && rvp->miniosz < len)
+                               len = rvp->miniosz;
+                       rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start, len,
+                                       read_error, rv->io_end_arg);
+               }
+
+               verified += len;
+               rv->io_start += len;
+               rv->io_length -= len;
+       }
+
+       free(rv);
+       ptcounter_add(rvp->verified_bytes, verified);
+}
+
+/*
+ * Hand the extent stashed in @rv to the pool's workqueue.
+ *
+ * A private heap copy of @rv is queued so that the caller may reuse @rv
+ * immediately; read_verify() frees the copy.  Whenever this returns true
+ * the stash has been consumed and rv->io_length is zero.
+ *
+ * Returns true if the caller should keep going: either the work was
+ * queued, or the copy could not be allocated and the whole extent was
+ * reported through ioerr_fn instead.  Returns false if workqueue_add()
+ * failed; @rv is left untouched in that case.
+ */
+static bool
+read_verify_queue(
+       struct read_verify_pool         *rvp,
+       struct read_verify              *rv)
+{
+       struct read_verify              *tmp;
+       bool                            ret;
+
+       dbg_printf("verify fd %d start %"PRIu64" len %"PRIu64"\n",
+                       rv->io_disk->d_fd, rv->io_start, rv->io_length);
+
+       tmp = malloc(sizeof(*tmp));
+       if (!tmp) {
+               rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start,
+                               rv->io_length, errno, rv->io_end_arg);
+               /*
+                * The extent has been reported as failed, so drop it from
+                * the stash; otherwise a later call could queue or report
+                * the very same range a second time.
+                */
+               rv->io_length = 0;
+               return true;
+       }
+       /* Plain struct copy; no need for memcpy() here. */
+       *tmp = *rv;
+
+       ret = workqueue_add(&rvp->wq, read_verify, 0, tmp);
+       if (ret) {
+               str_error(rvp->ctx, rvp->ctx->mntpoint,
+_("Could not queue read-verify work."));
+               free(tmp);
+               return false;
+       }
+       rv->io_length = 0;
+       return true;
+}
+
+/*
+ * Schedule a read-verify of @length bytes starting at byte @start of @disk.
+ *
+ * @rv is the caller's stash of one pending extent (io_length == 0 means it
+ * is empty).  If the new request is on the same disk, carries the same
+ * @end_arg, and starts within RVP_IO_BATCH_LOCALITY (64k) of the stashed
+ * extent, the two are merged into one larger extent.  Otherwise the
+ * stashed extent, if any, is queued to the workqueue and the new request
+ * takes its place in the stash.
+ *
+ * Returns false only if queueing the stashed extent failed; in that case
+ * the new request has not been recorded.  Returns true otherwise.
+ */
+bool
+read_verify_schedule_io(
+       struct read_verify_pool         *rvp,
+       struct read_verify              *rv,
+       struct disk                     *disk,
+       uint64_t                        start,
+       uint64_t                        length,
+       void                            *end_arg)
+{
+       uint64_t                        req_end;
+       uint64_t                        rv_end;
+
+       assert(rvp->readbuf);
+       req_end = start + length;
+       rv_end = rv->io_start + rv->io_length;
+
+       /*
+        * If we have a stashed IO, we haven't changed fds, the error
+        * reporting is the same, and the two extents are close,
+        * we can combine them.
+        */
+       if (rv->io_length > 0 && disk == rv->io_disk &&
+           end_arg == rv->io_end_arg &&
+           ((start >= rv->io_start && start <= rv_end + RVP_IO_BATCH_LOCALITY) ||
+            (rv->io_start >= start &&
+             rv->io_start <= req_end + RVP_IO_BATCH_LOCALITY))) {
+               rv->io_start = min(rv->io_start, start);
+               rv->io_length = max(req_end, rv_end) - rv->io_start;
+               return true;
+       }
+
+       /*
+        * Otherwise, issue the stashed IO (if there is one).  Only bail out
+        * if that fails; on success we must still fall through and stash
+        * the new request, or it would never be verified.
+        */
+       if (rv->io_length > 0 && !read_verify_queue(rvp, rv))
+               return false;
+
+       /* Stash the new IO. */
+       rv->io_disk = disk;
+       rv->io_start = start;
+       rv->io_length = length;
+       rv->io_end_arg = end_arg;
+
+       return true;
+}
+
+/*
+ * Push whatever extent is stashed in @rv to the verifier.  An empty stash
+ * (io_length == 0) is a no-op.  Returns false if the extent could not be
+ * queued, true otherwise; on success the stash is marked empty.
+ */
+bool
+read_verify_force_io(
+       struct read_verify_pool         *rvp,
+       struct read_verify              *rv)
+{
+       assert(rvp->readbuf);
+
+       if (rv->io_length != 0) {
+               if (!read_verify_queue(rvp, rv))
+                       return false;
+               rv->io_length = 0;
+       }
+
+       return true;
+}
+
+/* Report the current value of the pool's verified-bytes counter. */
+uint64_t
+read_verify_bytes(
+       struct read_verify_pool         *rvp)
+{
+       uint64_t                        total;
+
+       total = ptcounter_value(rvp->verified_bytes);
+       return total;
+}
diff --git a/scrub/read_verify.h b/scrub/read_verify.h
new file mode 100644 (file)
index 0000000..cea7a08
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2018 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef XFS_SCRUB_READ_VERIFY_H_
+#define XFS_SCRUB_READ_VERIFY_H_
+
+/*
+ * Forward declarations for types this header only uses through pointers.
+ * struct disk is declared here too so the prototypes below do not depend
+ * on disk.h having been included first.
+ */
+struct scrub_ctx;
+struct disk;
+struct read_verify_pool;
+
+/*
+ * Function called when an IO error happens.
+ *
+ * @ctx:    the scrub context the pool was created with.
+ * @disk:   the disk the failed extent belongs to.
+ * @start:  byte offset of the range being reported.
+ * @length: length in bytes of the range being reported.
+ * @error:  errno value sampled at the point of failure.
+ * @arg:    the end_arg supplied when the IO was scheduled.
+ */
+typedef void (*read_verify_ioerr_fn_t)(struct scrub_ctx *ctx,
+               struct disk *disk, uint64_t start, uint64_t length,
+               int error, void *arg);
+
+/*
+ * Create a pool; returns NULL on failure.  miniosz is the step (in bytes)
+ * used to skip ahead after a failed read, ioerr_fn receives error reports,
+ * and nproc sizes the byte counter and the workqueue.
+ */
+struct read_verify_pool *read_verify_pool_init(struct scrub_ctx *ctx,
+               size_t miniosz, read_verify_ioerr_fn_t ioerr_fn,
+               unsigned int nproc);
+/* Destroy the pool's workqueue; the pool's memory is not released. */
+void read_verify_pool_flush(struct read_verify_pool *rvp);
+/* Free the pool's read buffer, byte counter, and the pool itself. */
+void read_verify_pool_destroy(struct read_verify_pool *rvp);
+
+/*
+ * One pending read-verify extent.  Callers keep one of these as a stash
+ * that read_verify_schedule_io() fills and merges into; io_length == 0
+ * means the stash is empty.
+ */
+struct read_verify {
+       /* passed back to the ioerr callback; must match for extents to merge */
+       void                    *io_end_arg;
+       struct disk             *io_disk;       /* disk to read from */
+       uint64_t                io_start;       /* bytes */
+       uint64_t                io_length;      /* bytes */
+};
+
+/*
+ * Add an extent to verify, merging it into the stash in rv when it is on
+ * the same disk, has the same end_arg, and lies within 64k of the stashed
+ * extent.  Returns false if queueing work failed.
+ */
+bool read_verify_schedule_io(struct read_verify_pool *rvp,
+               struct read_verify *rv, struct disk *disk, uint64_t start,
+               uint64_t length, void *end_arg);
+/* Queue whatever is stashed in rv; returns false if queueing failed. */
+bool read_verify_force_io(struct read_verify_pool *rvp, struct read_verify *rv);
+/* Current value of the pool's verified-bytes counter. */
+uint64_t read_verify_bytes(struct read_verify_pool *rvp);
+
+#endif /* XFS_SCRUB_READ_VERIFY_H_ */
+
+#endif /* XFS_SCRUB_READ_VERIFY_H_ */
index 0aef76b9304b72e8eccdcd5efc9d098d2cb49ea1..c883bdb2951aa84b4f30b31c883543b76c26a0a1 100644 (file)
@@ -80,6 +80,9 @@ struct scrub_ctx {
        void                    *fshandle;
        size_t                  fshandle_len;
 
+       /* Data block read verification buffer */
+       void                    *readbuf;
+
        /* Mutable scrub state; use lock. */
        pthread_mutex_t         lock;
        unsigned long long      max_errors;