]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/read_verify.c
mkfs: use cvtnum from libfrog
[thirdparty/xfsprogs-dev.git] / scrub / read_verify.c
CommitLineData
959ef981 1// SPDX-License-Identifier: GPL-2.0+
2000470d
DW
2/*
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
2000470d 4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
2000470d 5 */
a440f877 6#include "xfs.h"
2000470d 7#include <stdint.h>
2000470d
DW
8#include <stdlib.h>
9#include <sys/statvfs.h>
14051909 10#include "libfrog/ptvar.h"
56598728 11#include "libfrog/workqueue.h"
42b4c8e8 12#include "libfrog/paths.h"
2000470d
DW
13#include "xfs_scrub.h"
14#include "common.h"
15#include "counter.h"
16#include "disk.h"
17#include "read_verify.h"
ed60d210 18#include "progress.h"
2000470d
DW
19
20/*
21 * Read Verify Pool
22 *
23 * Manages the data block read verification phase. The caller schedules
24 * verification requests, which are then scheduled to be run by a thread
25 * pool worker. Adjacent (or nearly adjacent) requests can be combined
26 * to reduce overhead when free space fragmentation is high. The thread
27 * pool takes care of issuing multiple IOs to the device, if possible.
28 */
29
30/*
31 * Perform all IO in 32M chunks. This cannot exceed 65536 sectors
32 * because that's the biggest SCSI VERIFY(16) we dare to send.
33 */
34#define RVP_IO_MAX_SIZE (33554432)
16dbab1a
DW
35
36/*
37 * If we're running in the background then we perform IO in 128k chunks
38 * to reduce the load on the IO subsystem.
39 */
40#define RVP_BACKGROUND_IO_MAX_SIZE (131072)
41
42/* What's the real maximum IO size? */
43static inline unsigned int
44rvp_io_max_size(void)
45{
46 return bg_mode > 0 ? RVP_BACKGROUND_IO_MAX_SIZE : RVP_IO_MAX_SIZE;
47}
2000470d
DW
48
49/* Tolerate 64k holes in adjacent read verify requests. */
50#define RVP_IO_BATCH_LOCALITY (65536)
51
41c08606
DW
/*
 * One pending read-verify request: a contiguous byte range to read and
 * the opaque argument to hand back through the IO error callback.
 */
struct read_verify {
	void			*io_end_arg;	/* passed to ioerr_fn on error */
	struct disk		*io_disk;	/* NOTE(review): not referenced by the code visible in this file (workers use rvp->disk) — confirm before removing */
	uint64_t		io_start;	/* bytes */
	uint64_t		io_length;	/* bytes */
};
58
2000470d
DW
/*
 * State for the read verification phase: a thread pool that services
 * queued read_verify requests against a single disk, plus the shared
 * read buffer, per-thread request-combining state, and counters.
 */
struct read_verify_pool {
	struct workqueue	wq;		/* thread pool */
	struct scrub_ctx	*ctx;		/* scrub context */
	void			*readbuf;	/* read buffer, page-aligned, rvp_io_max_size() bytes */
	struct ptcounter	*verified_bytes; /* per-thread tally of cleanly read bytes */
	struct ptvar		*rvstate;	/* combines read requests */
	struct disk		*disk;		/* which disk? */
	read_verify_ioerr_fn_t	ioerr_fn;	/* io error callback */
	size_t			miniosz;	/* minimum io size, bytes */

	/*
	 * Store a runtime error code here so that we can stop the pool and
	 * return it to the caller.
	 */
	int			runtime_error;
};
75
41c08606
DW
/*
 * Create a thread pool to run read verifiers.
 *
 * @disk is the disk we want to verify.
 * @miniosz is the minimum size of an IO to expect (in bytes).
 * @ioerr_fn will be called when IO errors occur.
 * @submitter_threads is the number of threads that may be sending verify
 * requests at any given time.
 *
 * Returns 0 and sets *@prvp on success; returns a positive errno on
 * failure (this file's convention; the libfrog helpers called below
 * return negative errnos, hence the sign flips).
 */
int
read_verify_pool_alloc(
	struct scrub_ctx		*ctx,
	struct disk			*disk,
	size_t				miniosz,
	read_verify_ioerr_fn_t		ioerr_fn,
	unsigned int			submitter_threads,
	struct read_verify_pool		**prvp)
{
	struct read_verify_pool		*rvp;
	/* One verifier thread per disk head (spindle/queue). */
	unsigned int			verifier_threads = disk_heads(disk);
	int				ret;

	/*
	 * The minimum IO size must be a multiple of the disk sector size
	 * and a factor of the max io size.
	 */
	if (miniosz % disk->d_lbasize)
		return EINVAL;
	if (rvp_io_max_size() % miniosz)
		return EINVAL;

	rvp = calloc(1, sizeof(struct read_verify_pool));
	if (!rvp)
		return errno;

	/* Page-aligned buffer so direct IO into it is legal. */
	ret = posix_memalign((void **)&rvp->readbuf, page_size,
			rvp_io_max_size());
	if (ret)
		goto out_free;
	ret = ptcounter_alloc(verifier_threads, &rvp->verified_bytes);
	if (ret)
		goto out_buf;
	rvp->miniosz = miniosz;
	rvp->ctx = ctx;
	rvp->disk = disk;
	rvp->ioerr_fn = ioerr_fn;
	/* One stashed request per submitter thread for IO combining. */
	ret = -ptvar_alloc(submitter_threads, sizeof(struct read_verify),
			&rvp->rvstate);
	if (ret)
		goto out_counter;
	/* A thread count of 0 means "run work synchronously". */
	ret = -workqueue_create(&rvp->wq, (struct xfs_mount *)rvp,
			verifier_threads == 1 ? 0 : verifier_threads);
	if (ret)
		goto out_rvstate;
	*prvp = rvp;
	return 0;

out_rvstate:
	ptvar_free(rvp->rvstate);
out_counter:
	ptcounter_free(rvp->verified_bytes);
out_buf:
	free(rvp->readbuf);
out_free:
	free(rvp);
	return ret;
}
143
4cd869e5
DW
144/* Abort all verification work. */
145void
146read_verify_pool_abort(
147 struct read_verify_pool *rvp)
148{
149 if (!rvp->runtime_error)
150 rvp->runtime_error = ECANCELED;
151 workqueue_terminate(&rvp->wq);
152}
153
2000470d 154/* Finish up any read verification work. */
8cab77d3 155int
2000470d
DW
156read_verify_pool_flush(
157 struct read_verify_pool *rvp)
158{
baed134d 159 return -workqueue_terminate(&rvp->wq);
2000470d
DW
160}
161
/*
 * Finish up any read verification work and tear it down.  Frees @rvp;
 * the pool must not be touched again after this returns.
 */
void
read_verify_pool_destroy(
	struct read_verify_pool	*rvp)
{
	/* Stop the workers before freeing the state they reference. */
	workqueue_destroy(&rvp->wq);
	ptvar_free(rvp->rvstate);
	ptcounter_free(rvp->verified_bytes);
	free(rvp->readbuf);
	free(rvp);
}
173
/*
 * Issue a read-verify IO in big batches.
 *
 * Workqueue worker: @arg is a heap-allocated struct read_verify that
 * this function owns and frees.  The range is read in rvp_io_max_size()
 * chunks; after a media error or short read we drop to single-miniosz
 * steps so that bad blocks can be reported precisely via ioerr_fn.
 */
static void
read_verify(
	struct workqueue		*wq,
	xfs_agnumber_t			agno,
	void				*arg)
{
	struct read_verify		*rv = arg;
	struct read_verify_pool		*rvp;
	unsigned long long		verified = 0;
	ssize_t				io_max_size;
	ssize_t				sz;
	ssize_t				len;
	int				read_error;
	int				ret;

	/* The workqueue context pointer is really our pool. */
	rvp = (struct read_verify_pool *)wq->wq_ctx;
	if (rvp->runtime_error)
		return;

	io_max_size = rvp_io_max_size();

	while (rv->io_length > 0) {
		read_error = 0;
		len = min(rv->io_length, io_max_size);
		dbg_printf("diskverify %d %"PRIu64" %zu\n", rvp->disk->d_fd,
				rv->io_start, len);
		sz = disk_read_verify(rvp->disk, rvp->readbuf, rv->io_start,
				len);
		if (sz == len && io_max_size < rvp->miniosz) {
			/*
			 * If the verify request was 100% successful and less
			 * than a single block in length, we were trying to
			 * read to the end of a block after a short read.  That
			 * suggests there's something funny with this device,
			 * so single-step our way through the rest of the @rv
			 * range.
			 */
			io_max_size = rvp->miniosz;
		} else if (sz < 0) {
			read_error = errno;

			/* Runtime error, bail out... */
			if (read_error != EIO && read_error != EILSEQ) {
				rvp->runtime_error = read_error;
				return;
			}

			/*
			 * A direct read encountered an error while performing
			 * a multi-block read.  Reduce the transfer size to a
			 * single block so that we can identify the exact range
			 * of bad blocks and good blocks.  We single-step all
			 * the way to the end of the @rv range, (re)starting
			 * with the block that just failed.
			 */
			if (io_max_size > rvp->miniosz) {
				io_max_size = rvp->miniosz;
				continue;
			}

			/*
			 * A direct read hit an error while we were stepping
			 * through single blocks.  Mark everything bad from
			 * io_start to the next miniosz block.
			 */
			sz = rvp->miniosz - (rv->io_start % rvp->miniosz);
			dbg_printf("IOERR %d @ %"PRIu64" %zu err %d\n",
					rvp->disk->d_fd, rv->io_start, sz,
					read_error);
			rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, sz,
					read_error, rv->io_end_arg);
		} else if (sz < len) {
			/*
			 * A short direct read suggests that we might have hit
			 * an IO error midway through the read but still had to
			 * return the number of bytes that were actually read.
			 *
			 * We need to force an EIO, so try reading the rest of
			 * the block (if it was a partial block read) or the
			 * next full block.
			 */
			io_max_size = rvp->miniosz - (sz % rvp->miniosz);
			dbg_printf("SHORT %d READ @ %"PRIu64" %zu try for %zd\n",
					rvp->disk->d_fd, rv->io_start, sz,
					io_max_size);
		} else {
			/* We should never get back more bytes than we asked. */
			assert(sz == len);
		}

		/* Advance past what we read; only clean bytes count as verified. */
		progress_add(sz);
		if (read_error == 0)
			verified += sz;
		rv->io_start += sz;
		rv->io_length -= sz;
		background_sleep();
	}

	free(rv);
	/* Fold this request's tally into the pool-wide counter. */
	ret = ptcounter_add(rvp->verified_bytes, verified);
	if (ret)
		rvp->runtime_error = ret;
}
280
281/* Queue a read verify request. */
8cab77d3 282static int
2000470d
DW
283read_verify_queue(
284 struct read_verify_pool *rvp,
285 struct read_verify *rv)
286{
287 struct read_verify *tmp;
288 bool ret;
289
290 dbg_printf("verify fd %d start %"PRIu64" len %"PRIu64"\n",
f1bb1696 291 rvp->disk->d_fd, rv->io_start, rv->io_length);
2000470d 292
5c657f1e
DW
293 /* Worker thread saw a runtime error, don't queue more. */
294 if (rvp->runtime_error)
8cab77d3 295 return rvp->runtime_error;
5c657f1e
DW
296
297 /* Otherwise clone the request and queue the copy. */
2000470d
DW
298 tmp = malloc(sizeof(struct read_verify));
299 if (!tmp) {
5c657f1e 300 rvp->runtime_error = errno;
8cab77d3 301 return errno;
2000470d 302 }
5c657f1e 303
2000470d
DW
304 memcpy(tmp, rv, sizeof(*tmp));
305
baed134d 306 ret = -workqueue_add(&rvp->wq, read_verify, 0, tmp);
2000470d 307 if (ret) {
2000470d 308 free(tmp);
5c657f1e 309 rvp->runtime_error = ret;
8cab77d3 310 return ret;
2000470d 311 }
8cab77d3 312
2000470d 313 rv->io_length = 0;
8cab77d3 314 return 0;
2000470d
DW
315}
316
317/*
318 * Issue an IO request. We'll batch subsequent requests if they're
319 * within 64k of each other
320 */
8cab77d3 321int
2000470d
DW
322read_verify_schedule_io(
323 struct read_verify_pool *rvp,
2000470d
DW
324 uint64_t start,
325 uint64_t length,
326 void *end_arg)
327{
41c08606 328 struct read_verify *rv;
2000470d
DW
329 uint64_t req_end;
330 uint64_t rv_end;
cb321a39 331 int ret;
2000470d
DW
332
333 assert(rvp->readbuf);
29c4f385
DW
334
335 /* Round up and down to the start of a miniosz chunk. */
336 start &= ~(rvp->miniosz - 1);
337 length = roundup(length, rvp->miniosz);
338
cb321a39
DW
339 rv = ptvar_get(rvp->rvstate, &ret);
340 if (ret)
2f4422f4 341 return -ret;
2000470d
DW
342 req_end = start + length;
343 rv_end = rv->io_start + rv->io_length;
344
345 /*
346 * If we have a stashed IO, we haven't changed fds, the error
347 * reporting is the same, and the two extents are close,
348 * we can combine them.
349 */
f1bb1696 350 if (rv->io_length > 0 &&
2000470d
DW
351 end_arg == rv->io_end_arg &&
352 ((start >= rv->io_start && start <= rv_end + RVP_IO_BATCH_LOCALITY) ||
353 (rv->io_start >= start &&
354 rv->io_start <= req_end + RVP_IO_BATCH_LOCALITY))) {
355 rv->io_start = min(rv->io_start, start);
356 rv->io_length = max(req_end, rv_end) - rv->io_start;
357 } else {
358 /* Otherwise, issue the stashed IO (if there is one) */
601ebcd8
DW
359 if (rv->io_length > 0) {
360 int res;
361
362 res = read_verify_queue(rvp, rv);
363 if (res)
364 return res;
365 }
2000470d
DW
366
367 /* Stash the new IO. */
2000470d
DW
368 rv->io_start = start;
369 rv->io_length = length;
370 rv->io_end_arg = end_arg;
371 }
372
8cab77d3 373 return 0;
2000470d
DW
374}
375
22d658ec
DW
376/* Force any per-thread stashed IOs into the verifier. */
377static int
378force_one_io(
379 struct ptvar *ptv,
380 void *data,
381 void *foreach_arg)
382{
383 struct read_verify_pool *rvp = foreach_arg;
384 struct read_verify *rv = data;
385
386 if (rv->io_length == 0)
387 return 0;
388
2f4422f4 389 return -read_verify_queue(rvp, rv);
22d658ec
DW
390}
391
2000470d 392/* Force any stashed IOs into the verifier. */
8cab77d3 393int
2000470d 394read_verify_force_io(
41c08606 395 struct read_verify_pool *rvp)
2000470d 396{
2000470d 397 assert(rvp->readbuf);
2000470d 398
2f4422f4 399 return -ptvar_foreach(rvp->rvstate, force_one_io, rvp);
2000470d
DW
400}
401
402/* How many bytes has this process verified? */
8cab77d3 403int
2000470d 404read_verify_bytes(
8cab77d3
DW
405 struct read_verify_pool *rvp,
406 uint64_t *bytes_checked)
2000470d 407{
8cab77d3 408 return ptcounter_value(rvp->verified_bytes, bytes_checked);
2000470d 409}