// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2018 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include <stdint.h>
#include <stdlib.h>
#include <sys/statvfs.h>
#include "libfrog/ptvar.h"
#include "libfrog/workqueue.h"
#include "libfrog/paths.h"
#include "xfs_scrub.h"
#include "common.h"
#include "counter.h"
#include "disk.h"
#include "read_verify.h"
#include "progress.h"

/*
 * Read Verify Pool
 *
 * Manages the data block read verification phase. The caller schedules
 * verification requests, which are then run by thread pool workers.
 * Adjacent (or nearly adjacent) requests can be combined to reduce
 * overhead when free space fragmentation is high. The thread pool
 * takes care of issuing multiple IOs to the device, if possible.
 */
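
/*
 * Sketch of the intended calling sequence, based only on the functions
 * defined below (argument names here are placeholders, not the real
 * caller's variables):
 *
 *	read_verify_pool_alloc(ctx, disk, miniosz, ioerr_fn, nthreads, &rvp);
 *	read_verify_schedule_io(rvp, start, length, end_arg);	// repeat as needed
 *	read_verify_force_io(rvp);	// push per-thread stashed requests
 *	read_verify_pool_flush(rvp);	// wait for the verifier threads
 *	read_verify_bytes(rvp, &bytes_checked);
 *	read_verify_pool_destroy(rvp);
 *
 * Note that the functions in this file return positive errno values to
 * their callers, while the libfrog helpers they call return negative
 * ones; hence the '-' negations sprinkled below.
 */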

/*
 * Perform all IO in 32M chunks. This cannot exceed 65536 sectors
 * because that's the biggest SCSI VERIFY(16) we dare to send.
 */
#define RVP_IO_MAX_SIZE		(33554432)
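/* 65536 sectors * 512 bytes per sector = 33554432 bytes, i.e. 32 MiB. */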

/*
 * If we're running in the background then we perform IO in 128k chunks
 * to reduce the load on the IO subsystem.
 */
#define RVP_BACKGROUND_IO_MAX_SIZE	(131072)

/* What's the real maximum IO size? */
static inline unsigned int
rvp_io_max_size(void)
{
	return bg_mode > 0 ? RVP_BACKGROUND_IO_MAX_SIZE : RVP_IO_MAX_SIZE;
}

/* Tolerate 64k holes in adjacent read verify requests. */
#define RVP_IO_BATCH_LOCALITY	(65536)
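
/*
 * Example with made-up numbers: a stashed request covering bytes
 * [0, 1048576) and a new request starting at byte 1081344 (with the same
 * end_arg) leave a 32768-byte hole, which is within the 64k locality
 * window, so read_verify_schedule_io() merges them into a single request
 * spanning both ranges and the hole between them.
 */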

struct read_verify {
	void			*io_end_arg;
	struct disk		*io_disk;
	uint64_t		io_start;	/* bytes */
	uint64_t		io_length;	/* bytes */
};

struct read_verify_pool {
	struct workqueue	wq;		/* thread pool */
	struct scrub_ctx	*ctx;		/* scrub context */
	void			*readbuf;	/* read buffer */
	struct ptcounter	*verified_bytes;
	struct ptvar		*rvstate;	/* combines read requests */
	struct disk		*disk;		/* which disk? */
	read_verify_ioerr_fn_t	ioerr_fn;	/* io error callback */
	size_t			miniosz;	/* minimum io size, bytes */

	/*
	 * Store a runtime error code here so that we can stop the pool and
	 * return it to the caller.
	 */
	int			runtime_error;
};

/*
 * Create a thread pool to run read verifiers.
 *
 * @disk is the disk we want to verify.
 * @miniosz is the minimum size of an IO to expect (in bytes).
 * @ioerr_fn will be called when IO errors occur.
 * @submitter_threads is the number of threads that may be sending verify
 * requests at any given time.
 */
int
read_verify_pool_alloc(
	struct scrub_ctx		*ctx,
	struct disk			*disk,
	size_t				miniosz,
	read_verify_ioerr_fn_t		ioerr_fn,
	unsigned int			submitter_threads,
	struct read_verify_pool	**prvp)
{
	struct read_verify_pool	*rvp;
	unsigned int			verifier_threads = disk_heads(disk);
	int				ret;

	/*
	 * The minimum IO size must be a multiple of the disk sector size
	 * and a factor of the max io size.
	 */
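	/*
	 * For example (illustrative numbers only): with 512-byte sectors, a
	 * miniosz of 4096 passes both checks below, since 4096 is a multiple
	 * of 512 and evenly divides both possible maximum IO sizes; a miniosz
	 * of 3072 would be rejected by the second check.
	 */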
	if (miniosz % disk->d_lbasize)
		return EINVAL;
	if (rvp_io_max_size() % miniosz)
		return EINVAL;

	rvp = calloc(1, sizeof(struct read_verify_pool));
	if (!rvp)
		return errno;

	ret = posix_memalign((void **)&rvp->readbuf, page_size,
			rvp_io_max_size());
	if (ret)
		goto out_free;
	ret = ptcounter_alloc(verifier_threads, &rvp->verified_bytes);
	if (ret)
		goto out_buf;
	rvp->miniosz = miniosz;
	rvp->ctx = ctx;
	rvp->disk = disk;
	rvp->ioerr_fn = ioerr_fn;
	ret = -ptvar_alloc(submitter_threads, sizeof(struct read_verify),
			&rvp->rvstate);
	if (ret)
		goto out_counter;
	ret = -workqueue_create(&rvp->wq, (struct xfs_mount *)rvp,
			verifier_threads == 1 ? 0 : verifier_threads);
	if (ret)
		goto out_rvstate;
	*prvp = rvp;
	return 0;

out_rvstate:
	ptvar_free(rvp->rvstate);
out_counter:
	ptcounter_free(rvp->verified_bytes);
out_buf:
	free(rvp->readbuf);
out_free:
	free(rvp);
	return ret;
}

/* Abort all verification work. */
void
read_verify_pool_abort(
	struct read_verify_pool	*rvp)
{
	if (!rvp->runtime_error)
		rvp->runtime_error = ECANCELED;
	workqueue_terminate(&rvp->wq);
}

/* Finish up any read verification work. */
int
read_verify_pool_flush(
	struct read_verify_pool	*rvp)
{
	return -workqueue_terminate(&rvp->wq);
}

/* Finish up any read verification work and tear it down. */
void
read_verify_pool_destroy(
	struct read_verify_pool	*rvp)
{
	workqueue_destroy(&rvp->wq);
	ptvar_free(rvp->rvstate);
	ptcounter_free(rvp->verified_bytes);
	free(rvp->readbuf);
	free(rvp);
}

/*
 * Issue a read-verify IO in big batches.
 */
static void
read_verify(
	struct workqueue		*wq,
	xfs_agnumber_t			agno,
	void				*arg)
{
	struct read_verify		*rv = arg;
	struct read_verify_pool	*rvp;
	unsigned long long		verified = 0;
	ssize_t				io_max_size;
	ssize_t				sz;
	ssize_t				len;
	int				read_error;
	int				ret;

	rvp = (struct read_verify_pool *)wq->wq_ctx;
	if (rvp->runtime_error)
		return;

	io_max_size = rvp_io_max_size();

	while (rv->io_length > 0) {
		read_error = 0;
		len = min(rv->io_length, io_max_size);
		dbg_printf("diskverify %d %"PRIu64" %zu\n", rvp->disk->d_fd,
				rv->io_start, len);
		sz = disk_read_verify(rvp->disk, rvp->readbuf, rv->io_start,
				len);
		if (sz == len && io_max_size < rvp->miniosz) {
			/*
			 * If the verify request was 100% successful and less
			 * than a single block in length, we were trying to
			 * read to the end of a block after a short read. That
			 * suggests there's something funny with this device,
			 * so single-step our way through the rest of the @rv
			 * range.
			 */
			io_max_size = rvp->miniosz;
		} else if (sz < 0) {
			read_error = errno;

			/* Runtime error, bail out... */
			if (read_error != EIO && read_error != EILSEQ) {
				rvp->runtime_error = read_error;
				return;
			}

			/*
			 * A direct read encountered an error while performing
			 * a multi-block read. Reduce the transfer size to a
			 * single block so that we can identify the exact range
			 * of bad blocks and good blocks. We single-step all
			 * the way to the end of the @rv range, (re)starting
			 * with the block that just failed.
			 */
			if (io_max_size > rvp->miniosz) {
				io_max_size = rvp->miniosz;
				continue;
			}

			/*
			 * A direct read hit an error while we were stepping
			 * through single blocks. Mark everything bad from
			 * io_start to the next miniosz block.
			 */
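			/*
			 * For example (made-up numbers): with a miniosz of
			 * 4096 and io_start at 6144, 4096 - (6144 % 4096)
			 * = 2048 bytes remain in the current block, so only
			 * those 2048 bytes are reported to the ioerr_fn.
			 */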
			sz = rvp->miniosz - (rv->io_start % rvp->miniosz);
			dbg_printf("IOERR %d @ %"PRIu64" %zu err %d\n",
					rvp->disk->d_fd, rv->io_start, sz,
					read_error);
			rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, sz,
					read_error, rv->io_end_arg);
		} else if (sz < len) {
			/*
			 * A short direct read suggests that we might have hit
			 * an IO error midway through the read but still had to
			 * return the number of bytes that were actually read.
			 *
			 * We need to force an EIO, so try reading the rest of
			 * the block (if it was a partial block read) or the
			 * next full block.
			 */
			io_max_size = rvp->miniosz - (sz % rvp->miniosz);
			dbg_printf("SHORT %d READ @ %"PRIu64" %zu try for %zd\n",
					rvp->disk->d_fd, rv->io_start, sz,
					io_max_size);
		} else {
			/* We should never get back more bytes than we asked for. */
			assert(sz == len);
		}

		progress_add(sz);
		if (read_error == 0)
			verified += sz;
		rv->io_start += sz;
		rv->io_length -= sz;
		background_sleep();
	}

	free(rv);
	ret = ptcounter_add(rvp->verified_bytes, verified);
	if (ret)
		rvp->runtime_error = ret;
}

/* Queue a read verify request. */
static int
read_verify_queue(
	struct read_verify_pool	*rvp,
	struct read_verify		*rv)
{
	struct read_verify		*tmp;
	int				ret;

	dbg_printf("verify fd %d start %"PRIu64" len %"PRIu64"\n",
			rvp->disk->d_fd, rv->io_start, rv->io_length);

	/* Worker thread saw a runtime error; don't queue more. */
	if (rvp->runtime_error)
		return rvp->runtime_error;

	/* Otherwise clone the request and queue the copy. */
	tmp = malloc(sizeof(struct read_verify));
	if (!tmp) {
		rvp->runtime_error = errno;
		return errno;
	}

	memcpy(tmp, rv, sizeof(*tmp));

	ret = -workqueue_add(&rvp->wq, read_verify, 0, tmp);
	if (ret) {
		free(tmp);
		rvp->runtime_error = ret;
		return ret;
	}

	rv->io_length = 0;
	return 0;
}

/*
 * Issue an IO request. We'll batch subsequent requests if they're
 * within 64k of each other.
 */
int
read_verify_schedule_io(
	struct read_verify_pool	*rvp,
	uint64_t			start,
	uint64_t			length,
	void				*end_arg)
{
	struct read_verify		*rv;
	uint64_t			req_end;
	uint64_t			rv_end;
	int				ret;

	assert(rvp->readbuf);

	/* Round the start down and the length up to miniosz granularity. */
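	/*
	 * For example (hypothetical values): with miniosz = 4096, a start of
	 * 5000 is rounded down to 4096 and a length of 10000 is rounded up to
	 * 12288, so the request covers whole miniosz blocks. Note that the
	 * mask below only rounds down correctly when miniosz is a power of
	 * two.
	 */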
	start &= ~(rvp->miniosz - 1);
	length = roundup(length, rvp->miniosz);

	rv = ptvar_get(rvp->rvstate, &ret);
	if (ret)
		return -ret;
	req_end = start + length;
	rv_end = rv->io_start + rv->io_length;

	/*
	 * If we have a stashed IO, we haven't changed fds, the error
	 * reporting is the same, and the two extents are close,
	 * we can combine them.
	 */
	if (rv->io_length > 0 &&
	    end_arg == rv->io_end_arg &&
	    ((start >= rv->io_start && start <= rv_end + RVP_IO_BATCH_LOCALITY) ||
	     (rv->io_start >= start &&
	      rv->io_start <= req_end + RVP_IO_BATCH_LOCALITY))) {
		rv->io_start = min(rv->io_start, start);
		rv->io_length = max(req_end, rv_end) - rv->io_start;
	} else {
		/* Otherwise, issue the stashed IO (if there is one) */
		if (rv->io_length > 0) {
			int	res;

			res = read_verify_queue(rvp, rv);
			if (res)
				return res;
		}

		/* Stash the new IO. */
		rv->io_start = start;
		rv->io_length = length;
		rv->io_end_arg = end_arg;
	}

	return 0;
}

/* Force any per-thread stashed IOs into the verifier. */
static int
force_one_io(
	struct ptvar			*ptv,
	void				*data,
	void				*foreach_arg)
{
	struct read_verify_pool	*rvp = foreach_arg;
	struct read_verify		*rv = data;

	if (rv->io_length == 0)
		return 0;

	return -read_verify_queue(rvp, rv);
}

/* Force any stashed IOs into the verifier. */
int
read_verify_force_io(
	struct read_verify_pool	*rvp)
{
	assert(rvp->readbuf);

	return -ptvar_foreach(rvp->rvstate, force_one_io, rvp);
}

/* How many bytes has this process verified? */
int
read_verify_bytes(
	struct read_verify_pool	*rvp,
	uint64_t			*bytes_checked)
{
	return ptcounter_value(rvp->verified_bytes, bytes_checked);
}