]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/read_verify.c
libfrog: fix workqueue error communication problems
[thirdparty/xfsprogs-dev.git] / scrub / read_verify.c
CommitLineData
959ef981 1// SPDX-License-Identifier: GPL-2.0+
2000470d
DW
2/*
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
2000470d 4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
2000470d 5 */
a440f877 6#include "xfs.h"
2000470d 7#include <stdint.h>
2000470d
DW
8#include <stdlib.h>
9#include <sys/statvfs.h>
14051909 10#include "libfrog/ptvar.h"
56598728 11#include "libfrog/workqueue.h"
42b4c8e8 12#include "libfrog/paths.h"
2000470d
DW
13#include "xfs_scrub.h"
14#include "common.h"
15#include "counter.h"
16#include "disk.h"
17#include "read_verify.h"
ed60d210 18#include "progress.h"
2000470d
DW
19
20/*
21 * Read Verify Pool
22 *
23 * Manages the data block read verification phase. The caller schedules
24 * verification requests, which are then scheduled to be run by a thread
25 * pool worker. Adjacent (or nearly adjacent) requests can be combined
26 * to reduce overhead when free space fragmentation is high. The thread
27 * pool takes care of issuing multiple IOs to the device, if possible.
28 */
29
/*
 * Largest single verify IO, in bytes (32M).  This must not exceed 65536
 * sectors, because that is the biggest SCSI VERIFY(16) we dare to send.
 */
#define RVP_IO_MAX_SIZE		(32 * 1024 * 1024)
#define RVP_IO_MAX_SECTORS	(RVP_IO_MAX_SIZE >> BBSHIFT)

/* Requests separated by a hole of up to 64k may still be merged. */
#define RVP_IO_BATCH_LOCALITY	(64 * 1024)
39
41c08606
DW
/* One byte range of a disk that is waiting to be read-verified. */
struct read_verify {
	void			*io_end_arg;	/* passed through to ioerr_fn */
	struct disk		*io_disk;
	uint64_t		io_start;	/* bytes */
	uint64_t		io_length;	/* bytes; 0 means nothing stashed */
};
46
2000470d
DW
47struct read_verify_pool {
48 struct workqueue wq; /* thread pool */
49 struct scrub_ctx *ctx; /* scrub context */
50 void *readbuf; /* read buffer */
51 struct ptcounter *verified_bytes;
41c08606 52 struct ptvar *rvstate; /* combines read requests */
f1bb1696 53 struct disk *disk; /* which disk? */
2000470d
DW
54 read_verify_ioerr_fn_t ioerr_fn; /* io error callback */
55 size_t miniosz; /* minimum io size, bytes */
56};
57
41c08606
DW
58/*
59 * Create a thread pool to run read verifiers.
60 *
f1bb1696 61 * @disk is the disk we want to verify.
41c08606
DW
62 * @miniosz is the minimum size of an IO to expect (in bytes).
63 * @ioerr_fn will be called when IO errors occur.
41c08606
DW
64 * @submitter_threads is the number of threads that may be sending verify
65 * requests at any given time.
66 */
2000470d
DW
67struct read_verify_pool *
68read_verify_pool_init(
69 struct scrub_ctx *ctx,
f1bb1696 70 struct disk *disk,
2000470d
DW
71 size_t miniosz,
72 read_verify_ioerr_fn_t ioerr_fn,
41c08606 73 unsigned int submitter_threads)
2000470d
DW
74{
75 struct read_verify_pool *rvp;
76 bool ret;
77 int error;
78
79 rvp = calloc(1, sizeof(struct read_verify_pool));
80 if (!rvp)
81 return NULL;
82
83 error = posix_memalign((void **)&rvp->readbuf, page_size,
84 RVP_IO_MAX_SIZE);
85 if (error || !rvp->readbuf)
86 goto out_free;
87 rvp->verified_bytes = ptcounter_init(nproc);
88 if (!rvp->verified_bytes)
89 goto out_buf;
90 rvp->miniosz = miniosz;
91 rvp->ctx = ctx;
f1bb1696 92 rvp->disk = disk;
2000470d 93 rvp->ioerr_fn = ioerr_fn;
41c08606
DW
94 rvp->rvstate = ptvar_init(submitter_threads,
95 sizeof(struct read_verify));
96 if (rvp->rvstate == NULL)
97 goto out_counter;
2000470d
DW
98 /* Run in the main thread if we only want one thread. */
99 if (nproc == 1)
100 nproc = 0;
f1bb1696
DW
101 ret = workqueue_create(&rvp->wq, (struct xfs_mount *)rvp,
102 disk_heads(disk));
2000470d 103 if (ret)
41c08606 104 goto out_rvstate;
2000470d
DW
105 return rvp;
106
41c08606
DW
107out_rvstate:
108 ptvar_free(rvp->rvstate);
2000470d
DW
109out_counter:
110 ptcounter_free(rvp->verified_bytes);
111out_buf:
112 free(rvp->readbuf);
113out_free:
114 free(rvp);
115 return NULL;
116}
117
118/* Finish up any read verification work. */
119void
120read_verify_pool_flush(
121 struct read_verify_pool *rvp)
122{
123 workqueue_destroy(&rvp->wq);
124}
125
126/* Finish up any read verification work and tear it down. */
127void
128read_verify_pool_destroy(
129 struct read_verify_pool *rvp)
130{
41c08606 131 ptvar_free(rvp->rvstate);
2000470d
DW
132 ptcounter_free(rvp->verified_bytes);
133 free(rvp->readbuf);
134 free(rvp);
135}
136
137/*
138 * Issue a read-verify IO in big batches.
139 */
140static void
141read_verify(
142 struct workqueue *wq,
143 xfs_agnumber_t agno,
144 void *arg)
145{
146 struct read_verify *rv = arg;
147 struct read_verify_pool *rvp;
148 unsigned long long verified = 0;
149 ssize_t sz;
150 ssize_t len;
151
152 rvp = (struct read_verify_pool *)wq->wq_ctx;
153 while (rv->io_length > 0) {
154 len = min(rv->io_length, RVP_IO_MAX_SIZE);
f1bb1696 155 dbg_printf("diskverify %d %"PRIu64" %zu\n", rvp->disk->d_fd,
2000470d 156 rv->io_start, len);
f1bb1696
DW
157 sz = disk_read_verify(rvp->disk, rvp->readbuf, rv->io_start,
158 len);
2000470d
DW
159 if (sz < 0) {
160 dbg_printf("IOERR %d %"PRIu64" %zu\n",
f1bb1696 161 rvp->disk->d_fd, rv->io_start, len);
2000470d
DW
162 /* IO error, so try the next logical block. */
163 len = rvp->miniosz;
f1bb1696 164 rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, len,
2000470d
DW
165 errno, rv->io_end_arg);
166 }
167
ed60d210 168 progress_add(len);
2000470d
DW
169 verified += len;
170 rv->io_start += len;
171 rv->io_length -= len;
172 }
173
174 free(rv);
175 ptcounter_add(rvp->verified_bytes, verified);
176}
177
178/* Queue a read verify request. */
179static bool
180read_verify_queue(
181 struct read_verify_pool *rvp,
182 struct read_verify *rv)
183{
184 struct read_verify *tmp;
185 bool ret;
186
187 dbg_printf("verify fd %d start %"PRIu64" len %"PRIu64"\n",
f1bb1696 188 rvp->disk->d_fd, rv->io_start, rv->io_length);
2000470d
DW
189
190 tmp = malloc(sizeof(struct read_verify));
191 if (!tmp) {
f1bb1696 192 rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start,
2000470d
DW
193 rv->io_length, errno, rv->io_end_arg);
194 return true;
195 }
196 memcpy(tmp, rv, sizeof(*tmp));
197
198 ret = workqueue_add(&rvp->wq, read_verify, 0, tmp);
199 if (ret) {
9d57cbfc 200 str_liberror(rvp->ctx, ret, _("queueing read-verify work"));
2000470d
DW
201 free(tmp);
202 return false;
203 }
204 rv->io_length = 0;
205 return true;
206}
207
208/*
209 * Issue an IO request. We'll batch subsequent requests if they're
210 * within 64k of each other
211 */
212bool
213read_verify_schedule_io(
214 struct read_verify_pool *rvp,
2000470d
DW
215 uint64_t start,
216 uint64_t length,
217 void *end_arg)
218{
41c08606 219 struct read_verify *rv;
2000470d
DW
220 uint64_t req_end;
221 uint64_t rv_end;
222
223 assert(rvp->readbuf);
41c08606 224 rv = ptvar_get(rvp->rvstate);
2000470d
DW
225 req_end = start + length;
226 rv_end = rv->io_start + rv->io_length;
227
228 /*
229 * If we have a stashed IO, we haven't changed fds, the error
230 * reporting is the same, and the two extents are close,
231 * we can combine them.
232 */
f1bb1696 233 if (rv->io_length > 0 &&
2000470d
DW
234 end_arg == rv->io_end_arg &&
235 ((start >= rv->io_start && start <= rv_end + RVP_IO_BATCH_LOCALITY) ||
236 (rv->io_start >= start &&
237 rv->io_start <= req_end + RVP_IO_BATCH_LOCALITY))) {
238 rv->io_start = min(rv->io_start, start);
239 rv->io_length = max(req_end, rv_end) - rv->io_start;
240 } else {
241 /* Otherwise, issue the stashed IO (if there is one) */
242 if (rv->io_length > 0)
243 return read_verify_queue(rvp, rv);
244
245 /* Stash the new IO. */
2000470d
DW
246 rv->io_start = start;
247 rv->io_length = length;
248 rv->io_end_arg = end_arg;
249 }
250
251 return true;
252}
253
254/* Force any stashed IOs into the verifier. */
255bool
256read_verify_force_io(
41c08606 257 struct read_verify_pool *rvp)
2000470d 258{
41c08606 259 struct read_verify *rv;
2000470d
DW
260 bool moveon;
261
262 assert(rvp->readbuf);
41c08606 263 rv = ptvar_get(rvp->rvstate);
2000470d
DW
264 if (rv->io_length == 0)
265 return true;
266
267 moveon = read_verify_queue(rvp, rv);
268 if (moveon)
269 rv->io_length = 0;
270 return moveon;
271}
272
273/* How many bytes has this process verified? */
274uint64_t
275read_verify_bytes(
276 struct read_verify_pool *rvp)
277{
278 return ptcounter_value(rvp->verified_bytes);
279}