/*
 * Copyright (C) 2018 Oracle.  All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <sys/statvfs.h>
#include "workqueue.h"
#include "path.h"
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_scrub.h"
#include "common.h"
#include "counter.h"
#include "disk.h"
#include "read_verify.h"
#include "progress.h"

/*
 * Read Verify Pool
 *
 * Manages the data block read verification phase.  The caller schedules
 * verification requests, which are then scheduled to be run by a thread
 * pool worker.  Adjacent (or nearly adjacent) requests can be combined
 * to reduce overhead when free space fragmentation is high.  The thread
 * pool takes care of issuing multiple IOs to the device, if possible.
 */

/*
 * Perform all IO in 32M chunks.  This cannot exceed 65536 sectors
 * because that's the biggest SCSI VERIFY(16) we dare to send.
 */
#define RVP_IO_MAX_SIZE		(33554432)
#define RVP_IO_MAX_SECTORS	(RVP_IO_MAX_SIZE >> BBSHIFT)

/* Tolerate 64k holes in adjacent read verify requests. */
#define RVP_IO_BATCH_LOCALITY	(65536)
/* State for one media-verification thread pool. */
struct read_verify_pool {
	struct workqueue	wq;		/* thread pool */
	struct scrub_ctx	*ctx;		/* scrub context */
	void			*readbuf;	/* read buffer, shared by all workers */
	struct ptcounter	*verified_bytes; /* per-thread count of bytes verified */
	read_verify_ioerr_fn_t	ioerr_fn;	/* io error callback */
	size_t			miniosz;	/* minimum io size, bytes */
};
64
65/* Create a thread pool to run read verifiers. */
66struct read_verify_pool *
67read_verify_pool_init(
68 struct scrub_ctx *ctx,
69 size_t miniosz,
70 read_verify_ioerr_fn_t ioerr_fn,
71 unsigned int nproc)
72{
73 struct read_verify_pool *rvp;
74 bool ret;
75 int error;
76
77 rvp = calloc(1, sizeof(struct read_verify_pool));
78 if (!rvp)
79 return NULL;
80
81 error = posix_memalign((void **)&rvp->readbuf, page_size,
82 RVP_IO_MAX_SIZE);
83 if (error || !rvp->readbuf)
84 goto out_free;
85 rvp->verified_bytes = ptcounter_init(nproc);
86 if (!rvp->verified_bytes)
87 goto out_buf;
88 rvp->miniosz = miniosz;
89 rvp->ctx = ctx;
90 rvp->ioerr_fn = ioerr_fn;
91 /* Run in the main thread if we only want one thread. */
92 if (nproc == 1)
93 nproc = 0;
94 ret = workqueue_create(&rvp->wq, (struct xfs_mount *)rvp, nproc);
95 if (ret)
96 goto out_counter;
97 return rvp;
98
99out_counter:
100 ptcounter_free(rvp->verified_bytes);
101out_buf:
102 free(rvp->readbuf);
103out_free:
104 free(rvp);
105 return NULL;
106}
107
/*
 * Finish up any read verification work.
 *
 * Tears down the workqueue; presumably this drains all queued verify
 * requests before returning (see workqueue_destroy) — pool memory is
 * released separately by read_verify_pool_destroy.
 */
void
read_verify_pool_flush(
	struct read_verify_pool *rvp)
{
	workqueue_destroy(&rvp->wq);
}
115
116/* Finish up any read verification work and tear it down. */
117void
118read_verify_pool_destroy(
119 struct read_verify_pool *rvp)
120{
121 ptcounter_free(rvp->verified_bytes);
122 free(rvp->readbuf);
123 free(rvp);
124}
125
126/*
127 * Issue a read-verify IO in big batches.
128 */
129static void
130read_verify(
131 struct workqueue *wq,
132 xfs_agnumber_t agno,
133 void *arg)
134{
135 struct read_verify *rv = arg;
136 struct read_verify_pool *rvp;
137 unsigned long long verified = 0;
138 ssize_t sz;
139 ssize_t len;
140
141 rvp = (struct read_verify_pool *)wq->wq_ctx;
142 while (rv->io_length > 0) {
143 len = min(rv->io_length, RVP_IO_MAX_SIZE);
144 dbg_printf("diskverify %d %"PRIu64" %zu\n", rv->io_disk->d_fd,
145 rv->io_start, len);
146 sz = disk_read_verify(rv->io_disk, rvp->readbuf,
147 rv->io_start, len);
148 if (sz < 0) {
149 dbg_printf("IOERR %d %"PRIu64" %zu\n",
150 rv->io_disk->d_fd,
151 rv->io_start, len);
152 /* IO error, so try the next logical block. */
153 len = rvp->miniosz;
154 rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start, len,
155 errno, rv->io_end_arg);
156 }
157
ed60d210 158 progress_add(len);
2000470d
DW
159 verified += len;
160 rv->io_start += len;
161 rv->io_length -= len;
162 }
163
164 free(rv);
165 ptcounter_add(rvp->verified_bytes, verified);
166}
167
168/* Queue a read verify request. */
169static bool
170read_verify_queue(
171 struct read_verify_pool *rvp,
172 struct read_verify *rv)
173{
174 struct read_verify *tmp;
175 bool ret;
176
177 dbg_printf("verify fd %d start %"PRIu64" len %"PRIu64"\n",
178 rv->io_disk->d_fd, rv->io_start, rv->io_length);
179
180 tmp = malloc(sizeof(struct read_verify));
181 if (!tmp) {
182 rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start,
183 rv->io_length, errno, rv->io_end_arg);
184 return true;
185 }
186 memcpy(tmp, rv, sizeof(*tmp));
187
188 ret = workqueue_add(&rvp->wq, read_verify, 0, tmp);
189 if (ret) {
82377bde 190 str_info(rvp->ctx, rvp->ctx->mntpoint,
2000470d
DW
191_("Could not queue read-verify work."));
192 free(tmp);
193 return false;
194 }
195 rv->io_length = 0;
196 return true;
197}
198
199/*
200 * Issue an IO request. We'll batch subsequent requests if they're
201 * within 64k of each other
202 */
203bool
204read_verify_schedule_io(
205 struct read_verify_pool *rvp,
206 struct read_verify *rv,
207 struct disk *disk,
208 uint64_t start,
209 uint64_t length,
210 void *end_arg)
211{
212 uint64_t req_end;
213 uint64_t rv_end;
214
215 assert(rvp->readbuf);
216 req_end = start + length;
217 rv_end = rv->io_start + rv->io_length;
218
219 /*
220 * If we have a stashed IO, we haven't changed fds, the error
221 * reporting is the same, and the two extents are close,
222 * we can combine them.
223 */
224 if (rv->io_length > 0 && disk == rv->io_disk &&
225 end_arg == rv->io_end_arg &&
226 ((start >= rv->io_start && start <= rv_end + RVP_IO_BATCH_LOCALITY) ||
227 (rv->io_start >= start &&
228 rv->io_start <= req_end + RVP_IO_BATCH_LOCALITY))) {
229 rv->io_start = min(rv->io_start, start);
230 rv->io_length = max(req_end, rv_end) - rv->io_start;
231 } else {
232 /* Otherwise, issue the stashed IO (if there is one) */
233 if (rv->io_length > 0)
234 return read_verify_queue(rvp, rv);
235
236 /* Stash the new IO. */
237 rv->io_disk = disk;
238 rv->io_start = start;
239 rv->io_length = length;
240 rv->io_end_arg = end_arg;
241 }
242
243 return true;
244}
245
246/* Force any stashed IOs into the verifier. */
247bool
248read_verify_force_io(
249 struct read_verify_pool *rvp,
250 struct read_verify *rv)
251{
252 bool moveon;
253
254 assert(rvp->readbuf);
255 if (rv->io_length == 0)
256 return true;
257
258 moveon = read_verify_queue(rvp, rv);
259 if (moveon)
260 rv->io_length = 0;
261 return moveon;
262}
263
/*
 * How many bytes has this process verified?
 *
 * Reads the pool's per-thread byte counter; presumably this sums the
 * per-thread values into one total (see ptcounter_value).
 */
uint64_t
read_verify_bytes(
	struct read_verify_pool	*rvp)
{
	return ptcounter_value(rvp->verified_bytes);
}