]>
Commit | Line | Data |
---|---|---|
959ef981 | 1 | // SPDX-License-Identifier: GPL-2.0+ |
2000470d DW |
2 | /* |
3 | * Copyright (C) 2018 Oracle. All Rights Reserved. | |
2000470d | 4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> |
2000470d | 5 | */ |
a440f877 | 6 | #include "xfs.h" |
2000470d | 7 | #include <stdint.h> |
2000470d DW |
8 | #include <stdlib.h> |
9 | #include <sys/statvfs.h> | |
14051909 | 10 | #include "libfrog/ptvar.h" |
56598728 | 11 | #include "libfrog/workqueue.h" |
42b4c8e8 | 12 | #include "libfrog/paths.h" |
2000470d DW |
13 | #include "xfs_scrub.h" |
14 | #include "common.h" | |
15 | #include "counter.h" | |
16 | #include "disk.h" | |
17 | #include "read_verify.h" | |
ed60d210 | 18 | #include "progress.h" |
2000470d DW |
19 | |
20 | /* | |
21 | * Read Verify Pool | |
22 | * | |
23 | * Manages the data block read verification phase. The caller schedules | |
24 | * verification requests, which are then scheduled to be run by a thread | |
25 | * pool worker. Adjacent (or nearly adjacent) requests can be combined | |
26 | * to reduce overhead when free space fragmentation is high. The thread | |
27 | * pool takes care of issuing multiple IOs to the device, if possible. | |
28 | */ | |
29 | ||
30 | /* | |
31 | * Perform all IO in 32M chunks. This cannot exceed 65536 sectors | |
32 | * because that's the biggest SCSI VERIFY(16) we dare to send. | |
33 | */ | |
34 | #define RVP_IO_MAX_SIZE (33554432) | |
35 | #define RVP_IO_MAX_SECTORS (RVP_IO_MAX_SIZE >> BBSHIFT) | |
36 | ||
37 | /* Tolerate 64k holes in adjacent read verify requests. */ | |
38 | #define RVP_IO_BATCH_LOCALITY (65536) | |
39 | ||
41c08606 DW |
/*
 * A single read-verification request: a contiguous byte range to be read
 * back from one disk, plus the caller's opaque context that is handed to
 * the pool's IO-error callback.
 */
struct read_verify {
	void *io_end_arg;	/* opaque caller context for ioerr_fn */
	struct disk *io_disk;	/* NOTE(review): never read in this file; workers use rvp->disk instead — confirm it is still needed */
	uint64_t io_start;	/* bytes */
	uint64_t io_length;	/* bytes */
};
46 | ||
2000470d DW |
/*
 * State for one data-block read-verification pool: a worker thread pool,
 * one shared aligned read buffer, a per-thread stash of combinable
 * requests, and a per-thread counter of bytes verified.
 */
struct read_verify_pool {
	struct workqueue wq;		/* thread pool */
	struct scrub_ctx *ctx;		/* scrub context */
	void *readbuf;			/* read buffer, RVP_IO_MAX_SIZE bytes, page-aligned */
	struct ptcounter *verified_bytes;	/* total bytes verified so far */
	struct ptvar *rvstate;		/* combines read requests (one struct read_verify per submitter thread) */
	struct disk *disk;		/* which disk? */
	read_verify_ioerr_fn_t ioerr_fn;	/* io error callback */
	size_t miniosz;			/* minimum io size, bytes; also the skip-ahead unit after an IO error */
};
57 | ||
41c08606 DW |
58 | /* |
59 | * Create a thread pool to run read verifiers. | |
60 | * | |
f1bb1696 | 61 | * @disk is the disk we want to verify. |
41c08606 DW |
62 | * @miniosz is the minimum size of an IO to expect (in bytes). |
63 | * @ioerr_fn will be called when IO errors occur. | |
41c08606 DW |
64 | * @submitter_threads is the number of threads that may be sending verify |
65 | * requests at any given time. | |
66 | */ | |
2000470d DW |
67 | struct read_verify_pool * |
68 | read_verify_pool_init( | |
69 | struct scrub_ctx *ctx, | |
f1bb1696 | 70 | struct disk *disk, |
2000470d DW |
71 | size_t miniosz, |
72 | read_verify_ioerr_fn_t ioerr_fn, | |
41c08606 | 73 | unsigned int submitter_threads) |
2000470d DW |
74 | { |
75 | struct read_verify_pool *rvp; | |
76 | bool ret; | |
77 | int error; | |
78 | ||
79 | rvp = calloc(1, sizeof(struct read_verify_pool)); | |
80 | if (!rvp) | |
81 | return NULL; | |
82 | ||
83 | error = posix_memalign((void **)&rvp->readbuf, page_size, | |
84 | RVP_IO_MAX_SIZE); | |
85 | if (error || !rvp->readbuf) | |
86 | goto out_free; | |
87 | rvp->verified_bytes = ptcounter_init(nproc); | |
88 | if (!rvp->verified_bytes) | |
89 | goto out_buf; | |
90 | rvp->miniosz = miniosz; | |
91 | rvp->ctx = ctx; | |
f1bb1696 | 92 | rvp->disk = disk; |
2000470d | 93 | rvp->ioerr_fn = ioerr_fn; |
41c08606 DW |
94 | rvp->rvstate = ptvar_init(submitter_threads, |
95 | sizeof(struct read_verify)); | |
96 | if (rvp->rvstate == NULL) | |
97 | goto out_counter; | |
2000470d DW |
98 | /* Run in the main thread if we only want one thread. */ |
99 | if (nproc == 1) | |
100 | nproc = 0; | |
f1bb1696 DW |
101 | ret = workqueue_create(&rvp->wq, (struct xfs_mount *)rvp, |
102 | disk_heads(disk)); | |
2000470d | 103 | if (ret) |
41c08606 | 104 | goto out_rvstate; |
2000470d DW |
105 | return rvp; |
106 | ||
41c08606 DW |
107 | out_rvstate: |
108 | ptvar_free(rvp->rvstate); | |
2000470d DW |
109 | out_counter: |
110 | ptcounter_free(rvp->verified_bytes); | |
111 | out_buf: | |
112 | free(rvp->readbuf); | |
113 | out_free: | |
114 | free(rvp); | |
115 | return NULL; | |
116 | } | |
117 | ||
118 | /* Finish up any read verification work. */ | |
119 | void | |
120 | read_verify_pool_flush( | |
121 | struct read_verify_pool *rvp) | |
122 | { | |
123 | workqueue_destroy(&rvp->wq); | |
124 | } | |
125 | ||
126 | /* Finish up any read verification work and tear it down. */ | |
127 | void | |
128 | read_verify_pool_destroy( | |
129 | struct read_verify_pool *rvp) | |
130 | { | |
41c08606 | 131 | ptvar_free(rvp->rvstate); |
2000470d DW |
132 | ptcounter_free(rvp->verified_bytes); |
133 | free(rvp->readbuf); | |
134 | free(rvp); | |
135 | } | |
136 | ||
137 | /* | |
138 | * Issue a read-verify IO in big batches. | |
139 | */ | |
140 | static void | |
141 | read_verify( | |
142 | struct workqueue *wq, | |
143 | xfs_agnumber_t agno, | |
144 | void *arg) | |
145 | { | |
146 | struct read_verify *rv = arg; | |
147 | struct read_verify_pool *rvp; | |
148 | unsigned long long verified = 0; | |
149 | ssize_t sz; | |
150 | ssize_t len; | |
151 | ||
152 | rvp = (struct read_verify_pool *)wq->wq_ctx; | |
153 | while (rv->io_length > 0) { | |
154 | len = min(rv->io_length, RVP_IO_MAX_SIZE); | |
f1bb1696 | 155 | dbg_printf("diskverify %d %"PRIu64" %zu\n", rvp->disk->d_fd, |
2000470d | 156 | rv->io_start, len); |
f1bb1696 DW |
157 | sz = disk_read_verify(rvp->disk, rvp->readbuf, rv->io_start, |
158 | len); | |
2000470d DW |
159 | if (sz < 0) { |
160 | dbg_printf("IOERR %d %"PRIu64" %zu\n", | |
f1bb1696 | 161 | rvp->disk->d_fd, rv->io_start, len); |
2000470d DW |
162 | /* IO error, so try the next logical block. */ |
163 | len = rvp->miniosz; | |
f1bb1696 | 164 | rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, len, |
2000470d DW |
165 | errno, rv->io_end_arg); |
166 | } | |
167 | ||
ed60d210 | 168 | progress_add(len); |
2000470d DW |
169 | verified += len; |
170 | rv->io_start += len; | |
171 | rv->io_length -= len; | |
172 | } | |
173 | ||
174 | free(rv); | |
175 | ptcounter_add(rvp->verified_bytes, verified); | |
176 | } | |
177 | ||
178 | /* Queue a read verify request. */ | |
179 | static bool | |
180 | read_verify_queue( | |
181 | struct read_verify_pool *rvp, | |
182 | struct read_verify *rv) | |
183 | { | |
184 | struct read_verify *tmp; | |
185 | bool ret; | |
186 | ||
187 | dbg_printf("verify fd %d start %"PRIu64" len %"PRIu64"\n", | |
f1bb1696 | 188 | rvp->disk->d_fd, rv->io_start, rv->io_length); |
2000470d DW |
189 | |
190 | tmp = malloc(sizeof(struct read_verify)); | |
191 | if (!tmp) { | |
f1bb1696 | 192 | rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, |
2000470d DW |
193 | rv->io_length, errno, rv->io_end_arg); |
194 | return true; | |
195 | } | |
196 | memcpy(tmp, rv, sizeof(*tmp)); | |
197 | ||
198 | ret = workqueue_add(&rvp->wq, read_verify, 0, tmp); | |
199 | if (ret) { | |
9d57cbfc | 200 | str_liberror(rvp->ctx, ret, _("queueing read-verify work")); |
2000470d DW |
201 | free(tmp); |
202 | return false; | |
203 | } | |
204 | rv->io_length = 0; | |
205 | return true; | |
206 | } | |
207 | ||
208 | /* | |
209 | * Issue an IO request. We'll batch subsequent requests if they're | |
210 | * within 64k of each other | |
211 | */ | |
212 | bool | |
213 | read_verify_schedule_io( | |
214 | struct read_verify_pool *rvp, | |
2000470d DW |
215 | uint64_t start, |
216 | uint64_t length, | |
217 | void *end_arg) | |
218 | { | |
41c08606 | 219 | struct read_verify *rv; |
2000470d DW |
220 | uint64_t req_end; |
221 | uint64_t rv_end; | |
222 | ||
223 | assert(rvp->readbuf); | |
41c08606 | 224 | rv = ptvar_get(rvp->rvstate); |
2000470d DW |
225 | req_end = start + length; |
226 | rv_end = rv->io_start + rv->io_length; | |
227 | ||
228 | /* | |
229 | * If we have a stashed IO, we haven't changed fds, the error | |
230 | * reporting is the same, and the two extents are close, | |
231 | * we can combine them. | |
232 | */ | |
f1bb1696 | 233 | if (rv->io_length > 0 && |
2000470d DW |
234 | end_arg == rv->io_end_arg && |
235 | ((start >= rv->io_start && start <= rv_end + RVP_IO_BATCH_LOCALITY) || | |
236 | (rv->io_start >= start && | |
237 | rv->io_start <= req_end + RVP_IO_BATCH_LOCALITY))) { | |
238 | rv->io_start = min(rv->io_start, start); | |
239 | rv->io_length = max(req_end, rv_end) - rv->io_start; | |
240 | } else { | |
241 | /* Otherwise, issue the stashed IO (if there is one) */ | |
242 | if (rv->io_length > 0) | |
243 | return read_verify_queue(rvp, rv); | |
244 | ||
245 | /* Stash the new IO. */ | |
2000470d DW |
246 | rv->io_start = start; |
247 | rv->io_length = length; | |
248 | rv->io_end_arg = end_arg; | |
249 | } | |
250 | ||
251 | return true; | |
252 | } | |
253 | ||
254 | /* Force any stashed IOs into the verifier. */ | |
255 | bool | |
256 | read_verify_force_io( | |
41c08606 | 257 | struct read_verify_pool *rvp) |
2000470d | 258 | { |
41c08606 | 259 | struct read_verify *rv; |
2000470d DW |
260 | bool moveon; |
261 | ||
262 | assert(rvp->readbuf); | |
41c08606 | 263 | rv = ptvar_get(rvp->rvstate); |
2000470d DW |
264 | if (rv->io_length == 0) |
265 | return true; | |
266 | ||
267 | moveon = read_verify_queue(rvp, rv); | |
268 | if (moveon) | |
269 | rv->io_length = 0; | |
270 | return moveon; | |
271 | } | |
272 | ||
273 | /* How many bytes has this process verified? */ | |
274 | uint64_t | |
275 | read_verify_bytes( | |
276 | struct read_verify_pool *rvp) | |
277 | { | |
278 | return ptcounter_value(rvp->verified_bytes); | |
279 | } |