]>
Commit | Line | Data |
---|---|---|
2000470d DW |
1 | /* |
2 | * Copyright (C) 2018 Oracle. All Rights Reserved. | |
3 | * | |
4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version 2 | |
9 | * of the License, or (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it would be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write the Free Software Foundation, | |
18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | */ | |
20 | #include <stdio.h> | |
21 | #include <stdint.h> | |
22 | #include <stdbool.h> | |
23 | #include <stdlib.h> | |
24 | #include <sys/statvfs.h> | |
25 | #include "workqueue.h" | |
26 | #include "path.h" | |
27 | #include "xfs.h" | |
28 | #include "xfs_fs.h" | |
29 | #include "xfs_scrub.h" | |
30 | #include "common.h" | |
31 | #include "counter.h" | |
32 | #include "disk.h" | |
33 | #include "read_verify.h" | |
ed60d210 | 34 | #include "progress.h" |
2000470d DW |
35 | |
36 | /* | |
37 | * Read Verify Pool | |
38 | * | |
39 | * Manages the data block read verification phase. The caller schedules | |
40 | * verification requests, which are then scheduled to be run by a thread | |
41 | * pool worker. Adjacent (or nearly adjacent) requests can be combined | |
42 | * to reduce overhead when free space fragmentation is high. The thread | |
43 | * pool takes care of issuing multiple IOs to the device, if possible. | |
44 | */ | |
45 | ||
/*
 * Largest single read-verify IO, in bytes (32M).  Expressed in sectors
 * this must stay at or below 65536, since that is the biggest
 * SCSI VERIFY(16) we dare to send.
 */
#define RVP_IO_MAX_SIZE		(32 * 1024 * 1024)
#define RVP_IO_MAX_SECTORS	(RVP_IO_MAX_SIZE >> BBSHIFT)

/*
 * Adjacent read verify requests separated by a hole of up to 64k are
 * still treated as one batch.
 */
#define RVP_IO_BATCH_LOCALITY	(64 * 1024)
55 | ||
56 | struct read_verify_pool { | |
57 | struct workqueue wq; /* thread pool */ | |
58 | struct scrub_ctx *ctx; /* scrub context */ | |
59 | void *readbuf; /* read buffer */ | |
60 | struct ptcounter *verified_bytes; | |
61 | read_verify_ioerr_fn_t ioerr_fn; /* io error callback */ | |
62 | size_t miniosz; /* minimum io size, bytes */ | |
63 | }; | |
64 | ||
65 | /* Create a thread pool to run read verifiers. */ | |
66 | struct read_verify_pool * | |
67 | read_verify_pool_init( | |
68 | struct scrub_ctx *ctx, | |
69 | size_t miniosz, | |
70 | read_verify_ioerr_fn_t ioerr_fn, | |
71 | unsigned int nproc) | |
72 | { | |
73 | struct read_verify_pool *rvp; | |
74 | bool ret; | |
75 | int error; | |
76 | ||
77 | rvp = calloc(1, sizeof(struct read_verify_pool)); | |
78 | if (!rvp) | |
79 | return NULL; | |
80 | ||
81 | error = posix_memalign((void **)&rvp->readbuf, page_size, | |
82 | RVP_IO_MAX_SIZE); | |
83 | if (error || !rvp->readbuf) | |
84 | goto out_free; | |
85 | rvp->verified_bytes = ptcounter_init(nproc); | |
86 | if (!rvp->verified_bytes) | |
87 | goto out_buf; | |
88 | rvp->miniosz = miniosz; | |
89 | rvp->ctx = ctx; | |
90 | rvp->ioerr_fn = ioerr_fn; | |
91 | /* Run in the main thread if we only want one thread. */ | |
92 | if (nproc == 1) | |
93 | nproc = 0; | |
94 | ret = workqueue_create(&rvp->wq, (struct xfs_mount *)rvp, nproc); | |
95 | if (ret) | |
96 | goto out_counter; | |
97 | return rvp; | |
98 | ||
99 | out_counter: | |
100 | ptcounter_free(rvp->verified_bytes); | |
101 | out_buf: | |
102 | free(rvp->readbuf); | |
103 | out_free: | |
104 | free(rvp); | |
105 | return NULL; | |
106 | } | |
107 | ||
108 | /* Finish up any read verification work. */ | |
109 | void | |
110 | read_verify_pool_flush( | |
111 | struct read_verify_pool *rvp) | |
112 | { | |
113 | workqueue_destroy(&rvp->wq); | |
114 | } | |
115 | ||
116 | /* Finish up any read verification work and tear it down. */ | |
117 | void | |
118 | read_verify_pool_destroy( | |
119 | struct read_verify_pool *rvp) | |
120 | { | |
121 | ptcounter_free(rvp->verified_bytes); | |
122 | free(rvp->readbuf); | |
123 | free(rvp); | |
124 | } | |
125 | ||
126 | /* | |
127 | * Issue a read-verify IO in big batches. | |
128 | */ | |
129 | static void | |
130 | read_verify( | |
131 | struct workqueue *wq, | |
132 | xfs_agnumber_t agno, | |
133 | void *arg) | |
134 | { | |
135 | struct read_verify *rv = arg; | |
136 | struct read_verify_pool *rvp; | |
137 | unsigned long long verified = 0; | |
138 | ssize_t sz; | |
139 | ssize_t len; | |
140 | ||
141 | rvp = (struct read_verify_pool *)wq->wq_ctx; | |
142 | while (rv->io_length > 0) { | |
143 | len = min(rv->io_length, RVP_IO_MAX_SIZE); | |
144 | dbg_printf("diskverify %d %"PRIu64" %zu\n", rv->io_disk->d_fd, | |
145 | rv->io_start, len); | |
146 | sz = disk_read_verify(rv->io_disk, rvp->readbuf, | |
147 | rv->io_start, len); | |
148 | if (sz < 0) { | |
149 | dbg_printf("IOERR %d %"PRIu64" %zu\n", | |
150 | rv->io_disk->d_fd, | |
151 | rv->io_start, len); | |
152 | /* IO error, so try the next logical block. */ | |
153 | len = rvp->miniosz; | |
154 | rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start, len, | |
155 | errno, rv->io_end_arg); | |
156 | } | |
157 | ||
ed60d210 | 158 | progress_add(len); |
2000470d DW |
159 | verified += len; |
160 | rv->io_start += len; | |
161 | rv->io_length -= len; | |
162 | } | |
163 | ||
164 | free(rv); | |
165 | ptcounter_add(rvp->verified_bytes, verified); | |
166 | } | |
167 | ||
168 | /* Queue a read verify request. */ | |
169 | static bool | |
170 | read_verify_queue( | |
171 | struct read_verify_pool *rvp, | |
172 | struct read_verify *rv) | |
173 | { | |
174 | struct read_verify *tmp; | |
175 | bool ret; | |
176 | ||
177 | dbg_printf("verify fd %d start %"PRIu64" len %"PRIu64"\n", | |
178 | rv->io_disk->d_fd, rv->io_start, rv->io_length); | |
179 | ||
180 | tmp = malloc(sizeof(struct read_verify)); | |
181 | if (!tmp) { | |
182 | rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start, | |
183 | rv->io_length, errno, rv->io_end_arg); | |
184 | return true; | |
185 | } | |
186 | memcpy(tmp, rv, sizeof(*tmp)); | |
187 | ||
188 | ret = workqueue_add(&rvp->wq, read_verify, 0, tmp); | |
189 | if (ret) { | |
82377bde | 190 | str_info(rvp->ctx, rvp->ctx->mntpoint, |
2000470d DW |
191 | _("Could not queue read-verify work.")); |
192 | free(tmp); | |
193 | return false; | |
194 | } | |
195 | rv->io_length = 0; | |
196 | return true; | |
197 | } | |
198 | ||
199 | /* | |
200 | * Issue an IO request. We'll batch subsequent requests if they're | |
201 | * within 64k of each other | |
202 | */ | |
203 | bool | |
204 | read_verify_schedule_io( | |
205 | struct read_verify_pool *rvp, | |
206 | struct read_verify *rv, | |
207 | struct disk *disk, | |
208 | uint64_t start, | |
209 | uint64_t length, | |
210 | void *end_arg) | |
211 | { | |
212 | uint64_t req_end; | |
213 | uint64_t rv_end; | |
214 | ||
215 | assert(rvp->readbuf); | |
216 | req_end = start + length; | |
217 | rv_end = rv->io_start + rv->io_length; | |
218 | ||
219 | /* | |
220 | * If we have a stashed IO, we haven't changed fds, the error | |
221 | * reporting is the same, and the two extents are close, | |
222 | * we can combine them. | |
223 | */ | |
224 | if (rv->io_length > 0 && disk == rv->io_disk && | |
225 | end_arg == rv->io_end_arg && | |
226 | ((start >= rv->io_start && start <= rv_end + RVP_IO_BATCH_LOCALITY) || | |
227 | (rv->io_start >= start && | |
228 | rv->io_start <= req_end + RVP_IO_BATCH_LOCALITY))) { | |
229 | rv->io_start = min(rv->io_start, start); | |
230 | rv->io_length = max(req_end, rv_end) - rv->io_start; | |
231 | } else { | |
232 | /* Otherwise, issue the stashed IO (if there is one) */ | |
233 | if (rv->io_length > 0) | |
234 | return read_verify_queue(rvp, rv); | |
235 | ||
236 | /* Stash the new IO. */ | |
237 | rv->io_disk = disk; | |
238 | rv->io_start = start; | |
239 | rv->io_length = length; | |
240 | rv->io_end_arg = end_arg; | |
241 | } | |
242 | ||
243 | return true; | |
244 | } | |
245 | ||
246 | /* Force any stashed IOs into the verifier. */ | |
247 | bool | |
248 | read_verify_force_io( | |
249 | struct read_verify_pool *rvp, | |
250 | struct read_verify *rv) | |
251 | { | |
252 | bool moveon; | |
253 | ||
254 | assert(rvp->readbuf); | |
255 | if (rv->io_length == 0) | |
256 | return true; | |
257 | ||
258 | moveon = read_verify_queue(rvp, rv); | |
259 | if (moveon) | |
260 | rv->io_length = 0; | |
261 | return moveon; | |
262 | } | |
263 | ||
264 | /* How many bytes has this process verified? */ | |
265 | uint64_t | |
266 | read_verify_bytes( | |
267 | struct read_verify_pool *rvp) | |
268 | { | |
269 | return ptcounter_value(rvp->verified_bytes); | |
270 | } |