]>
Commit | Line | Data |
---|---|---|
959ef981 | 1 | // SPDX-License-Identifier: GPL-2.0+ |
2000470d DW |
2 | /* |
3 | * Copyright (C) 2018 Oracle. All Rights Reserved. | |
2000470d | 4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> |
2000470d | 5 | */ |
a440f877 | 6 | #include "xfs.h" |
2000470d | 7 | #include <stdint.h> |
2000470d DW |
8 | #include <stdlib.h> |
9 | #include <sys/statvfs.h> | |
10 | #include "workqueue.h" | |
11 | #include "path.h" | |
2000470d DW |
12 | #include "xfs_scrub.h" |
13 | #include "common.h" | |
14 | #include "counter.h" | |
15 | #include "disk.h" | |
16 | #include "read_verify.h" | |
ed60d210 | 17 | #include "progress.h" |
2000470d DW |
18 | |
19 | /* | |
20 | * Read Verify Pool | |
21 | * | |
22 | * Manages the data block read verification phase. The caller schedules | |
23 | * verification requests, which are then scheduled to be run by a thread | |
24 | * pool worker. Adjacent (or nearly adjacent) requests can be combined | |
25 | * to reduce overhead when free space fragmentation is high. The thread | |
26 | * pool takes care of issuing multiple IOs to the device, if possible. | |
27 | */ | |
28 | ||
/*
 * Upper bound on a single verify IO: 32M (1 << 25 bytes).  The limit
 * must not exceed 65536 sectors, since that is the largest SCSI
 * VERIFY(16) request we dare to send.
 */
#define RVP_IO_MAX_SIZE		(1 << 25)
#define RVP_IO_MAX_SECTORS	(RVP_IO_MAX_SIZE >> BBSHIFT)

/* Requests separated by a hole of up to 64k are still batched together. */
#define RVP_IO_BATCH_LOCALITY	(1 << 16)
38 | ||
39 | struct read_verify_pool { | |
40 | struct workqueue wq; /* thread pool */ | |
41 | struct scrub_ctx *ctx; /* scrub context */ | |
42 | void *readbuf; /* read buffer */ | |
43 | struct ptcounter *verified_bytes; | |
44 | read_verify_ioerr_fn_t ioerr_fn; /* io error callback */ | |
45 | size_t miniosz; /* minimum io size, bytes */ | |
46 | }; | |
47 | ||
48 | /* Create a thread pool to run read verifiers. */ | |
49 | struct read_verify_pool * | |
50 | read_verify_pool_init( | |
51 | struct scrub_ctx *ctx, | |
52 | size_t miniosz, | |
53 | read_verify_ioerr_fn_t ioerr_fn, | |
54 | unsigned int nproc) | |
55 | { | |
56 | struct read_verify_pool *rvp; | |
57 | bool ret; | |
58 | int error; | |
59 | ||
60 | rvp = calloc(1, sizeof(struct read_verify_pool)); | |
61 | if (!rvp) | |
62 | return NULL; | |
63 | ||
64 | error = posix_memalign((void **)&rvp->readbuf, page_size, | |
65 | RVP_IO_MAX_SIZE); | |
66 | if (error || !rvp->readbuf) | |
67 | goto out_free; | |
68 | rvp->verified_bytes = ptcounter_init(nproc); | |
69 | if (!rvp->verified_bytes) | |
70 | goto out_buf; | |
71 | rvp->miniosz = miniosz; | |
72 | rvp->ctx = ctx; | |
73 | rvp->ioerr_fn = ioerr_fn; | |
74 | /* Run in the main thread if we only want one thread. */ | |
75 | if (nproc == 1) | |
76 | nproc = 0; | |
77 | ret = workqueue_create(&rvp->wq, (struct xfs_mount *)rvp, nproc); | |
78 | if (ret) | |
79 | goto out_counter; | |
80 | return rvp; | |
81 | ||
82 | out_counter: | |
83 | ptcounter_free(rvp->verified_bytes); | |
84 | out_buf: | |
85 | free(rvp->readbuf); | |
86 | out_free: | |
87 | free(rvp); | |
88 | return NULL; | |
89 | } | |
90 | ||
91 | /* Finish up any read verification work. */ | |
92 | void | |
93 | read_verify_pool_flush( | |
94 | struct read_verify_pool *rvp) | |
95 | { | |
96 | workqueue_destroy(&rvp->wq); | |
97 | } | |
98 | ||
99 | /* Finish up any read verification work and tear it down. */ | |
100 | void | |
101 | read_verify_pool_destroy( | |
102 | struct read_verify_pool *rvp) | |
103 | { | |
104 | ptcounter_free(rvp->verified_bytes); | |
105 | free(rvp->readbuf); | |
106 | free(rvp); | |
107 | } | |
108 | ||
109 | /* | |
110 | * Issue a read-verify IO in big batches. | |
111 | */ | |
112 | static void | |
113 | read_verify( | |
114 | struct workqueue *wq, | |
115 | xfs_agnumber_t agno, | |
116 | void *arg) | |
117 | { | |
118 | struct read_verify *rv = arg; | |
119 | struct read_verify_pool *rvp; | |
120 | unsigned long long verified = 0; | |
121 | ssize_t sz; | |
122 | ssize_t len; | |
123 | ||
124 | rvp = (struct read_verify_pool *)wq->wq_ctx; | |
125 | while (rv->io_length > 0) { | |
126 | len = min(rv->io_length, RVP_IO_MAX_SIZE); | |
127 | dbg_printf("diskverify %d %"PRIu64" %zu\n", rv->io_disk->d_fd, | |
128 | rv->io_start, len); | |
129 | sz = disk_read_verify(rv->io_disk, rvp->readbuf, | |
130 | rv->io_start, len); | |
131 | if (sz < 0) { | |
132 | dbg_printf("IOERR %d %"PRIu64" %zu\n", | |
133 | rv->io_disk->d_fd, | |
134 | rv->io_start, len); | |
135 | /* IO error, so try the next logical block. */ | |
136 | len = rvp->miniosz; | |
137 | rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start, len, | |
138 | errno, rv->io_end_arg); | |
139 | } | |
140 | ||
ed60d210 | 141 | progress_add(len); |
2000470d DW |
142 | verified += len; |
143 | rv->io_start += len; | |
144 | rv->io_length -= len; | |
145 | } | |
146 | ||
147 | free(rv); | |
148 | ptcounter_add(rvp->verified_bytes, verified); | |
149 | } | |
150 | ||
151 | /* Queue a read verify request. */ | |
152 | static bool | |
153 | read_verify_queue( | |
154 | struct read_verify_pool *rvp, | |
155 | struct read_verify *rv) | |
156 | { | |
157 | struct read_verify *tmp; | |
158 | bool ret; | |
159 | ||
160 | dbg_printf("verify fd %d start %"PRIu64" len %"PRIu64"\n", | |
161 | rv->io_disk->d_fd, rv->io_start, rv->io_length); | |
162 | ||
163 | tmp = malloc(sizeof(struct read_verify)); | |
164 | if (!tmp) { | |
165 | rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start, | |
166 | rv->io_length, errno, rv->io_end_arg); | |
167 | return true; | |
168 | } | |
169 | memcpy(tmp, rv, sizeof(*tmp)); | |
170 | ||
171 | ret = workqueue_add(&rvp->wq, read_verify, 0, tmp); | |
172 | if (ret) { | |
82377bde | 173 | str_info(rvp->ctx, rvp->ctx->mntpoint, |
2000470d DW |
174 | _("Could not queue read-verify work.")); |
175 | free(tmp); | |
176 | return false; | |
177 | } | |
178 | rv->io_length = 0; | |
179 | return true; | |
180 | } | |
181 | ||
182 | /* | |
183 | * Issue an IO request. We'll batch subsequent requests if they're | |
184 | * within 64k of each other | |
185 | */ | |
186 | bool | |
187 | read_verify_schedule_io( | |
188 | struct read_verify_pool *rvp, | |
189 | struct read_verify *rv, | |
190 | struct disk *disk, | |
191 | uint64_t start, | |
192 | uint64_t length, | |
193 | void *end_arg) | |
194 | { | |
195 | uint64_t req_end; | |
196 | uint64_t rv_end; | |
197 | ||
198 | assert(rvp->readbuf); | |
199 | req_end = start + length; | |
200 | rv_end = rv->io_start + rv->io_length; | |
201 | ||
202 | /* | |
203 | * If we have a stashed IO, we haven't changed fds, the error | |
204 | * reporting is the same, and the two extents are close, | |
205 | * we can combine them. | |
206 | */ | |
207 | if (rv->io_length > 0 && disk == rv->io_disk && | |
208 | end_arg == rv->io_end_arg && | |
209 | ((start >= rv->io_start && start <= rv_end + RVP_IO_BATCH_LOCALITY) || | |
210 | (rv->io_start >= start && | |
211 | rv->io_start <= req_end + RVP_IO_BATCH_LOCALITY))) { | |
212 | rv->io_start = min(rv->io_start, start); | |
213 | rv->io_length = max(req_end, rv_end) - rv->io_start; | |
214 | } else { | |
215 | /* Otherwise, issue the stashed IO (if there is one) */ | |
216 | if (rv->io_length > 0) | |
217 | return read_verify_queue(rvp, rv); | |
218 | ||
219 | /* Stash the new IO. */ | |
220 | rv->io_disk = disk; | |
221 | rv->io_start = start; | |
222 | rv->io_length = length; | |
223 | rv->io_end_arg = end_arg; | |
224 | } | |
225 | ||
226 | return true; | |
227 | } | |
228 | ||
229 | /* Force any stashed IOs into the verifier. */ | |
230 | bool | |
231 | read_verify_force_io( | |
232 | struct read_verify_pool *rvp, | |
233 | struct read_verify *rv) | |
234 | { | |
235 | bool moveon; | |
236 | ||
237 | assert(rvp->readbuf); | |
238 | if (rv->io_length == 0) | |
239 | return true; | |
240 | ||
241 | moveon = read_verify_queue(rvp, rv); | |
242 | if (moveon) | |
243 | rv->io_length = 0; | |
244 | return moveon; | |
245 | } | |
246 | ||
247 | /* How many bytes has this process verified? */ | |
248 | uint64_t | |
249 | read_verify_bytes( | |
250 | struct read_verify_pool *rvp) | |
251 | { | |
252 | return ptcounter_value(rvp->verified_bytes); | |
253 | } |