]>
Commit | Line | Data |
---|---|---|
959ef981 | 1 | // SPDX-License-Identifier: GPL-2.0+ |
b364a9c0 DW |
2 | /* |
3 | * Copyright (C) 2018 Oracle. All Rights Reserved. | |
b364a9c0 | 4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> |
b364a9c0 | 5 | */ |
a440f877 | 6 | #include "xfs.h" |
b364a9c0 | 7 | #include <stdint.h> |
b364a9c0 DW |
8 | #include <dirent.h> |
9 | #include <sys/statvfs.h> | |
b364a9c0 | 10 | #include "handle.h" |
42b4c8e8 | 11 | #include "libfrog/paths.h" |
56598728 | 12 | #include "libfrog/workqueue.h" |
b364a9c0 DW |
13 | #include "xfs_scrub.h" |
14 | #include "common.h" | |
a58400ed | 15 | #include "libfrog/bitmap.h" |
b364a9c0 DW |
16 | #include "disk.h" |
17 | #include "filemap.h" | |
ed60d210 | 18 | #include "fscounters.h" |
b364a9c0 DW |
19 | #include "inodes.h" |
20 | #include "read_verify.h" | |
21 | #include "spacemap.h" | |
22 | #include "vfs.h" | |
23 | ||
24 | /* | |
25 | * Phase 6: Verify data file integrity. | |
26 | * | |
27 | * Identify potential data block extents with GETFSMAP, then feed those | |
28 | * extents to the read-verify pool to get the verify commands batched, | |
29 | * issued, and (if there are problems) reported back to us. If there | |
30 | * are errors, we'll record the bad regions and (if available) use rmap | |
31 | * to tell us if metadata are now corrupt. Otherwise, we'll scan the | |
32 | * whole directory tree looking for files that overlap the bad regions | |
33 | * and report the paths of the now corrupt files. | |
34 | */ | |
35 | ||
f1bb1696 DW |
36 | /* Verify disk blocks with GETFSMAP */ |
37 | ||
/* Working state shared by the phase 6 media scan. */
struct media_verify_state {
	/* Read-verify pools, one per device (data, log, realtime). */
	struct read_verify_pool	*rvp_data;
	struct read_verify_pool	*rvp_log;
	struct read_verify_pool	*rvp_realtime;

	/* Bad ranges (in bytes) recorded for the data and realtime devices. */
	struct bitmap		*d_bad;
	struct bitmap		*r_bad;
};
45 | ||
b364a9c0 | 46 | /* Find the fd for a given device identifier. */ |
f1bb1696 DW |
47 | static struct read_verify_pool * |
48 | xfs_dev_to_pool( | |
49 | struct scrub_ctx *ctx, | |
557f98d7 | 50 | struct media_verify_state *vs, |
f1bb1696 | 51 | dev_t dev) |
b364a9c0 DW |
52 | { |
53 | if (dev == ctx->fsinfo.fs_datadev) | |
557f98d7 | 54 | return vs->rvp_data; |
b364a9c0 | 55 | else if (dev == ctx->fsinfo.fs_logdev) |
557f98d7 | 56 | return vs->rvp_log; |
b364a9c0 | 57 | else if (dev == ctx->fsinfo.fs_rtdev) |
557f98d7 | 58 | return vs->rvp_realtime; |
b364a9c0 DW |
59 | abort(); |
60 | } | |
61 | ||
62 | /* Find the device major/minor for a given file descriptor. */ | |
63 | static dev_t | |
64 | xfs_disk_to_dev( | |
65 | struct scrub_ctx *ctx, | |
66 | struct disk *disk) | |
67 | { | |
68 | if (disk == ctx->datadev) | |
69 | return ctx->fsinfo.fs_datadev; | |
70 | else if (disk == ctx->logdev) | |
71 | return ctx->fsinfo.fs_logdev; | |
72 | else if (disk == ctx->rtdev) | |
73 | return ctx->fsinfo.fs_rtdev; | |
74 | abort(); | |
75 | } | |
76 | ||
c9b349bd DW |
77 | /* Find the incore bad blocks bitmap for a given disk. */ |
78 | static struct bitmap * | |
79 | bitmap_for_disk( | |
80 | struct scrub_ctx *ctx, | |
81 | struct disk *disk, | |
82 | struct media_verify_state *vs) | |
83 | { | |
84 | dev_t dev = xfs_disk_to_dev(ctx, disk); | |
85 | ||
86 | if (dev == ctx->fsinfo.fs_datadev) | |
87 | return vs->d_bad; | |
88 | else if (dev == ctx->fsinfo.fs_rtdev) | |
89 | return vs->r_bad; | |
90 | return NULL; | |
91 | } | |
92 | ||
/* Context handed to report_ioerr() while walking one disk's bad ranges. */
struct disk_ioerr_report {
	struct scrub_ctx	*ctx;	/* scrub context */
	struct disk		*disk;	/* disk whose errors are being reported */
};
97 | ||
b364a9c0 DW |
/* Maps a special fsmap owner code to a human-readable description. */
struct owner_decode {
	uint64_t	owner;	/* owner code to match */
	const char	*descr;	/* description; NULL marks the table end */
};
102 | ||
103 | static const struct owner_decode special_owners[] = { | |
104 | {XFS_FMR_OWN_FREE, "free space"}, | |
105 | {XFS_FMR_OWN_UNKNOWN, "unknown owner"}, | |
106 | {XFS_FMR_OWN_FS, "static FS metadata"}, | |
107 | {XFS_FMR_OWN_LOG, "journalling log"}, | |
108 | {XFS_FMR_OWN_AG, "per-AG metadata"}, | |
109 | {XFS_FMR_OWN_INOBT, "inode btree blocks"}, | |
110 | {XFS_FMR_OWN_INODES, "inodes"}, | |
111 | {XFS_FMR_OWN_REFC, "refcount btree"}, | |
112 | {XFS_FMR_OWN_COW, "CoW staging"}, | |
113 | {XFS_FMR_OWN_DEFECTIVE, "bad blocks"}, | |
114 | {0, NULL}, | |
115 | }; | |
116 | ||
117 | /* Decode a special owner. */ | |
118 | static const char * | |
119 | xfs_decode_special_owner( | |
120 | uint64_t owner) | |
121 | { | |
122 | const struct owner_decode *od = special_owners; | |
123 | ||
124 | while (od->descr) { | |
125 | if (od->owner == owner) | |
126 | return od->descr; | |
127 | od++; | |
128 | } | |
129 | ||
130 | return NULL; | |
131 | } | |
132 | ||
133 | /* Routines to translate bad physical extents into file paths and offsets. */ | |
134 | ||
ed953d26 DW |
/* Context handed to report_badfile() for one file mapping. */
struct badfile_report {
	struct scrub_ctx	*ctx;	/* scrub context */
	const char		*descr;	/* description used in the error report */
	struct xfs_bmap		*bmap;	/* file mapping being checked */
};
140 | ||
141 | /* Report on bad extents found during a media scan. */ | |
142 | static int | |
143 | report_badfile( | |
144 | uint64_t start, | |
145 | uint64_t length, | |
146 | void *arg) | |
147 | { | |
148 | struct badfile_report *br = arg; | |
149 | unsigned long long bad_offset; | |
150 | unsigned long long bad_length; | |
151 | ||
152 | /* Clamp the bad region to the file mapping. */ | |
153 | if (start < br->bmap->bm_physical) { | |
154 | length -= br->bmap->bm_physical - start; | |
155 | start = br->bmap->bm_physical; | |
156 | } | |
157 | length = min(length, br->bmap->bm_length); | |
158 | ||
159 | /* Figure out how far into the bmap is the bad mapping and report it. */ | |
160 | bad_offset = start - br->bmap->bm_physical; | |
161 | bad_length = min(start + length, | |
162 | br->bmap->bm_physical + br->bmap->bm_length) - start; | |
163 | ||
164 | str_error(br->ctx, br->descr, | |
165 | _("media error at data offset %llu length %llu."), | |
166 | br->bmap->bm_offset + bad_offset, bad_length); | |
167 | return 0; | |
168 | } | |
169 | ||
b364a9c0 DW |
170 | /* Report if this extent overlaps a bad region. */ |
171 | static bool | |
663e02a0 | 172 | report_data_loss( |
b364a9c0 DW |
173 | struct scrub_ctx *ctx, |
174 | const char *descr, | |
175 | int fd, | |
176 | int whichfork, | |
177 | struct fsxattr *fsx, | |
178 | struct xfs_bmap *bmap, | |
179 | void *arg) | |
180 | { | |
ed953d26 DW |
181 | struct badfile_report br = { |
182 | .ctx = ctx, | |
183 | .descr = descr, | |
184 | .bmap = bmap, | |
185 | }; | |
ed5f9cc7 | 186 | struct media_verify_state *vs = arg; |
b364a9c0 | 187 | struct bitmap *bmp; |
ed953d26 | 188 | int ret; |
b364a9c0 DW |
189 | |
190 | /* Only report errors for real extents. */ | |
191 | if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) | |
192 | return true; | |
193 | ||
194 | if (fsx->fsx_xflags & FS_XFLAG_REALTIME) | |
ed5f9cc7 | 195 | bmp = vs->r_bad; |
b364a9c0 | 196 | else |
ed5f9cc7 | 197 | bmp = vs->d_bad; |
b364a9c0 | 198 | |
ed953d26 DW |
199 | ret = bitmap_iterate_range(bmp, bmap->bm_physical, bmap->bm_length, |
200 | report_badfile, &br); | |
201 | if (ret) { | |
202 | str_liberror(ctx, ret, descr); | |
203 | return false; | |
204 | } | |
b364a9c0 DW |
205 | return true; |
206 | } | |
207 | ||
663e02a0 DW |
208 | /* Report if the extended attribute data overlaps a bad region. */ |
209 | static bool | |
210 | report_attr_loss( | |
211 | struct scrub_ctx *ctx, | |
212 | const char *descr, | |
213 | int fd, | |
214 | int whichfork, | |
215 | struct fsxattr *fsx, | |
216 | struct xfs_bmap *bmap, | |
217 | void *arg) | |
218 | { | |
219 | struct media_verify_state *vs = arg; | |
220 | struct bitmap *bmp = vs->d_bad; | |
221 | ||
222 | /* Complain about attr fork extents that don't look right. */ | |
223 | if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) { | |
224 | str_info(ctx, descr, | |
225 | _("found unexpected unwritten/delalloc attr fork extent.")); | |
226 | return true; | |
227 | } | |
228 | ||
229 | if (fsx->fsx_xflags & FS_XFLAG_REALTIME) { | |
230 | str_info(ctx, descr, | |
231 | _("found unexpected realtime attr fork extent.")); | |
232 | return true; | |
233 | } | |
234 | ||
235 | if (bitmap_test(bmp, bmap->bm_physical, bmap->bm_length)) | |
abc2e70d | 236 | str_corrupt(ctx, descr, |
663e02a0 DW |
237 | _("media error in extended attribute data.")); |
238 | ||
239 | return true; | |
240 | } | |
241 | ||
b364a9c0 DW |
242 | /* Iterate the extent mappings of a file to report errors. */ |
243 | static bool | |
244 | xfs_report_verify_fd( | |
245 | struct scrub_ctx *ctx, | |
246 | const char *descr, | |
247 | int fd, | |
248 | void *arg) | |
249 | { | |
250 | struct xfs_bmap key = {0}; | |
251 | bool moveon; | |
252 | ||
253 | /* data fork */ | |
254 | moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_DATA_FORK, &key, | |
663e02a0 | 255 | report_data_loss, arg); |
b364a9c0 DW |
256 | if (!moveon) |
257 | return false; | |
258 | ||
259 | /* attr fork */ | |
663e02a0 DW |
260 | return xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key, |
261 | report_attr_loss, arg); | |
b364a9c0 DW |
262 | } |
263 | ||
264 | /* Report read verify errors in unlinked (but still open) files. */ | |
265 | static int | |
266 | xfs_report_verify_inode( | |
267 | struct scrub_ctx *ctx, | |
268 | struct xfs_handle *handle, | |
4cca629d | 269 | struct xfs_bulkstat *bstat, |
b364a9c0 DW |
270 | void *arg) |
271 | { | |
272 | char descr[DESCR_BUFSZ]; | |
b364a9c0 DW |
273 | bool moveon; |
274 | int fd; | |
275 | int error; | |
276 | ||
b364a9c0 DW |
277 | /* Ignore linked files and things we can't open. */ |
278 | if (bstat->bs_nlink != 0) | |
279 | return 0; | |
280 | if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode)) | |
281 | return 0; | |
282 | ||
15589f0a DW |
283 | scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ, |
284 | bstat->bs_ino, bstat->bs_gen, _("(unlinked)")); | |
285 | ||
b364a9c0 DW |
286 | /* Try to open the inode. */ |
287 | fd = xfs_open_handle(handle); | |
288 | if (fd < 0) { | |
289 | error = errno; | |
290 | if (error == ESTALE) | |
291 | return error; | |
292 | ||
bb5dbd06 DW |
293 | str_info(ctx, descr, |
294 | _("Disappeared during read error reporting.")); | |
b364a9c0 DW |
295 | return error; |
296 | } | |
297 | ||
298 | /* Go find the badness. */ | |
299 | moveon = xfs_report_verify_fd(ctx, descr, fd, arg); | |
6c05cc5d DW |
300 | error = close(fd); |
301 | if (error) | |
302 | str_errno(ctx, descr); | |
b364a9c0 DW |
303 | |
304 | return moveon ? 0 : XFS_ITERATE_INODES_ABORT; | |
305 | } | |
306 | ||
/*
 * Scan a directory for matches in the read verify error list.  This
 * checks the directory's own extent mappings via its open descriptor.
 */
static bool
xfs_report_verify_dir(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	bool			keep_going;

	keep_going = xfs_report_verify_fd(ctx, path, dir_fd, arg);
	return keep_going;
}
317 | ||
318 | /* | |
319 | * Scan the inode associated with a directory entry for matches with | |
320 | * the read verify error list. | |
321 | */ | |
322 | static bool | |
323 | xfs_report_verify_dirent( | |
324 | struct scrub_ctx *ctx, | |
325 | const char *path, | |
326 | int dir_fd, | |
327 | struct dirent *dirent, | |
328 | struct stat *sb, | |
329 | void *arg) | |
330 | { | |
331 | bool moveon; | |
332 | int fd; | |
6c05cc5d | 333 | int error; |
b364a9c0 DW |
334 | |
335 | /* Ignore things we can't open. */ | |
336 | if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode)) | |
337 | return true; | |
338 | ||
339 | /* Ignore . and .. */ | |
340 | if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name)) | |
341 | return true; | |
342 | ||
343 | /* | |
344 | * If we were given a dirent, open the associated file under | |
345 | * dir_fd for badblocks scanning. If dirent is NULL, then it's | |
346 | * the directory itself we want to scan. | |
347 | */ | |
348 | fd = openat(dir_fd, dirent->d_name, | |
349 | O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY); | |
350 | if (fd < 0) | |
351 | return true; | |
352 | ||
353 | /* Go find the badness. */ | |
354 | moveon = xfs_report_verify_fd(ctx, path, fd, arg); | |
355 | if (moveon) | |
356 | goto out; | |
357 | ||
358 | out: | |
6c05cc5d DW |
359 | error = close(fd); |
360 | if (error) | |
361 | str_errno(ctx, path); | |
b364a9c0 DW |
362 | return moveon; |
363 | } | |
364 | ||
c9b349bd | 365 | /* Use a fsmap to report metadata lost to a media error. */ |
b364a9c0 | 366 | static bool |
c9b349bd | 367 | report_ioerr_fsmap( |
b364a9c0 DW |
368 | struct scrub_ctx *ctx, |
369 | const char *descr, | |
370 | struct fsmap *map, | |
371 | void *arg) | |
372 | { | |
373 | const char *type; | |
f1f5fd3a | 374 | char buf[DESCR_BUFSZ]; |
b364a9c0 DW |
375 | uint64_t err_physical = *(uint64_t *)arg; |
376 | uint64_t err_off; | |
377 | ||
909c6a54 DW |
378 | /* Don't care about unwritten extents. */ |
379 | if (map->fmr_flags & FMR_OF_PREALLOC) | |
380 | return true; | |
381 | ||
b364a9c0 DW |
382 | if (err_physical > map->fmr_physical) |
383 | err_off = err_physical - map->fmr_physical; | |
384 | else | |
385 | err_off = 0; | |
386 | ||
f1f5fd3a | 387 | /* Report special owners */ |
b364a9c0 | 388 | if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) { |
f1f5fd3a DW |
389 | snprintf(buf, DESCR_BUFSZ, _("disk offset %"PRIu64), |
390 | (uint64_t)map->fmr_physical + err_off); | |
b364a9c0 | 391 | type = xfs_decode_special_owner(map->fmr_owner); |
abc2e70d | 392 | str_corrupt(ctx, buf, _("media error in %s."), type); |
b364a9c0 DW |
393 | } |
394 | ||
02d0069e DW |
395 | /* Report extent maps */ |
396 | if (map->fmr_flags & FMR_OF_EXTENT_MAP) { | |
397 | bool attr = (map->fmr_flags & FMR_OF_ATTR_FORK); | |
398 | ||
399 | scrub_render_ino_descr(ctx, buf, DESCR_BUFSZ, | |
400 | map->fmr_owner, 0, " %s", | |
401 | attr ? _("extended attribute") : | |
402 | _("file data")); | |
abc2e70d | 403 | str_corrupt(ctx, buf, _("media error in extent map")); |
02d0069e DW |
404 | } |
405 | ||
b364a9c0 DW |
406 | /* |
407 | * XXX: If we had a getparent() call we could report IO errors | |
408 | * efficiently. Until then, we'll have to scan the dir tree | |
409 | * to find the bad file's pathname. | |
410 | */ | |
411 | ||
412 | return true; | |
413 | } | |
414 | ||
415 | /* | |
c9b349bd DW |
416 | * For a range of bad blocks, visit each space mapping that overlaps the bad |
417 | * range so that we can report lost metadata. | |
b364a9c0 | 418 | */ |
c9b349bd DW |
419 | static int |
420 | report_ioerr( | |
b364a9c0 DW |
421 | uint64_t start, |
422 | uint64_t length, | |
b364a9c0 DW |
423 | void *arg) |
424 | { | |
425 | struct fsmap keys[2]; | |
426 | char descr[DESCR_BUFSZ]; | |
c9b349bd | 427 | struct disk_ioerr_report *dioerr = arg; |
b364a9c0 | 428 | dev_t dev; |
b364a9c0 | 429 | |
c9b349bd | 430 | dev = xfs_disk_to_dev(dioerr->ctx, dioerr->disk); |
b364a9c0 | 431 | |
c9b349bd DW |
432 | snprintf(descr, DESCR_BUFSZ, |
433 | _("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "), | |
b364a9c0 DW |
434 | major(dev), minor(dev), start, length); |
435 | ||
436 | /* Go figure out which blocks are bad from the fsmap. */ | |
437 | memset(keys, 0, sizeof(struct fsmap) * 2); | |
438 | keys->fmr_device = dev; | |
439 | keys->fmr_physical = start; | |
440 | (keys + 1)->fmr_device = dev; | |
441 | (keys + 1)->fmr_physical = start + length - 1; | |
442 | (keys + 1)->fmr_owner = ULLONG_MAX; | |
443 | (keys + 1)->fmr_offset = ULLONG_MAX; | |
444 | (keys + 1)->fmr_flags = UINT_MAX; | |
c9b349bd | 445 | xfs_iterate_fsmap(dioerr->ctx, descr, keys, report_ioerr_fsmap, |
b364a9c0 | 446 | &start); |
c9b349bd DW |
447 | return 0; |
448 | } | |
449 | ||
450 | /* Report all the media errors found on a disk. */ | |
451 | static int | |
452 | report_disk_ioerrs( | |
453 | struct scrub_ctx *ctx, | |
454 | struct disk *disk, | |
455 | struct media_verify_state *vs) | |
456 | { | |
457 | struct disk_ioerr_report dioerr = { | |
458 | .ctx = ctx, | |
459 | .disk = disk, | |
460 | }; | |
461 | struct bitmap *tree; | |
462 | ||
463 | if (!disk) | |
464 | return 0; | |
465 | tree = bitmap_for_disk(ctx, disk, vs); | |
466 | if (!tree) | |
467 | return 0; | |
468 | return bitmap_iterate(tree, report_ioerr, &dioerr); | |
469 | } | |
470 | ||
471 | /* Given bad extent lists for the data & rtdev, find bad files. */ | |
472 | static bool | |
473 | report_all_media_errors( | |
474 | struct scrub_ctx *ctx, | |
475 | struct media_verify_state *vs) | |
476 | { | |
477 | bool moveon; | |
478 | int ret; | |
479 | ||
480 | ret = report_disk_ioerrs(ctx, ctx->datadev, vs); | |
481 | if (ret) { | |
482 | str_liberror(ctx, ret, _("walking datadev io errors")); | |
483 | return false; | |
484 | } | |
485 | ||
486 | ret = report_disk_ioerrs(ctx, ctx->rtdev, vs); | |
487 | if (ret) { | |
488 | str_liberror(ctx, ret, _("walking rtdev io errors")); | |
489 | return false; | |
490 | } | |
491 | ||
492 | /* Scan the directory tree to get file paths. */ | |
493 | moveon = scan_fs_tree(ctx, xfs_report_verify_dir, | |
494 | xfs_report_verify_dirent, vs); | |
495 | if (!moveon) | |
496 | return false; | |
497 | ||
498 | /* Scan for unlinked files. */ | |
499 | return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, vs); | |
b364a9c0 DW |
500 | } |
501 | ||
502 | /* Schedule a read-verify of a (data block) extent. */ | |
503 | static bool | |
504 | xfs_check_rmap( | |
505 | struct scrub_ctx *ctx, | |
506 | const char *descr, | |
507 | struct fsmap *map, | |
508 | void *arg) | |
509 | { | |
557f98d7 | 510 | struct media_verify_state *vs = arg; |
f1bb1696 | 511 | struct read_verify_pool *rvp; |
8cab77d3 | 512 | int ret; |
f1bb1696 | 513 | |
557f98d7 | 514 | rvp = xfs_dev_to_pool(ctx, vs, map->fmr_device); |
b364a9c0 DW |
515 | |
516 | dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64 | |
517 | " offset %"PRIu64" len %"PRIu64" flags 0x%x\n", | |
518 | major(map->fmr_device), minor(map->fmr_device), | |
519 | (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner, | |
520 | (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length, | |
521 | map->fmr_flags); | |
522 | ||
523 | /* "Unknown" extents should be verified; they could be data. */ | |
524 | if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) && | |
525 | map->fmr_owner == XFS_FMR_OWN_UNKNOWN) | |
526 | map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER; | |
527 | ||
528 | /* | |
529 | * We only care about read-verifying data extents that have been | |
530 | * written to disk. This means we can skip "special" owners | |
531 | * (metadata), xattr blocks, unwritten extents, and extent maps. | |
532 | * These should all get checked elsewhere in the scrubber. | |
533 | */ | |
534 | if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK | | |
535 | FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER)) | |
22d658ec | 536 | return true; |
b364a9c0 DW |
537 | |
538 | /* XXX: Filter out directory data blocks. */ | |
539 | ||
540 | /* Schedule the read verify command for (eventual) running. */ | |
8cab77d3 DW |
541 | ret = read_verify_schedule_io(rvp, map->fmr_physical, map->fmr_length, |
542 | vs); | |
543 | if (ret) { | |
544 | str_liberror(ctx, ret, descr); | |
545 | return false; | |
546 | } | |
b364a9c0 | 547 | |
b364a9c0 DW |
548 | return true; |
549 | } | |
550 | ||
/*
 * Wait for read/verify actions to finish, then add the number of bytes
 * checked by this pool to *bytes_checked.
 *
 * A NULL pool is a no-op.  Otherwise the pool is always destroyed before
 * returning, whether or not an error occurred.  Returns 0 on success or
 * the error code from the failing read-verify call.
 */
static int
clean_pool(
	struct read_verify_pool		*rvp,
	unsigned long long		*bytes_checked)
{
	uint64_t			pool_checked;
	int				ret;

	if (!rvp)
		return 0;

	ret = read_verify_force_io(rvp);
	if (ret) {
		/*
		 * The caller does not touch the pool again after this
		 * function, so returning here without tearing it down
		 * would leak it.  Abort before destroying, as the error
		 * path in xfs_scan_blocks does.
		 */
		read_verify_pool_abort(rvp);
		goto out_destroy;
	}

	ret = read_verify_pool_flush(rvp);
	if (ret)
		goto out_destroy;

	ret = read_verify_bytes(rvp, &pool_checked);
	if (ret)
		goto out_destroy;

	*bytes_checked += pool_checked;
out_destroy:
	read_verify_pool_destroy(rvp);
	return ret;
}
580 | ||
c9b349bd DW |
/*
 * Remember a media error for later by marking the failed range in the
 * bad-block bitmap of the disk it occurred on.
 */
static void
remember_ioerr(
	struct scrub_ctx		*ctx,
	struct disk			*disk,
	uint64_t			start,
	uint64_t			length,
	int				error,
	void				*arg)
{
	struct media_verify_state	*vs = arg;
	struct bitmap			*bad = bitmap_for_disk(ctx, disk, vs);
	int				err;

	/* Disks without a bad-block bitmap cannot record the error. */
	if (bad == NULL) {
		str_liberror(ctx, ENOENT, _("finding bad block bitmap"));
		return;
	}

	err = bitmap_set(bad, start, length);
	if (err != 0)
		str_liberror(ctx, err, _("setting bad block bitmap"));
}
605 | ||
b364a9c0 DW |
606 | /* |
607 | * Read verify all the file data blocks in a filesystem. Since XFS doesn't | |
608 | * do data checksums, we trust that the underlying storage will pass back | |
609 | * an IO error if it can't retrieve whatever we previously stored there. | |
610 | * If we hit an IO error, we'll record the bad blocks in a bitmap and then | |
611 | * scan the extent maps of the entire fs tree to figure (and the unlinked | |
612 | * inodes) out which files are now broken. | |
613 | */ | |
614 | bool | |
615 | xfs_scan_blocks( | |
616 | struct scrub_ctx *ctx) | |
617 | { | |
557f98d7 | 618 | struct media_verify_state vs = { NULL }; |
93ab49dd | 619 | bool moveon = false; |
233fabee | 620 | int ret; |
b364a9c0 | 621 | |
233fabee DW |
622 | ret = bitmap_alloc(&vs.d_bad); |
623 | if (ret) { | |
624 | str_liberror(ctx, ret, _("creating datadev badblock bitmap")); | |
41c08606 | 625 | goto out; |
b364a9c0 DW |
626 | } |
627 | ||
233fabee DW |
628 | ret = bitmap_alloc(&vs.r_bad); |
629 | if (ret) { | |
630 | str_liberror(ctx, ret, _("creating realtime badblock bitmap")); | |
b364a9c0 DW |
631 | goto out_dbad; |
632 | } | |
633 | ||
8cab77d3 | 634 | ret = read_verify_pool_alloc(ctx, ctx->datadev, |
c9b349bd | 635 | ctx->mnt.fsgeom.blocksize, remember_ioerr, |
8cab77d3 DW |
636 | scrub_nproc(ctx), &vs.rvp_data); |
637 | if (ret) { | |
638 | str_liberror(ctx, ret, _("creating datadev media verifier")); | |
b364a9c0 DW |
639 | goto out_rbad; |
640 | } | |
f1bb1696 | 641 | if (ctx->logdev) { |
8cab77d3 | 642 | ret = read_verify_pool_alloc(ctx, ctx->logdev, |
c9b349bd | 643 | ctx->mnt.fsgeom.blocksize, remember_ioerr, |
8cab77d3 DW |
644 | scrub_nproc(ctx), &vs.rvp_log); |
645 | if (ret) { | |
646 | str_liberror(ctx, ret, | |
647 | _("creating logdev media verifier")); | |
f1bb1696 DW |
648 | goto out_datapool; |
649 | } | |
650 | } | |
651 | if (ctx->rtdev) { | |
8cab77d3 | 652 | ret = read_verify_pool_alloc(ctx, ctx->rtdev, |
c9b349bd | 653 | ctx->mnt.fsgeom.blocksize, remember_ioerr, |
8cab77d3 DW |
654 | scrub_nproc(ctx), &vs.rvp_realtime); |
655 | if (ret) { | |
656 | str_liberror(ctx, ret, | |
657 | _("creating rtdev media verifier")); | |
f1bb1696 DW |
658 | goto out_logpool; |
659 | } | |
660 | } | |
557f98d7 | 661 | moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs); |
b364a9c0 | 662 | if (!moveon) |
f1bb1696 | 663 | goto out_rtpool; |
8cab77d3 DW |
664 | |
665 | ret = clean_pool(vs.rvp_data, &ctx->bytes_checked); | |
666 | if (ret) { | |
667 | str_liberror(ctx, ret, _("flushing datadev verify pool")); | |
668 | moveon = false; | |
669 | } | |
670 | ||
671 | ret = clean_pool(vs.rvp_log, &ctx->bytes_checked); | |
672 | if (ret) { | |
673 | str_liberror(ctx, ret, _("flushing logdev verify pool")); | |
674 | moveon = false; | |
675 | } | |
676 | ||
677 | ret = clean_pool(vs.rvp_realtime, &ctx->bytes_checked); | |
678 | if (ret) { | |
679 | str_liberror(ctx, ret, _("flushing rtdev verify pool")); | |
680 | moveon = false; | |
681 | } | |
b364a9c0 DW |
682 | |
683 | /* Scan the whole dir tree to see what matches the bad extents. */ | |
8cab77d3 | 684 | if (moveon && (!bitmap_empty(vs.d_bad) || !bitmap_empty(vs.r_bad))) |
c9b349bd | 685 | moveon = report_all_media_errors(ctx, &vs); |
b364a9c0 | 686 | |
557f98d7 DW |
687 | bitmap_free(&vs.r_bad); |
688 | bitmap_free(&vs.d_bad); | |
b364a9c0 DW |
689 | return moveon; |
690 | ||
f1bb1696 | 691 | out_rtpool: |
7668d01d | 692 | if (vs.rvp_realtime) { |
4cd869e5 | 693 | read_verify_pool_abort(vs.rvp_realtime); |
557f98d7 | 694 | read_verify_pool_destroy(vs.rvp_realtime); |
7668d01d | 695 | } |
f1bb1696 | 696 | out_logpool: |
7668d01d | 697 | if (vs.rvp_log) { |
4cd869e5 | 698 | read_verify_pool_abort(vs.rvp_log); |
557f98d7 | 699 | read_verify_pool_destroy(vs.rvp_log); |
7668d01d | 700 | } |
f1bb1696 | 701 | out_datapool: |
4cd869e5 | 702 | read_verify_pool_abort(vs.rvp_data); |
557f98d7 | 703 | read_verify_pool_destroy(vs.rvp_data); |
b364a9c0 | 704 | out_rbad: |
557f98d7 | 705 | bitmap_free(&vs.r_bad); |
b364a9c0 | 706 | out_dbad: |
557f98d7 | 707 | bitmap_free(&vs.d_bad); |
41c08606 | 708 | out: |
b364a9c0 DW |
709 | return moveon; |
710 | } | |
ed60d210 DW |
711 | |
712 | /* Estimate how much work we're going to do. */ | |
713 | bool | |
714 | xfs_estimate_verify_work( | |
715 | struct scrub_ctx *ctx, | |
716 | uint64_t *items, | |
717 | unsigned int *nr_threads, | |
718 | int *rshift) | |
719 | { | |
720 | unsigned long long d_blocks; | |
721 | unsigned long long d_bfree; | |
722 | unsigned long long r_blocks; | |
723 | unsigned long long r_bfree; | |
724 | unsigned long long f_files; | |
725 | unsigned long long f_free; | |
726 | bool moveon; | |
727 | ||
728 | moveon = xfs_scan_estimate_blocks(ctx, &d_blocks, &d_bfree, | |
729 | &r_blocks, &r_bfree, &f_files, &f_free); | |
730 | if (!moveon) | |
731 | return moveon; | |
732 | ||
a749451c DW |
733 | *items = cvt_off_fsb_to_b(&ctx->mnt, |
734 | (d_blocks - d_bfree) + (r_blocks - r_bfree)); | |
ed60d210 DW |
735 | *nr_threads = disk_heads(ctx->datadev); |
736 | *rshift = 20; | |
737 | return moveon; | |
738 | } |