]>
Commit | Line | Data |
---|---|---|
8d318d62 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
b364a9c0 | 2 | /* |
52520522 | 3 | * Copyright (C) 2018-2024 Oracle. All Rights Reserved. |
8d318d62 | 4 | * Author: Darrick J. Wong <djwong@kernel.org> |
b364a9c0 | 5 | */ |
a440f877 | 6 | #include "xfs.h" |
b364a9c0 | 7 | #include <stdint.h> |
b364a9c0 DW |
8 | #include <dirent.h> |
9 | #include <sys/statvfs.h> | |
9b72515a | 10 | #include <linux/fsmap.h> |
b364a9c0 | 11 | #include "handle.h" |
42b4c8e8 | 12 | #include "libfrog/paths.h" |
56598728 | 13 | #include "libfrog/workqueue.h" |
b364a9c0 DW |
14 | #include "xfs_scrub.h" |
15 | #include "common.h" | |
a58400ed | 16 | #include "libfrog/bitmap.h" |
b364a9c0 DW |
17 | #include "disk.h" |
18 | #include "filemap.h" | |
ed60d210 | 19 | #include "fscounters.h" |
b364a9c0 DW |
20 | #include "inodes.h" |
21 | #include "read_verify.h" | |
22 | #include "spacemap.h" | |
23 | #include "vfs.h" | |
4d0ce76d | 24 | #include "common.h" |
9b5d1349 | 25 | #include "libfrog/bulkstat.h" |
b364a9c0 DW |
26 | |
27 | /* | |
28 | * Phase 6: Verify data file integrity. | |
29 | * | |
30 | * Identify potential data block extents with GETFSMAP, then feed those | |
31 | * extents to the read-verify pool to get the verify commands batched, | |
32 | * issued, and (if there are problems) reported back to us. If there | |
33 | * are errors, we'll record the bad regions and (if available) use rmap | |
34 | * to tell us if metadata are now corrupt. Otherwise, we'll scan the | |
35 | * whole directory tree looking for files that overlap the bad regions | |
36 | * and report the paths of the now corrupt files. | |
37 | */ | |
38 | ||
f1bb1696 DW |
39 | /* Verify disk blocks with GETFSMAP */ |
40 | ||
/*
 * State shared across the media verification pass: one read-verify pool per
 * device and the bitmaps of bad ranges recorded for the data and realtime
 * devices.
 */
struct media_verify_state {
	/* Read-verify pool selected for the data device. */
	struct read_verify_pool	*rvp_data;
	/* Read-verify pool selected for the log device. */
	struct read_verify_pool	*rvp_log;
	/* Read-verify pool selected for the realtime device. */
	struct read_verify_pool	*rvp_realtime;
	/* Bad ranges on the data device, in bytes. */
	struct bitmap		*d_bad;
	/* Bad ranges on the realtime device, in bytes. */
	struct bitmap		*r_bad;
};
48 | ||
b364a9c0 | 49 | /* Find the fd for a given device identifier. */ |
f1bb1696 | 50 | static struct read_verify_pool * |
af9eb208 | 51 | dev_to_pool( |
f1bb1696 | 52 | struct scrub_ctx *ctx, |
557f98d7 | 53 | struct media_verify_state *vs, |
f1bb1696 | 54 | dev_t dev) |
b364a9c0 DW |
55 | { |
56 | if (dev == ctx->fsinfo.fs_datadev) | |
557f98d7 | 57 | return vs->rvp_data; |
b364a9c0 | 58 | else if (dev == ctx->fsinfo.fs_logdev) |
557f98d7 | 59 | return vs->rvp_log; |
b364a9c0 | 60 | else if (dev == ctx->fsinfo.fs_rtdev) |
557f98d7 | 61 | return vs->rvp_realtime; |
b364a9c0 DW |
62 | abort(); |
63 | } | |
64 | ||
65 | /* Find the device major/minor for a given file descriptor. */ | |
66 | static dev_t | |
af9eb208 | 67 | disk_to_dev( |
b364a9c0 DW |
68 | struct scrub_ctx *ctx, |
69 | struct disk *disk) | |
70 | { | |
71 | if (disk == ctx->datadev) | |
72 | return ctx->fsinfo.fs_datadev; | |
73 | else if (disk == ctx->logdev) | |
74 | return ctx->fsinfo.fs_logdev; | |
75 | else if (disk == ctx->rtdev) | |
76 | return ctx->fsinfo.fs_rtdev; | |
77 | abort(); | |
78 | } | |
79 | ||
c9b349bd DW |
80 | /* Find the incore bad blocks bitmap for a given disk. */ |
81 | static struct bitmap * | |
82 | bitmap_for_disk( | |
83 | struct scrub_ctx *ctx, | |
84 | struct disk *disk, | |
85 | struct media_verify_state *vs) | |
86 | { | |
af9eb208 | 87 | dev_t dev = disk_to_dev(ctx, disk); |
c9b349bd DW |
88 | |
89 | if (dev == ctx->fsinfo.fs_datadev) | |
90 | return vs->d_bad; | |
91 | else if (dev == ctx->fsinfo.fs_rtdev) | |
92 | return vs->r_bad; | |
93 | return NULL; | |
94 | } | |
95 | ||
/*
 * Argument passed to report_ioerr() while walking the bad-range bitmap of a
 * single disk.
 */
struct disk_ioerr_report {
	/* Scrub context used for fsmap queries and error reporting. */
	struct scrub_ctx	*ctx;
	/* The disk whose media errors are being reported. */
	struct disk		*disk;
};
100 | ||
/* One entry of the table mapping a special fsmap owner code to a name. */
struct owner_decode {
	/* Special owner code (one of the XFS_FMR_OWN_* values). */
	uint64_t		owner;
	/* Human-readable description; NULL marks the end of the table. */
	const char		*descr;
};
105 | ||
106 | static const struct owner_decode special_owners[] = { | |
107 | {XFS_FMR_OWN_FREE, "free space"}, | |
108 | {XFS_FMR_OWN_UNKNOWN, "unknown owner"}, | |
109 | {XFS_FMR_OWN_FS, "static FS metadata"}, | |
110 | {XFS_FMR_OWN_LOG, "journalling log"}, | |
111 | {XFS_FMR_OWN_AG, "per-AG metadata"}, | |
112 | {XFS_FMR_OWN_INOBT, "inode btree blocks"}, | |
113 | {XFS_FMR_OWN_INODES, "inodes"}, | |
114 | {XFS_FMR_OWN_REFC, "refcount btree"}, | |
115 | {XFS_FMR_OWN_COW, "CoW staging"}, | |
116 | {XFS_FMR_OWN_DEFECTIVE, "bad blocks"}, | |
117 | {0, NULL}, | |
118 | }; | |
119 | ||
120 | /* Decode a special owner. */ | |
121 | static const char * | |
af9eb208 | 122 | decode_special_owner( |
b364a9c0 DW |
123 | uint64_t owner) |
124 | { | |
125 | const struct owner_decode *od = special_owners; | |
126 | ||
127 | while (od->descr) { | |
128 | if (od->owner == owner) | |
129 | return od->descr; | |
130 | od++; | |
131 | } | |
132 | ||
133 | return NULL; | |
134 | } | |
135 | ||
136 | /* Routines to translate bad physical extents into file paths and offsets. */ | |
137 | ||
/* Context carried through the file mapping walk that reports lost data. */
struct badfile_report {
	/* Scrub context used for reporting. */
	struct scrub_ctx		*ctx;
	/* Description of the file being examined, used as message prefix. */
	const char			*descr;
	/* Media verification state holding the bad-range bitmaps. */
	struct media_verify_state	*vs;
	/* File mapping currently being checked; set by report_data_loss(). */
	struct file_bmap		*bmap;
};
144 | ||
145 | /* Report on bad extents found during a media scan. */ | |
146 | static int | |
147 | report_badfile( | |
148 | uint64_t start, | |
149 | uint64_t length, | |
150 | void *arg) | |
151 | { | |
152 | struct badfile_report *br = arg; | |
153 | unsigned long long bad_offset; | |
154 | unsigned long long bad_length; | |
155 | ||
156 | /* Clamp the bad region to the file mapping. */ | |
157 | if (start < br->bmap->bm_physical) { | |
158 | length -= br->bmap->bm_physical - start; | |
159 | start = br->bmap->bm_physical; | |
160 | } | |
161 | length = min(length, br->bmap->bm_length); | |
162 | ||
163 | /* Figure out how far into the bmap is the bad mapping and report it. */ | |
164 | bad_offset = start - br->bmap->bm_physical; | |
165 | bad_length = min(start + length, | |
166 | br->bmap->bm_physical + br->bmap->bm_length) - start; | |
167 | ||
49e05cb0 | 168 | str_unfixable_error(br->ctx, br->descr, |
ed953d26 DW |
169 | _("media error at data offset %llu length %llu."), |
170 | br->bmap->bm_offset + bad_offset, bad_length); | |
171 | return 0; | |
172 | } | |
173 | ||
b364a9c0 | 174 | /* Report if this extent overlaps a bad region. */ |
73ce9669 | 175 | static int |
663e02a0 | 176 | report_data_loss( |
b364a9c0 | 177 | struct scrub_ctx *ctx, |
b364a9c0 DW |
178 | int fd, |
179 | int whichfork, | |
180 | struct fsxattr *fsx, | |
73ce9669 | 181 | struct file_bmap *bmap, |
b364a9c0 DW |
182 | void *arg) |
183 | { | |
73ce9669 DW |
184 | struct badfile_report *br = arg; |
185 | struct media_verify_state *vs = br->vs; | |
b364a9c0 | 186 | struct bitmap *bmp; |
73ce9669 DW |
187 | |
188 | br->bmap = bmap; | |
b364a9c0 DW |
189 | |
190 | /* Only report errors for real extents. */ | |
191 | if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) | |
73ce9669 | 192 | return 0; |
b364a9c0 DW |
193 | |
194 | if (fsx->fsx_xflags & FS_XFLAG_REALTIME) | |
ed5f9cc7 | 195 | bmp = vs->r_bad; |
b364a9c0 | 196 | else |
ed5f9cc7 | 197 | bmp = vs->d_bad; |
b364a9c0 | 198 | |
93d69bc7 | 199 | return -bitmap_iterate_range(bmp, bmap->bm_physical, bmap->bm_length, |
73ce9669 | 200 | report_badfile, br); |
b364a9c0 DW |
201 | } |
202 | ||
663e02a0 | 203 | /* Report if the extended attribute data overlaps a bad region. */ |
73ce9669 | 204 | static int |
663e02a0 DW |
205 | report_attr_loss( |
206 | struct scrub_ctx *ctx, | |
663e02a0 DW |
207 | int fd, |
208 | int whichfork, | |
209 | struct fsxattr *fsx, | |
73ce9669 | 210 | struct file_bmap *bmap, |
663e02a0 DW |
211 | void *arg) |
212 | { | |
73ce9669 DW |
213 | struct badfile_report *br = arg; |
214 | struct media_verify_state *vs = br->vs; | |
663e02a0 DW |
215 | struct bitmap *bmp = vs->d_bad; |
216 | ||
217 | /* Complain about attr fork extents that don't look right. */ | |
218 | if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) { | |
73ce9669 | 219 | str_info(ctx, br->descr, |
663e02a0 | 220 | _("found unexpected unwritten/delalloc attr fork extent.")); |
73ce9669 | 221 | return 0; |
663e02a0 DW |
222 | } |
223 | ||
224 | if (fsx->fsx_xflags & FS_XFLAG_REALTIME) { | |
73ce9669 | 225 | str_info(ctx, br->descr, |
663e02a0 | 226 | _("found unexpected realtime attr fork extent.")); |
73ce9669 | 227 | return 0; |
663e02a0 DW |
228 | } |
229 | ||
230 | if (bitmap_test(bmp, bmap->bm_physical, bmap->bm_length)) | |
73ce9669 | 231 | str_corrupt(ctx, br->descr, |
663e02a0 DW |
232 | _("media error in extended attribute data.")); |
233 | ||
73ce9669 | 234 | return 0; |
663e02a0 DW |
235 | } |
236 | ||
b364a9c0 | 237 | /* Iterate the extent mappings of a file to report errors. */ |
af9eb208 DW |
238 | static int |
239 | report_fd_loss( | |
b364a9c0 DW |
240 | struct scrub_ctx *ctx, |
241 | const char *descr, | |
242 | int fd, | |
243 | void *arg) | |
244 | { | |
73ce9669 DW |
245 | struct badfile_report br = { |
246 | .ctx = ctx, | |
247 | .vs = arg, | |
248 | .descr = descr, | |
249 | }; | |
250 | struct file_bmap key = {0}; | |
251 | int ret; | |
b364a9c0 DW |
252 | |
253 | /* data fork */ | |
73ce9669 DW |
254 | ret = scrub_iterate_filemaps(ctx, fd, XFS_DATA_FORK, &key, |
255 | report_data_loss, &br); | |
256 | if (ret) { | |
257 | str_liberror(ctx, ret, descr); | |
af9eb208 | 258 | return ret; |
73ce9669 | 259 | } |
b364a9c0 DW |
260 | |
261 | /* attr fork */ | |
73ce9669 DW |
262 | ret = scrub_iterate_filemaps(ctx, fd, XFS_ATTR_FORK, &key, |
263 | report_attr_loss, &br); | |
264 | if (ret) { | |
265 | str_liberror(ctx, ret, descr); | |
af9eb208 | 266 | return ret; |
73ce9669 | 267 | } |
af9eb208 DW |
268 | |
269 | return 0; | |
b364a9c0 DW |
270 | } |
271 | ||
272 | /* Report read verify errors in unlinked (but still open) files. */ | |
273 | static int | |
af9eb208 | 274 | report_inode_loss( |
b364a9c0 DW |
275 | struct scrub_ctx *ctx, |
276 | struct xfs_handle *handle, | |
4cca629d | 277 | struct xfs_bulkstat *bstat, |
b364a9c0 DW |
278 | void *arg) |
279 | { | |
280 | char descr[DESCR_BUFSZ]; | |
b364a9c0 | 281 | int fd; |
af9eb208 | 282 | int error, err2; |
b364a9c0 | 283 | |
b364a9c0 DW |
284 | /* Ignore linked files and things we can't open. */ |
285 | if (bstat->bs_nlink != 0) | |
286 | return 0; | |
287 | if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode)) | |
288 | return 0; | |
289 | ||
15589f0a DW |
290 | scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ, |
291 | bstat->bs_ino, bstat->bs_gen, _("(unlinked)")); | |
292 | ||
b364a9c0 | 293 | /* Try to open the inode. */ |
59f79e0a | 294 | fd = scrub_open_handle(handle); |
b364a9c0 | 295 | if (fd < 0) { |
4d0ce76d DW |
296 | /* Handle is stale, try again. */ |
297 | if (errno == ESTALE) | |
298 | return ESTALE; | |
b364a9c0 | 299 | |
4d0ce76d DW |
300 | str_error(ctx, descr, |
301 | _("Could not open to report read errors: %s."), | |
302 | strerror(errno)); | |
303 | return 0; | |
b364a9c0 DW |
304 | } |
305 | ||
306 | /* Go find the badness. */ | |
af9eb208 DW |
307 | error = report_fd_loss(ctx, descr, fd, arg); |
308 | ||
309 | err2 = close(fd); | |
310 | if (err2) | |
6c05cc5d | 311 | str_errno(ctx, descr); |
b364a9c0 | 312 | |
af9eb208 | 313 | return error; |
b364a9c0 DW |
314 | } |
315 | ||
/*
 * Directory tree walk callback: check the mappings of an already-open
 * directory against the read verify error list.
 */
static int
report_dir_loss(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	int			error;

	error = report_fd_loss(ctx, path, dir_fd, arg);
	return error;
}
326 | ||
327 | /* | |
328 | * Scan the inode associated with a directory entry for matches with | |
329 | * the read verify error list. | |
330 | */ | |
f544ec31 | 331 | static int |
af9eb208 | 332 | report_dirent_loss( |
b364a9c0 DW |
333 | struct scrub_ctx *ctx, |
334 | const char *path, | |
335 | int dir_fd, | |
336 | struct dirent *dirent, | |
337 | struct stat *sb, | |
338 | void *arg) | |
339 | { | |
b364a9c0 | 340 | int fd; |
af9eb208 | 341 | int error, err2; |
b364a9c0 DW |
342 | |
343 | /* Ignore things we can't open. */ | |
344 | if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode)) | |
f544ec31 | 345 | return 0; |
b364a9c0 DW |
346 | |
347 | /* Ignore . and .. */ | |
348 | if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name)) | |
f544ec31 | 349 | return 0; |
b364a9c0 DW |
350 | |
351 | /* | |
352 | * If we were given a dirent, open the associated file under | |
353 | * dir_fd for badblocks scanning. If dirent is NULL, then it's | |
354 | * the directory itself we want to scan. | |
355 | */ | |
356 | fd = openat(dir_fd, dirent->d_name, | |
357 | O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY); | |
f544ec31 | 358 | if (fd < 0) { |
4d0ce76d DW |
359 | char descr[PATH_MAX + 1]; |
360 | ||
f544ec31 DW |
361 | if (errno == ENOENT) |
362 | return 0; | |
4d0ce76d DW |
363 | |
364 | snprintf(descr, PATH_MAX, "%s/%s", path, dirent->d_name); | |
365 | descr[PATH_MAX] = 0; | |
366 | ||
367 | str_error(ctx, descr, | |
368 | _("Could not open to report read errors: %s."), | |
369 | strerror(errno)); | |
370 | return 0; | |
f544ec31 | 371 | } |
b364a9c0 DW |
372 | |
373 | /* Go find the badness. */ | |
af9eb208 | 374 | error = report_fd_loss(ctx, path, fd, arg); |
b364a9c0 | 375 | |
af9eb208 DW |
376 | err2 = close(fd); |
377 | if (err2) | |
6c05cc5d | 378 | str_errno(ctx, path); |
af9eb208 DW |
379 | if (!error && err2) |
380 | error = err2; | |
381 | ||
382 | return error; | |
b364a9c0 DW |
383 | } |
384 | ||
/* The bad physical range handed to report_ioerr_fsmap() via its argument. */
struct ioerr_filerange {
	/* Start of the bad range on the device. */
	uint64_t		physical;
	/* Length of the bad range. */
	uint64_t		length;
};
389 | ||
390 | /* | |
391 | * If reverse mapping and parent pointers are enabled, we can map media errors | |
392 | * directly back to a filename and a file position without needing to walk the | |
393 | * directory tree. | |
394 | */ | |
395 | static inline bool | |
396 | can_use_pptrs( | |
397 | const struct scrub_ctx *ctx) | |
398 | { | |
399 | return (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_PARENT) && | |
400 | (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_RMAPBT); | |
401 | } | |
402 | ||
c9b349bd | 403 | /* Use a fsmap to report metadata lost to a media error. */ |
7a2eef2b | 404 | static int |
c9b349bd | 405 | report_ioerr_fsmap( |
b364a9c0 | 406 | struct scrub_ctx *ctx, |
b364a9c0 DW |
407 | struct fsmap *map, |
408 | void *arg) | |
409 | { | |
410 | const char *type; | |
9b5d1349 | 411 | struct xfs_bulkstat bs = { }; |
f1f5fd3a | 412 | char buf[DESCR_BUFSZ]; |
9b5d1349 | 413 | struct ioerr_filerange *fr = arg; |
b364a9c0 | 414 | uint64_t err_off; |
9b5d1349 | 415 | int ret; |
b364a9c0 | 416 | |
909c6a54 DW |
417 | /* Don't care about unwritten extents. */ |
418 | if (map->fmr_flags & FMR_OF_PREALLOC) | |
7a2eef2b | 419 | return 0; |
909c6a54 | 420 | |
9b5d1349 DW |
421 | if (fr->physical > map->fmr_physical) |
422 | err_off = fr->physical - map->fmr_physical; | |
b364a9c0 DW |
423 | else |
424 | err_off = 0; | |
425 | ||
f1f5fd3a | 426 | /* Report special owners */ |
b364a9c0 | 427 | if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) { |
f1f5fd3a DW |
428 | snprintf(buf, DESCR_BUFSZ, _("disk offset %"PRIu64), |
429 | (uint64_t)map->fmr_physical + err_off); | |
af9eb208 | 430 | type = decode_special_owner(map->fmr_owner); |
96ac83c8 DW |
431 | /* |
432 | * On filesystems that don't store reverse mappings, the | |
433 | * GETFSMAP call returns OWNER_UNKNOWN for allocated space. | |
434 | * We'll have to let the directory tree walker find the file | |
435 | * that lost data. | |
436 | */ | |
437 | if (!(ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_RMAPBT) && | |
438 | map->fmr_owner == XFS_FMR_OWN_UNKNOWN) { | |
439 | str_info(ctx, buf, _("media error detected.")); | |
440 | } else { | |
441 | str_corrupt(ctx, buf, _("media error in %s."), type); | |
442 | } | |
b364a9c0 DW |
443 | } |
444 | ||
9b5d1349 DW |
445 | if (can_use_pptrs(ctx)) { |
446 | ret = -xfrog_bulkstat_single(&ctx->mnt, map->fmr_owner, 0, &bs); | |
447 | if (ret) | |
448 | str_liberror(ctx, ret, | |
449 | _("bulkstat for media error report")); | |
450 | } | |
451 | ||
02d0069e DW |
452 | /* Report extent maps */ |
453 | if (map->fmr_flags & FMR_OF_EXTENT_MAP) { | |
454 | bool attr = (map->fmr_flags & FMR_OF_ATTR_FORK); | |
455 | ||
456 | scrub_render_ino_descr(ctx, buf, DESCR_BUFSZ, | |
9b5d1349 | 457 | map->fmr_owner, bs.bs_gen, " %s", |
02d0069e DW |
458 | attr ? _("extended attribute") : |
459 | _("file data")); | |
abc2e70d | 460 | str_corrupt(ctx, buf, _("media error in extent map")); |
02d0069e DW |
461 | } |
462 | ||
b364a9c0 | 463 | /* |
9b5d1349 DW |
464 | * If directory parent pointers are available, use that to find the |
465 | * pathname to a file, and report that path as having lost its | |
466 | * extended attributes, or the precise offset of the lost file data. | |
b364a9c0 | 467 | */ |
9b5d1349 DW |
468 | if (!can_use_pptrs(ctx)) |
469 | return 0; | |
b364a9c0 | 470 | |
9b5d1349 DW |
471 | scrub_render_ino_descr(ctx, buf, DESCR_BUFSZ, map->fmr_owner, |
472 | bs.bs_gen, NULL); | |
473 | ||
474 | if (map->fmr_flags & FMR_OF_ATTR_FORK) { | |
475 | str_corrupt(ctx, buf, _("media error in extended attributes")); | |
476 | return 0; | |
477 | } | |
478 | ||
479 | str_unfixable_error(ctx, buf, | |
480 | _("media error at data offset %llu length %llu."), | |
481 | err_off, fr->length); | |
7a2eef2b | 482 | return 0; |
b364a9c0 DW |
483 | } |
484 | ||
485 | /* | |
c9b349bd DW |
486 | * For a range of bad blocks, visit each space mapping that overlaps the bad |
487 | * range so that we can report lost metadata. | |
b364a9c0 | 488 | */ |
c9b349bd DW |
489 | static int |
490 | report_ioerr( | |
b364a9c0 DW |
491 | uint64_t start, |
492 | uint64_t length, | |
b364a9c0 DW |
493 | void *arg) |
494 | { | |
495 | struct fsmap keys[2]; | |
9b5d1349 DW |
496 | struct ioerr_filerange fr = { |
497 | .physical = start, | |
498 | .length = length, | |
499 | }; | |
c9b349bd | 500 | struct disk_ioerr_report *dioerr = arg; |
b364a9c0 | 501 | dev_t dev; |
b364a9c0 | 502 | |
af9eb208 | 503 | dev = disk_to_dev(dioerr->ctx, dioerr->disk); |
b364a9c0 | 504 | |
b364a9c0 DW |
505 | /* Go figure out which blocks are bad from the fsmap. */ |
506 | memset(keys, 0, sizeof(struct fsmap) * 2); | |
507 | keys->fmr_device = dev; | |
508 | keys->fmr_physical = start; | |
509 | (keys + 1)->fmr_device = dev; | |
510 | (keys + 1)->fmr_physical = start + length - 1; | |
511 | (keys + 1)->fmr_owner = ULLONG_MAX; | |
512 | (keys + 1)->fmr_offset = ULLONG_MAX; | |
513 | (keys + 1)->fmr_flags = UINT_MAX; | |
93d69bc7 | 514 | return -scrub_iterate_fsmap(dioerr->ctx, keys, report_ioerr_fsmap, |
9b5d1349 | 515 | &fr); |
c9b349bd DW |
516 | } |
517 | ||
518 | /* Report all the media errors found on a disk. */ | |
519 | static int | |
520 | report_disk_ioerrs( | |
521 | struct scrub_ctx *ctx, | |
522 | struct disk *disk, | |
523 | struct media_verify_state *vs) | |
524 | { | |
525 | struct disk_ioerr_report dioerr = { | |
526 | .ctx = ctx, | |
527 | .disk = disk, | |
528 | }; | |
529 | struct bitmap *tree; | |
530 | ||
531 | if (!disk) | |
532 | return 0; | |
533 | tree = bitmap_for_disk(ctx, disk, vs); | |
534 | if (!tree) | |
535 | return 0; | |
93d69bc7 | 536 | return -bitmap_iterate(tree, report_ioerr, &dioerr); |
c9b349bd DW |
537 | } |
538 | ||
539 | /* Given bad extent lists for the data & rtdev, find bad files. */ | |
af9eb208 | 540 | static int |
c9b349bd DW |
541 | report_all_media_errors( |
542 | struct scrub_ctx *ctx, | |
543 | struct media_verify_state *vs) | |
544 | { | |
c9b349bd DW |
545 | int ret; |
546 | ||
547 | ret = report_disk_ioerrs(ctx, ctx->datadev, vs); | |
548 | if (ret) { | |
549 | str_liberror(ctx, ret, _("walking datadev io errors")); | |
af9eb208 | 550 | return ret; |
c9b349bd DW |
551 | } |
552 | ||
553 | ret = report_disk_ioerrs(ctx, ctx->rtdev, vs); | |
554 | if (ret) { | |
555 | str_liberror(ctx, ret, _("walking rtdev io errors")); | |
af9eb208 | 556 | return ret; |
c9b349bd DW |
557 | } |
558 | ||
9b5d1349 DW |
559 | /* |
560 | * Scan the directory tree to get file paths if we didn't already use | |
561 | * directory parent pointers to report the loss. | |
562 | */ | |
563 | if (!can_use_pptrs(ctx)) { | |
564 | ret = scan_fs_tree(ctx, report_dir_loss, report_dirent_loss, | |
565 | vs); | |
566 | if (ret) | |
567 | return ret; | |
568 | } | |
c9b349bd DW |
569 | |
570 | /* Scan for unlinked files. */ | |
af9eb208 | 571 | return scrub_scan_all_inodes(ctx, report_inode_loss, vs); |
b364a9c0 DW |
572 | } |
573 | ||
574 | /* Schedule a read-verify of a (data block) extent. */ | |
7a2eef2b DW |
575 | static int |
576 | check_rmap( | |
b364a9c0 | 577 | struct scrub_ctx *ctx, |
b364a9c0 DW |
578 | struct fsmap *map, |
579 | void *arg) | |
580 | { | |
557f98d7 | 581 | struct media_verify_state *vs = arg; |
f1bb1696 | 582 | struct read_verify_pool *rvp; |
8cab77d3 | 583 | int ret; |
f1bb1696 | 584 | |
af9eb208 | 585 | rvp = dev_to_pool(ctx, vs, map->fmr_device); |
b364a9c0 DW |
586 | |
587 | dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64 | |
588 | " offset %"PRIu64" len %"PRIu64" flags 0x%x\n", | |
589 | major(map->fmr_device), minor(map->fmr_device), | |
590 | (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner, | |
591 | (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length, | |
592 | map->fmr_flags); | |
593 | ||
594 | /* "Unknown" extents should be verified; they could be data. */ | |
595 | if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) && | |
596 | map->fmr_owner == XFS_FMR_OWN_UNKNOWN) | |
597 | map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER; | |
598 | ||
599 | /* | |
600 | * We only care about read-verifying data extents that have been | |
601 | * written to disk. This means we can skip "special" owners | |
602 | * (metadata), xattr blocks, unwritten extents, and extent maps. | |
603 | * These should all get checked elsewhere in the scrubber. | |
604 | */ | |
605 | if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK | | |
606 | FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER)) | |
7a2eef2b | 607 | return 0; |
b364a9c0 DW |
608 | |
609 | /* XXX: Filter out directory data blocks. */ | |
610 | ||
611 | /* Schedule the read verify command for (eventual) running. */ | |
8cab77d3 DW |
612 | ret = read_verify_schedule_io(rvp, map->fmr_physical, map->fmr_length, |
613 | vs); | |
614 | if (ret) { | |
7a2eef2b DW |
615 | str_liberror(ctx, ret, _("scheduling media verify command")); |
616 | return ret; | |
8cab77d3 | 617 | } |
b364a9c0 | 618 | |
7a2eef2b | 619 | return 0; |
b364a9c0 DW |
620 | } |
621 | ||
/*
 * Wait for read/verify actions to finish, add the number of bytes checked
 * to *bytes_checked, and tear down the pool.
 *
 * A NULL pool is a no-op.  The pool is always destroyed before returning,
 * even on error, so the caller must not touch @rvp afterwards.  Returns 0
 * or the first error encountered; *bytes_checked is only updated on success.
 */
static int
clean_pool(
	struct read_verify_pool	*rvp,
	unsigned long long	*bytes_checked)
{
	uint64_t		pool_checked;
	int			ret;

	if (!rvp)
		return 0;

	ret = read_verify_force_io(rvp);
	if (ret) {
		/*
		 * Don't leak the pool if we can't push out the remaining IO.
		 * Abort outstanding work before destroying it, the same way
		 * the error paths in phase6_func() do.
		 */
		read_verify_pool_abort(rvp);
		goto out_destroy;
	}

	ret = read_verify_pool_flush(rvp);
	if (ret)
		goto out_destroy;

	ret = read_verify_bytes(rvp, &pool_checked);
	if (ret)
		goto out_destroy;

	*bytes_checked += pool_checked;
out_destroy:
	read_verify_pool_destroy(rvp);
	return ret;
}
651 | ||
/*
 * Read-verify pool error callback: remember a media error for later by
 * marking [start, start + length) in the bad-range bitmap of the disk it
 * was found on.  @arg is the media_verify_state.  Failures are reported
 * but cannot be returned.
 */
static void
remember_ioerr(
	struct scrub_ctx		*ctx,
	struct disk			*disk,
	uint64_t			start,
	uint64_t			length,
	int				error,
	void				*arg)
{
	struct media_verify_state	*vs = arg;
	struct bitmap			*badmap;
	int				ret;

	badmap = bitmap_for_disk(ctx, disk, vs);
	if (badmap == NULL) {
		/* No bitmap is kept for this disk. */
		str_liberror(ctx, ENOENT, _("finding bad block bitmap"));
		return;
	}

	ret = -bitmap_set(badmap, start, length);
	if (ret != 0)
		str_liberror(ctx, ret, _("setting bad block bitmap"));
}
676 | ||
b364a9c0 DW |
677 | /* |
678 | * Read verify all the file data blocks in a filesystem. Since XFS doesn't | |
679 | * do data checksums, we trust that the underlying storage will pass back | |
680 | * an IO error if it can't retrieve whatever we previously stored there. | |
681 | * If we hit an IO error, we'll record the bad blocks in a bitmap and then | |
682 | * scan the extent maps of the entire fs tree to figure (and the unlinked | |
683 | * inodes) out which files are now broken. | |
684 | */ | |
af9eb208 DW |
685 | int |
686 | phase6_func( | |
b364a9c0 DW |
687 | struct scrub_ctx *ctx) |
688 | { | |
557f98d7 | 689 | struct media_verify_state vs = { NULL }; |
af9eb208 | 690 | int ret, ret2, ret3; |
b364a9c0 | 691 | |
93d69bc7 | 692 | ret = -bitmap_alloc(&vs.d_bad); |
233fabee DW |
693 | if (ret) { |
694 | str_liberror(ctx, ret, _("creating datadev badblock bitmap")); | |
af9eb208 | 695 | return ret; |
b364a9c0 DW |
696 | } |
697 | ||
93d69bc7 | 698 | ret = -bitmap_alloc(&vs.r_bad); |
233fabee DW |
699 | if (ret) { |
700 | str_liberror(ctx, ret, _("creating realtime badblock bitmap")); | |
b364a9c0 DW |
701 | goto out_dbad; |
702 | } | |
703 | ||
8cab77d3 | 704 | ret = read_verify_pool_alloc(ctx, ctx->datadev, |
c9b349bd | 705 | ctx->mnt.fsgeom.blocksize, remember_ioerr, |
8cab77d3 DW |
706 | scrub_nproc(ctx), &vs.rvp_data); |
707 | if (ret) { | |
708 | str_liberror(ctx, ret, _("creating datadev media verifier")); | |
b364a9c0 DW |
709 | goto out_rbad; |
710 | } | |
f1bb1696 | 711 | if (ctx->logdev) { |
8cab77d3 | 712 | ret = read_verify_pool_alloc(ctx, ctx->logdev, |
c9b349bd | 713 | ctx->mnt.fsgeom.blocksize, remember_ioerr, |
8cab77d3 DW |
714 | scrub_nproc(ctx), &vs.rvp_log); |
715 | if (ret) { | |
716 | str_liberror(ctx, ret, | |
717 | _("creating logdev media verifier")); | |
f1bb1696 DW |
718 | goto out_datapool; |
719 | } | |
720 | } | |
721 | if (ctx->rtdev) { | |
8cab77d3 | 722 | ret = read_verify_pool_alloc(ctx, ctx->rtdev, |
c9b349bd | 723 | ctx->mnt.fsgeom.blocksize, remember_ioerr, |
8cab77d3 DW |
724 | scrub_nproc(ctx), &vs.rvp_realtime); |
725 | if (ret) { | |
726 | str_liberror(ctx, ret, | |
727 | _("creating rtdev media verifier")); | |
f1bb1696 DW |
728 | goto out_logpool; |
729 | } | |
730 | } | |
7a2eef2b | 731 | ret = scrub_scan_all_spacemaps(ctx, check_rmap, &vs); |
af9eb208 | 732 | if (ret) |
f1bb1696 | 733 | goto out_rtpool; |
8cab77d3 DW |
734 | |
735 | ret = clean_pool(vs.rvp_data, &ctx->bytes_checked); | |
af9eb208 | 736 | if (ret) |
8cab77d3 | 737 | str_liberror(ctx, ret, _("flushing datadev verify pool")); |
8cab77d3 | 738 | |
af9eb208 DW |
739 | ret2 = clean_pool(vs.rvp_log, &ctx->bytes_checked); |
740 | if (ret2) | |
741 | str_liberror(ctx, ret2, _("flushing logdev verify pool")); | |
8cab77d3 | 742 | |
af9eb208 DW |
743 | ret3 = clean_pool(vs.rvp_realtime, &ctx->bytes_checked); |
744 | if (ret3) | |
745 | str_liberror(ctx, ret3, _("flushing rtdev verify pool")); | |
746 | ||
747 | /* | |
748 | * If the verify flush didn't work or we found no bad blocks, we're | |
749 | * done! No errors detected. | |
750 | */ | |
751 | if (ret || ret2 || ret3) | |
752 | goto out_rbad; | |
753 | if (bitmap_empty(vs.d_bad) && bitmap_empty(vs.r_bad)) | |
754 | goto out_rbad; | |
b364a9c0 DW |
755 | |
756 | /* Scan the whole dir tree to see what matches the bad extents. */ | |
af9eb208 | 757 | ret = report_all_media_errors(ctx, &vs); |
b364a9c0 | 758 | |
557f98d7 DW |
759 | bitmap_free(&vs.r_bad); |
760 | bitmap_free(&vs.d_bad); | |
af9eb208 | 761 | return ret; |
b364a9c0 | 762 | |
f1bb1696 | 763 | out_rtpool: |
7668d01d | 764 | if (vs.rvp_realtime) { |
4cd869e5 | 765 | read_verify_pool_abort(vs.rvp_realtime); |
557f98d7 | 766 | read_verify_pool_destroy(vs.rvp_realtime); |
7668d01d | 767 | } |
f1bb1696 | 768 | out_logpool: |
7668d01d | 769 | if (vs.rvp_log) { |
4cd869e5 | 770 | read_verify_pool_abort(vs.rvp_log); |
557f98d7 | 771 | read_verify_pool_destroy(vs.rvp_log); |
7668d01d | 772 | } |
f1bb1696 | 773 | out_datapool: |
4cd869e5 | 774 | read_verify_pool_abort(vs.rvp_data); |
557f98d7 | 775 | read_verify_pool_destroy(vs.rvp_data); |
b364a9c0 | 776 | out_rbad: |
557f98d7 | 777 | bitmap_free(&vs.r_bad); |
b364a9c0 | 778 | out_dbad: |
557f98d7 | 779 | bitmap_free(&vs.d_bad); |
af9eb208 | 780 | return ret; |
b364a9c0 | 781 | } |
ed60d210 | 782 | |
af9eb208 DW |
783 | /* Estimate how much work we're going to do. */ |
784 | int | |
785 | phase6_estimate( | |
ed60d210 DW |
786 | struct scrub_ctx *ctx, |
787 | uint64_t *items, | |
788 | unsigned int *nr_threads, | |
789 | int *rshift) | |
790 | { | |
791 | unsigned long long d_blocks; | |
792 | unsigned long long d_bfree; | |
793 | unsigned long long r_blocks; | |
794 | unsigned long long r_bfree; | |
0b78ac05 | 795 | unsigned long long dontcare; |
934d8d3a | 796 | int ret; |
ed60d210 | 797 | |
0b78ac05 DW |
798 | ret = scrub_scan_estimate_blocks(ctx, &d_blocks, &d_bfree, &r_blocks, |
799 | &r_bfree, &dontcare); | |
934d8d3a DW |
800 | if (ret) { |
801 | str_liberror(ctx, ret, _("estimating verify work")); | |
af9eb208 | 802 | return ret; |
934d8d3a | 803 | } |
ed60d210 | 804 | |
a749451c DW |
805 | *items = cvt_off_fsb_to_b(&ctx->mnt, |
806 | (d_blocks - d_bfree) + (r_blocks - r_bfree)); | |
13eedd45 DW |
807 | |
808 | /* | |
809 | * Each read-verify pool starts a thread pool, and each worker thread | |
810 | * can contribute to the progress counter. Hence we need to set | |
811 | * nr_threads appropriately to handle that many threads. | |
812 | */ | |
ed60d210 | 813 | *nr_threads = disk_heads(ctx->datadev); |
13eedd45 DW |
814 | if (ctx->rtdev) |
815 | *nr_threads += disk_heads(ctx->rtdev); | |
816 | if (ctx->logdev) | |
817 | *nr_threads += disk_heads(ctx->logdev); | |
ed60d210 | 818 | *rshift = 20; |
af9eb208 DW |
819 | return 0; |
820 | } |