]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/phase6.c
xfsprogs: Release v6.15.0
[thirdparty/xfsprogs-dev.git] / scrub / phase6.c
CommitLineData
8d318d62 1// SPDX-License-Identifier: GPL-2.0-or-later
b364a9c0 2/*
52520522 3 * Copyright (C) 2018-2024 Oracle. All Rights Reserved.
8d318d62 4 * Author: Darrick J. Wong <djwong@kernel.org>
b364a9c0 5 */
a440f877 6#include "xfs.h"
b364a9c0 7#include <stdint.h>
b364a9c0
DW
8#include <dirent.h>
9#include <sys/statvfs.h>
9b72515a 10#include <linux/fsmap.h>
b364a9c0 11#include "handle.h"
42b4c8e8 12#include "libfrog/paths.h"
56598728 13#include "libfrog/workqueue.h"
b364a9c0
DW
14#include "xfs_scrub.h"
15#include "common.h"
a58400ed 16#include "libfrog/bitmap.h"
b364a9c0
DW
17#include "disk.h"
18#include "filemap.h"
ed60d210 19#include "fscounters.h"
b364a9c0
DW
20#include "inodes.h"
21#include "read_verify.h"
22#include "spacemap.h"
23#include "vfs.h"
4d0ce76d 24#include "common.h"
9b5d1349 25#include "libfrog/bulkstat.h"
b364a9c0
DW
26
27/*
28 * Phase 6: Verify data file integrity.
29 *
30 * Identify potential data block extents with GETFSMAP, then feed those
31 * extents to the read-verify pool to get the verify commands batched,
32 * issued, and (if there are problems) reported back to us. If there
33 * are errors, we'll record the bad regions and (if available) use rmap
34 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
35 * whole directory tree looking for files that overlap the bad regions
36 * and report the paths of the now corrupt files.
37 */
38
f1bb1696
DW
39/* Verify disk blocks with GETFSMAP */
40
/*
 * State shared by everything taking part in one media verification pass:
 * one read-verify pool per device, the bitmaps of bad ranges found on the
 * data and realtime devices, and one flag per device that remember_ioerr()
 * sets when it is handed a zero-length error for that device.
 */
struct media_verify_state {
	/* Read-verify pools; the log and realtime pools may be NULL. */
	struct read_verify_pool	*rvp_data;
	struct read_verify_pool	*rvp_log;
	struct read_verify_pool	*rvp_realtime;

	/* Bad ranges on the data and realtime devices. */
	struct bitmap		*d_bad;		/* bytes */
	struct bitmap		*r_bad;		/* bytes */

	/* Reported later as "<device> truncated". */
	bool			d_trunc:1;
	bool			r_trunc:1;
	bool			l_trunc:1;
};
51
b364a9c0 52/* Find the fd for a given device identifier. */
f1bb1696 53static struct read_verify_pool *
af9eb208 54dev_to_pool(
f1bb1696 55 struct scrub_ctx *ctx,
557f98d7 56 struct media_verify_state *vs,
f1bb1696 57 dev_t dev)
b364a9c0 58{
37591ef3
CH
59 if (ctx->mnt.fsgeom.rtstart) {
60 if (dev == XFS_DEV_DATA)
61 return vs->rvp_data;
62 if (dev == XFS_DEV_LOG)
63 return vs->rvp_log;
64 if (dev == XFS_DEV_RT)
65 return vs->rvp_realtime;
66 } else {
67 if (dev == ctx->fsinfo.fs_datadev)
68 return vs->rvp_data;
69 if (dev == ctx->fsinfo.fs_logdev)
70 return vs->rvp_log;
71 if (dev == ctx->fsinfo.fs_rtdev)
72 return vs->rvp_realtime;
73 }
b364a9c0
DW
74 abort();
75}
76
77/* Find the device major/minor for a given file descriptor. */
78static dev_t
af9eb208 79disk_to_dev(
b364a9c0
DW
80 struct scrub_ctx *ctx,
81 struct disk *disk)
82{
37591ef3
CH
83 if (ctx->mnt.fsgeom.rtstart) {
84 if (disk == ctx->datadev)
85 return XFS_DEV_DATA;
86 if (disk == ctx->logdev)
87 return XFS_DEV_LOG;
88 if (disk == ctx->rtdev)
89 return XFS_DEV_RT;
90 } else {
91 if (disk == ctx->datadev)
92 return ctx->fsinfo.fs_datadev;
93 if (disk == ctx->logdev)
94 return ctx->fsinfo.fs_logdev;
95 if (disk == ctx->rtdev)
96 return ctx->fsinfo.fs_rtdev;
97 }
b364a9c0
DW
98 abort();
99}
100
c9b349bd
DW
101/* Find the incore bad blocks bitmap for a given disk. */
102static struct bitmap *
103bitmap_for_disk(
104 struct scrub_ctx *ctx,
105 struct disk *disk,
106 struct media_verify_state *vs)
107{
37591ef3 108 if (disk == ctx->datadev)
c9b349bd 109 return vs->d_bad;
37591ef3 110 if (disk == ctx->rtdev)
c9b349bd
DW
111 return vs->r_bad;
112 return NULL;
113}
114
/* Context handed to report_ioerr() while walking one disk's bad ranges. */
struct disk_ioerr_report {
	struct scrub_ctx	*ctx;
	struct disk		*disk;	/* disk whose bitmap is being walked */
};
119
/* Maps one special fsmap owner code to a human-readable description. */
struct owner_decode {
	uint64_t		owner;	/* XFS_FMR_OWN_* code */
	const char		*descr;	/* NULL marks the end of a table */
};
124
125static const struct owner_decode special_owners[] = {
126 {XFS_FMR_OWN_FREE, "free space"},
127 {XFS_FMR_OWN_UNKNOWN, "unknown owner"},
128 {XFS_FMR_OWN_FS, "static FS metadata"},
129 {XFS_FMR_OWN_LOG, "journalling log"},
130 {XFS_FMR_OWN_AG, "per-AG metadata"},
131 {XFS_FMR_OWN_INOBT, "inode btree blocks"},
132 {XFS_FMR_OWN_INODES, "inodes"},
133 {XFS_FMR_OWN_REFC, "refcount btree"},
134 {XFS_FMR_OWN_COW, "CoW staging"},
135 {XFS_FMR_OWN_DEFECTIVE, "bad blocks"},
136 {0, NULL},
137};
138
139/* Decode a special owner. */
140static const char *
af9eb208 141decode_special_owner(
b364a9c0
DW
142 uint64_t owner)
143{
144 const struct owner_decode *od = special_owners;
145
146 while (od->descr) {
147 if (od->owner == owner)
148 return od->descr;
149 od++;
150 }
151
152 return NULL;
153}
154
155/* Routines to translate bad physical extents into file paths and offsets. */
156
/* Context passed through the file mapping walk to the loss reporters. */
struct badfile_report {
	struct scrub_ctx		*ctx;
	const char			*descr;	/* description of the file */
	struct media_verify_state	*vs;
	struct file_bmap		*bmap;	/* mapping currently examined */
};
163
164/* Report on bad extents found during a media scan. */
165static int
166report_badfile(
167 uint64_t start,
168 uint64_t length,
169 void *arg)
170{
171 struct badfile_report *br = arg;
172 unsigned long long bad_offset;
173 unsigned long long bad_length;
174
175 /* Clamp the bad region to the file mapping. */
176 if (start < br->bmap->bm_physical) {
177 length -= br->bmap->bm_physical - start;
178 start = br->bmap->bm_physical;
179 }
180 length = min(length, br->bmap->bm_length);
181
182 /* Figure out how far into the bmap is the bad mapping and report it. */
183 bad_offset = start - br->bmap->bm_physical;
184 bad_length = min(start + length,
185 br->bmap->bm_physical + br->bmap->bm_length) - start;
186
49e05cb0 187 str_unfixable_error(br->ctx, br->descr,
ed953d26
DW
188_("media error at data offset %llu length %llu."),
189 br->bmap->bm_offset + bad_offset, bad_length);
190 return 0;
191}
192
b364a9c0 193/* Report if this extent overlaps a bad region. */
73ce9669 194static int
663e02a0 195report_data_loss(
b364a9c0 196 struct scrub_ctx *ctx,
b364a9c0
DW
197 int fd,
198 int whichfork,
199 struct fsxattr *fsx,
73ce9669 200 struct file_bmap *bmap,
b364a9c0
DW
201 void *arg)
202{
73ce9669
DW
203 struct badfile_report *br = arg;
204 struct media_verify_state *vs = br->vs;
b364a9c0 205 struct bitmap *bmp;
73ce9669
DW
206
207 br->bmap = bmap;
b364a9c0
DW
208
209 /* Only report errors for real extents. */
210 if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
73ce9669 211 return 0;
b364a9c0
DW
212
213 if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
ed5f9cc7 214 bmp = vs->r_bad;
b364a9c0 215 else
ed5f9cc7 216 bmp = vs->d_bad;
b364a9c0 217
93d69bc7 218 return -bitmap_iterate_range(bmp, bmap->bm_physical, bmap->bm_length,
73ce9669 219 report_badfile, br);
b364a9c0
DW
220}
221
663e02a0 222/* Report if the extended attribute data overlaps a bad region. */
73ce9669 223static int
663e02a0
DW
224report_attr_loss(
225 struct scrub_ctx *ctx,
663e02a0
DW
226 int fd,
227 int whichfork,
228 struct fsxattr *fsx,
73ce9669 229 struct file_bmap *bmap,
663e02a0
DW
230 void *arg)
231{
73ce9669
DW
232 struct badfile_report *br = arg;
233 struct media_verify_state *vs = br->vs;
663e02a0
DW
234 struct bitmap *bmp = vs->d_bad;
235
236 /* Complain about attr fork extents that don't look right. */
237 if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) {
73ce9669 238 str_info(ctx, br->descr,
663e02a0 239_("found unexpected unwritten/delalloc attr fork extent."));
73ce9669 240 return 0;
663e02a0
DW
241 }
242
243 if (fsx->fsx_xflags & FS_XFLAG_REALTIME) {
73ce9669 244 str_info(ctx, br->descr,
663e02a0 245_("found unexpected realtime attr fork extent."));
73ce9669 246 return 0;
663e02a0
DW
247 }
248
249 if (bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
73ce9669 250 str_corrupt(ctx, br->descr,
663e02a0
DW
251_("media error in extended attribute data."));
252
73ce9669 253 return 0;
663e02a0
DW
254}
255
b364a9c0 256/* Iterate the extent mappings of a file to report errors. */
af9eb208
DW
257static int
258report_fd_loss(
b364a9c0
DW
259 struct scrub_ctx *ctx,
260 const char *descr,
261 int fd,
262 void *arg)
263{
73ce9669
DW
264 struct badfile_report br = {
265 .ctx = ctx,
266 .vs = arg,
267 .descr = descr,
268 };
269 struct file_bmap key = {0};
270 int ret;
b364a9c0
DW
271
272 /* data fork */
73ce9669
DW
273 ret = scrub_iterate_filemaps(ctx, fd, XFS_DATA_FORK, &key,
274 report_data_loss, &br);
275 if (ret) {
276 str_liberror(ctx, ret, descr);
af9eb208 277 return ret;
73ce9669 278 }
b364a9c0
DW
279
280 /* attr fork */
73ce9669
DW
281 ret = scrub_iterate_filemaps(ctx, fd, XFS_ATTR_FORK, &key,
282 report_attr_loss, &br);
283 if (ret) {
284 str_liberror(ctx, ret, descr);
af9eb208 285 return ret;
73ce9669 286 }
af9eb208
DW
287
288 return 0;
b364a9c0
DW
289}
290
291/* Report read verify errors in unlinked (but still open) files. */
292static int
af9eb208 293report_inode_loss(
b364a9c0
DW
294 struct scrub_ctx *ctx,
295 struct xfs_handle *handle,
4cca629d 296 struct xfs_bulkstat *bstat,
b364a9c0
DW
297 void *arg)
298{
299 char descr[DESCR_BUFSZ];
b364a9c0 300 int fd;
af9eb208 301 int error, err2;
b364a9c0 302
b364a9c0
DW
303 /* Ignore linked files and things we can't open. */
304 if (bstat->bs_nlink != 0)
305 return 0;
306 if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode))
307 return 0;
308
15589f0a
DW
309 scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ,
310 bstat->bs_ino, bstat->bs_gen, _("(unlinked)"));
311
b364a9c0 312 /* Try to open the inode. */
59f79e0a 313 fd = scrub_open_handle(handle);
b364a9c0 314 if (fd < 0) {
4d0ce76d
DW
315 /* Handle is stale, try again. */
316 if (errno == ESTALE)
317 return ESTALE;
b364a9c0 318
4d0ce76d
DW
319 str_error(ctx, descr,
320 _("Could not open to report read errors: %s."),
321 strerror(errno));
322 return 0;
b364a9c0
DW
323 }
324
325 /* Go find the badness. */
af9eb208
DW
326 error = report_fd_loss(ctx, descr, fd, arg);
327
328 err2 = close(fd);
329 if (err2)
6c05cc5d 330 str_errno(ctx, descr);
b364a9c0 331
af9eb208 332 return error;
b364a9c0
DW
333}
334
/*
 * Directory callback for the fs tree scan: check the directory's own
 * mappings for matches in the read verify error list.
 */
static int
report_dir_loss(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	int			ret;

	ret = report_fd_loss(ctx, path, dir_fd, arg);
	return ret;
}
345
346/*
347 * Scan the inode associated with a directory entry for matches with
348 * the read verify error list.
349 */
f544ec31 350static int
af9eb208 351report_dirent_loss(
b364a9c0
DW
352 struct scrub_ctx *ctx,
353 const char *path,
354 int dir_fd,
355 struct dirent *dirent,
356 struct stat *sb,
357 void *arg)
358{
b364a9c0 359 int fd;
af9eb208 360 int error, err2;
b364a9c0
DW
361
362 /* Ignore things we can't open. */
363 if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode))
f544ec31 364 return 0;
b364a9c0
DW
365
366 /* Ignore . and .. */
367 if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name))
f544ec31 368 return 0;
b364a9c0
DW
369
370 /*
371 * If we were given a dirent, open the associated file under
372 * dir_fd for badblocks scanning. If dirent is NULL, then it's
373 * the directory itself we want to scan.
374 */
375 fd = openat(dir_fd, dirent->d_name,
376 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
f544ec31 377 if (fd < 0) {
4d0ce76d
DW
378 char descr[PATH_MAX + 1];
379
f544ec31
DW
380 if (errno == ENOENT)
381 return 0;
4d0ce76d
DW
382
383 snprintf(descr, PATH_MAX, "%s/%s", path, dirent->d_name);
384 descr[PATH_MAX] = 0;
385
386 str_error(ctx, descr,
387 _("Could not open to report read errors: %s."),
388 strerror(errno));
389 return 0;
f544ec31 390 }
b364a9c0
DW
391
392 /* Go find the badness. */
af9eb208 393 error = report_fd_loss(ctx, path, fd, arg);
b364a9c0 394
af9eb208
DW
395 err2 = close(fd);
396 if (err2)
6c05cc5d 397 str_errno(ctx, path);
af9eb208
DW
398 if (!error && err2)
399 error = err2;
400
401 return error;
b364a9c0
DW
402}
403
/* The bad range that report_ioerr() passes to report_ioerr_fsmap(). */
struct ioerr_filerange {
	uint64_t		physical;	/* start of the bad range */
	uint64_t		length;		/* length of the bad range */
};
408
409/*
410 * If reverse mapping and parent pointers are enabled, we can map media errors
411 * directly back to a filename and a file position without needing to walk the
412 * directory tree.
413 */
414static inline bool
415can_use_pptrs(
416 const struct scrub_ctx *ctx)
417{
418 return (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_PARENT) &&
419 (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_RMAPBT);
420}
421
c9b349bd 422/* Use a fsmap to report metadata lost to a media error. */
7a2eef2b 423static int
c9b349bd 424report_ioerr_fsmap(
b364a9c0 425 struct scrub_ctx *ctx,
b364a9c0
DW
426 struct fsmap *map,
427 void *arg)
428{
429 const char *type;
9b5d1349 430 struct xfs_bulkstat bs = { };
f1f5fd3a 431 char buf[DESCR_BUFSZ];
9b5d1349 432 struct ioerr_filerange *fr = arg;
b364a9c0 433 uint64_t err_off;
9b5d1349 434 int ret;
b364a9c0 435
909c6a54
DW
436 /* Don't care about unwritten extents. */
437 if (map->fmr_flags & FMR_OF_PREALLOC)
7a2eef2b 438 return 0;
909c6a54 439
9b5d1349
DW
440 if (fr->physical > map->fmr_physical)
441 err_off = fr->physical - map->fmr_physical;
b364a9c0
DW
442 else
443 err_off = 0;
444
f1f5fd3a 445 /* Report special owners */
b364a9c0 446 if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) {
f1f5fd3a
DW
447 snprintf(buf, DESCR_BUFSZ, _("disk offset %"PRIu64),
448 (uint64_t)map->fmr_physical + err_off);
af9eb208 449 type = decode_special_owner(map->fmr_owner);
96ac83c8
DW
450 /*
451 * On filesystems that don't store reverse mappings, the
452 * GETFSMAP call returns OWNER_UNKNOWN for allocated space.
453 * We'll have to let the directory tree walker find the file
454 * that lost data.
455 */
456 if (!(ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_RMAPBT) &&
457 map->fmr_owner == XFS_FMR_OWN_UNKNOWN) {
458 str_info(ctx, buf, _("media error detected."));
459 } else {
460 str_corrupt(ctx, buf, _("media error in %s."), type);
461 }
b364a9c0
DW
462 }
463
9b5d1349
DW
464 if (can_use_pptrs(ctx)) {
465 ret = -xfrog_bulkstat_single(&ctx->mnt, map->fmr_owner, 0, &bs);
466 if (ret)
467 str_liberror(ctx, ret,
468 _("bulkstat for media error report"));
469 }
470
02d0069e
DW
471 /* Report extent maps */
472 if (map->fmr_flags & FMR_OF_EXTENT_MAP) {
473 bool attr = (map->fmr_flags & FMR_OF_ATTR_FORK);
474
475 scrub_render_ino_descr(ctx, buf, DESCR_BUFSZ,
9b5d1349 476 map->fmr_owner, bs.bs_gen, " %s",
02d0069e
DW
477 attr ? _("extended attribute") :
478 _("file data"));
abc2e70d 479 str_corrupt(ctx, buf, _("media error in extent map"));
02d0069e
DW
480 }
481
b364a9c0 482 /*
9b5d1349
DW
483 * If directory parent pointers are available, use that to find the
484 * pathname to a file, and report that path as having lost its
485 * extended attributes, or the precise offset of the lost file data.
b364a9c0 486 */
9b5d1349
DW
487 if (!can_use_pptrs(ctx))
488 return 0;
b364a9c0 489
9b5d1349
DW
490 scrub_render_ino_descr(ctx, buf, DESCR_BUFSZ, map->fmr_owner,
491 bs.bs_gen, NULL);
492
493 if (map->fmr_flags & FMR_OF_ATTR_FORK) {
494 str_corrupt(ctx, buf, _("media error in extended attributes"));
495 return 0;
496 }
497
498 str_unfixable_error(ctx, buf,
499 _("media error at data offset %llu length %llu."),
500 err_off, fr->length);
7a2eef2b 501 return 0;
b364a9c0
DW
502}
503
504/*
c9b349bd
DW
505 * For a range of bad blocks, visit each space mapping that overlaps the bad
506 * range so that we can report lost metadata.
b364a9c0 507 */
c9b349bd
DW
508static int
509report_ioerr(
b364a9c0
DW
510 uint64_t start,
511 uint64_t length,
b364a9c0
DW
512 void *arg)
513{
04c33913 514 struct fsmap keys[2] = { };
9b5d1349
DW
515 struct ioerr_filerange fr = {
516 .physical = start,
517 .length = length,
518 };
c9b349bd 519 struct disk_ioerr_report *dioerr = arg;
b364a9c0 520
b364a9c0 521 /* Go figure out which blocks are bad from the fsmap. */
37591ef3 522 keys[0].fmr_device = disk_to_dev(dioerr->ctx, dioerr->disk);
04c33913 523 keys[0].fmr_physical = start;
37591ef3 524 keys[1].fmr_device = keys[0].fmr_device;
04c33913
CH
525 keys[1].fmr_physical = start + length - 1;
526 keys[1].fmr_owner = ULLONG_MAX;
527 keys[1].fmr_offset = ULLONG_MAX;
528 keys[1].fmr_flags = UINT_MAX;
93d69bc7 529 return -scrub_iterate_fsmap(dioerr->ctx, keys, report_ioerr_fsmap,
9b5d1349 530 &fr);
c9b349bd
DW
531}
532
533/* Report all the media errors found on a disk. */
534static int
535report_disk_ioerrs(
536 struct scrub_ctx *ctx,
537 struct disk *disk,
538 struct media_verify_state *vs)
539{
540 struct disk_ioerr_report dioerr = {
541 .ctx = ctx,
542 .disk = disk,
543 };
544 struct bitmap *tree;
545
546 if (!disk)
547 return 0;
548 tree = bitmap_for_disk(ctx, disk, vs);
549 if (!tree)
550 return 0;
93d69bc7 551 return -bitmap_iterate(tree, report_ioerr, &dioerr);
c9b349bd
DW
552}
553
554/* Given bad extent lists for the data & rtdev, find bad files. */
af9eb208 555static int
c9b349bd
DW
556report_all_media_errors(
557 struct scrub_ctx *ctx,
558 struct media_verify_state *vs)
559{
c9b349bd
DW
560 int ret;
561
a6e08990
DW
562 if (vs->d_trunc)
563 str_corrupt(ctx, ctx->mntpoint, _("data device truncated"));
564 if (vs->l_trunc)
565 str_corrupt(ctx, ctx->mntpoint, _("log device truncated"));
566 if (vs->r_trunc)
567 str_corrupt(ctx, ctx->mntpoint, _("rt device truncated"));
568
c9b349bd
DW
569 ret = report_disk_ioerrs(ctx, ctx->datadev, vs);
570 if (ret) {
571 str_liberror(ctx, ret, _("walking datadev io errors"));
af9eb208 572 return ret;
c9b349bd
DW
573 }
574
575 ret = report_disk_ioerrs(ctx, ctx->rtdev, vs);
576 if (ret) {
577 str_liberror(ctx, ret, _("walking rtdev io errors"));
af9eb208 578 return ret;
c9b349bd
DW
579 }
580
9b5d1349
DW
581 /*
582 * Scan the directory tree to get file paths if we didn't already use
cb3647bb
DW
583 * directory parent pointers to report the loss. If parent pointers
584 * are enabled, report_ioerr_fsmap will have already reported file
585 * paths that have lost file data and xattrs.
9b5d1349 586 */
cb3647bb
DW
587 if (can_use_pptrs(ctx))
588 return 0;
589
590 ret = scan_fs_tree(ctx, report_dir_loss, report_dirent_loss, vs);
591 if (ret)
592 return ret;
c9b349bd
DW
593
594 /* Scan for unlinked files. */
279b0d0e 595 return scrub_scan_user_files(ctx, report_inode_loss, vs);
b364a9c0
DW
596}
597
598/* Schedule a read-verify of a (data block) extent. */
7a2eef2b
DW
599static int
600check_rmap(
b364a9c0 601 struct scrub_ctx *ctx,
b364a9c0
DW
602 struct fsmap *map,
603 void *arg)
604{
557f98d7 605 struct media_verify_state *vs = arg;
f1bb1696 606 struct read_verify_pool *rvp;
8cab77d3 607 int ret;
f1bb1696 608
af9eb208 609 rvp = dev_to_pool(ctx, vs, map->fmr_device);
b364a9c0
DW
610
611 dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
612 " offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
613 major(map->fmr_device), minor(map->fmr_device),
614 (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner,
615 (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length,
616 map->fmr_flags);
617
618 /* "Unknown" extents should be verified; they could be data. */
619 if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
620 map->fmr_owner == XFS_FMR_OWN_UNKNOWN)
621 map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER;
622
623 /*
624 * We only care about read-verifying data extents that have been
625 * written to disk. This means we can skip "special" owners
626 * (metadata), xattr blocks, unwritten extents, and extent maps.
627 * These should all get checked elsewhere in the scrubber.
628 */
629 if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK |
630 FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER))
7a2eef2b 631 return 0;
b364a9c0
DW
632
633 /* XXX: Filter out directory data blocks. */
634
635 /* Schedule the read verify command for (eventual) running. */
8cab77d3
DW
636 ret = read_verify_schedule_io(rvp, map->fmr_physical, map->fmr_length,
637 vs);
638 if (ret) {
7a2eef2b
DW
639 str_liberror(ctx, ret, _("scheduling media verify command"));
640 return ret;
8cab77d3 641 }
b364a9c0 642
7a2eef2b 643 return 0;
b364a9c0
DW
644}
645
/*
 * Wait for read/verify actions to finish, add the number of bytes the pool
 * checked to *bytes_checked, and tear the pool down.
 *
 * A NULL pool is ignored.  The pool is always destroyed before returning,
 * whether or not an error occurred, so the caller must not touch it again.
 * Returns 0 or the first error hit.
 */
static int
clean_pool(
	struct read_verify_pool	*rvp,
	unsigned long long	*bytes_checked)
{
	uint64_t		pool_checked;
	int			ret;

	if (!rvp)
		return 0;

	ret = read_verify_force_io(rvp);
	if (ret) {
		/*
		 * Don't leak the pool.  It has not been flushed, so shut it
		 * down with the same abort-then-destroy sequence that
		 * phase6_func uses on its own error path.
		 */
		read_verify_pool_abort(rvp);
		goto out_destroy;
	}

	ret = read_verify_pool_flush(rvp);
	if (ret)
		goto out_destroy;

	ret = read_verify_bytes(rvp, &pool_checked);
	if (ret)
		goto out_destroy;

	*bytes_checked += pool_checked;
out_destroy:
	read_verify_pool_destroy(rvp);
	return ret;
}
675
c9b349bd
DW
676/* Remember a media error for later. */
677static void
678remember_ioerr(
679 struct scrub_ctx *ctx,
680 struct disk *disk,
681 uint64_t start,
682 uint64_t length,
683 int error,
684 void *arg)
685{
686 struct media_verify_state *vs = arg;
687 struct bitmap *tree;
688 int ret;
689
a6e08990 690 if (!length) {
37591ef3 691 if (disk == ctx->datadev)
a6e08990 692 vs->d_trunc = true;
37591ef3 693 else if (disk == ctx->logdev)
a6e08990 694 vs->l_trunc = true;
37591ef3
CH
695 else if (disk == ctx->rtdev)
696 vs->r_trunc = true;
a6e08990
DW
697 return;
698 }
699
c9b349bd
DW
700 tree = bitmap_for_disk(ctx, disk, vs);
701 if (!tree) {
702 str_liberror(ctx, ENOENT, _("finding bad block bitmap"));
703 return;
704 }
705
93d69bc7 706 ret = -bitmap_set(tree, start, length);
c9b349bd
DW
707 if (ret)
708 str_liberror(ctx, ret, _("setting bad block bitmap"));
709}
710
b364a9c0
DW
711/*
712 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
713 * do data checksums, we trust that the underlying storage will pass back
714 * an IO error if it can't retrieve whatever we previously stored there.
715 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
716 * scan the extent maps of the entire fs tree to figure (and the unlinked
717 * inodes) out which files are now broken.
718 */
af9eb208
DW
719int
720phase6_func(
b364a9c0
DW
721 struct scrub_ctx *ctx)
722{
557f98d7 723 struct media_verify_state vs = { NULL };
af9eb208 724 int ret, ret2, ret3;
b364a9c0 725
93d69bc7 726 ret = -bitmap_alloc(&vs.d_bad);
233fabee
DW
727 if (ret) {
728 str_liberror(ctx, ret, _("creating datadev badblock bitmap"));
af9eb208 729 return ret;
b364a9c0
DW
730 }
731
93d69bc7 732 ret = -bitmap_alloc(&vs.r_bad);
233fabee
DW
733 if (ret) {
734 str_liberror(ctx, ret, _("creating realtime badblock bitmap"));
b364a9c0
DW
735 goto out_dbad;
736 }
737
8cab77d3 738 ret = read_verify_pool_alloc(ctx, ctx->datadev,
c9b349bd 739 ctx->mnt.fsgeom.blocksize, remember_ioerr,
8cab77d3
DW
740 scrub_nproc(ctx), &vs.rvp_data);
741 if (ret) {
742 str_liberror(ctx, ret, _("creating datadev media verifier"));
b364a9c0
DW
743 goto out_rbad;
744 }
f1bb1696 745 if (ctx->logdev) {
8cab77d3 746 ret = read_verify_pool_alloc(ctx, ctx->logdev,
c9b349bd 747 ctx->mnt.fsgeom.blocksize, remember_ioerr,
8cab77d3
DW
748 scrub_nproc(ctx), &vs.rvp_log);
749 if (ret) {
750 str_liberror(ctx, ret,
751 _("creating logdev media verifier"));
f1bb1696
DW
752 goto out_datapool;
753 }
754 }
755 if (ctx->rtdev) {
8cab77d3 756 ret = read_verify_pool_alloc(ctx, ctx->rtdev,
c9b349bd 757 ctx->mnt.fsgeom.blocksize, remember_ioerr,
8cab77d3
DW
758 scrub_nproc(ctx), &vs.rvp_realtime);
759 if (ret) {
760 str_liberror(ctx, ret,
761 _("creating rtdev media verifier"));
f1bb1696
DW
762 goto out_logpool;
763 }
764 }
7a2eef2b 765 ret = scrub_scan_all_spacemaps(ctx, check_rmap, &vs);
af9eb208 766 if (ret)
f1bb1696 767 goto out_rtpool;
8cab77d3
DW
768
769 ret = clean_pool(vs.rvp_data, &ctx->bytes_checked);
af9eb208 770 if (ret)
8cab77d3 771 str_liberror(ctx, ret, _("flushing datadev verify pool"));
8cab77d3 772
af9eb208
DW
773 ret2 = clean_pool(vs.rvp_log, &ctx->bytes_checked);
774 if (ret2)
775 str_liberror(ctx, ret2, _("flushing logdev verify pool"));
8cab77d3 776
af9eb208
DW
777 ret3 = clean_pool(vs.rvp_realtime, &ctx->bytes_checked);
778 if (ret3)
779 str_liberror(ctx, ret3, _("flushing rtdev verify pool"));
780
781 /*
782 * If the verify flush didn't work or we found no bad blocks, we're
783 * done! No errors detected.
784 */
785 if (ret || ret2 || ret3)
786 goto out_rbad;
787 if (bitmap_empty(vs.d_bad) && bitmap_empty(vs.r_bad))
788 goto out_rbad;
b364a9c0
DW
789
790 /* Scan the whole dir tree to see what matches the bad extents. */
af9eb208 791 ret = report_all_media_errors(ctx, &vs);
b364a9c0 792
557f98d7
DW
793 bitmap_free(&vs.r_bad);
794 bitmap_free(&vs.d_bad);
af9eb208 795 return ret;
b364a9c0 796
f1bb1696 797out_rtpool:
7668d01d 798 if (vs.rvp_realtime) {
4cd869e5 799 read_verify_pool_abort(vs.rvp_realtime);
557f98d7 800 read_verify_pool_destroy(vs.rvp_realtime);
7668d01d 801 }
f1bb1696 802out_logpool:
7668d01d 803 if (vs.rvp_log) {
4cd869e5 804 read_verify_pool_abort(vs.rvp_log);
557f98d7 805 read_verify_pool_destroy(vs.rvp_log);
7668d01d 806 }
f1bb1696 807out_datapool:
4cd869e5 808 read_verify_pool_abort(vs.rvp_data);
557f98d7 809 read_verify_pool_destroy(vs.rvp_data);
b364a9c0 810out_rbad:
557f98d7 811 bitmap_free(&vs.r_bad);
b364a9c0 812out_dbad:
557f98d7 813 bitmap_free(&vs.d_bad);
af9eb208 814 return ret;
b364a9c0 815}
ed60d210 816
af9eb208
DW
817/* Estimate how much work we're going to do. */
818int
819phase6_estimate(
ed60d210
DW
820 struct scrub_ctx *ctx,
821 uint64_t *items,
822 unsigned int *nr_threads,
823 int *rshift)
824{
825 unsigned long long d_blocks;
826 unsigned long long d_bfree;
827 unsigned long long r_blocks;
828 unsigned long long r_bfree;
0b78ac05 829 unsigned long long dontcare;
934d8d3a 830 int ret;
ed60d210 831
0b78ac05
DW
832 ret = scrub_scan_estimate_blocks(ctx, &d_blocks, &d_bfree, &r_blocks,
833 &r_bfree, &dontcare);
934d8d3a
DW
834 if (ret) {
835 str_liberror(ctx, ret, _("estimating verify work"));
af9eb208 836 return ret;
934d8d3a 837 }
ed60d210 838
a749451c
DW
839 *items = cvt_off_fsb_to_b(&ctx->mnt,
840 (d_blocks - d_bfree) + (r_blocks - r_bfree));
13eedd45
DW
841
842 /*
843 * Each read-verify pool starts a thread pool, and each worker thread
844 * can contribute to the progress counter. Hence we need to set
845 * nr_threads appropriately to handle that many threads.
846 */
ed60d210 847 *nr_threads = disk_heads(ctx->datadev);
13eedd45
DW
848 if (ctx->rtdev)
849 *nr_threads += disk_heads(ctx->rtdev);
850 if (ctx->logdev)
851 *nr_threads += disk_heads(ctx->logdev);
ed60d210 852 *rshift = 20;
af9eb208
DW
853 return 0;
854}