]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - scrub/phase6.c
xfs_scrub: abort all read verification work immediately on error
[thirdparty/xfsprogs-dev.git] / scrub / phase6.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6 #include "xfs.h"
7 #include <stdint.h>
8 #include <dirent.h>
9 #include <sys/statvfs.h>
10 #include "handle.h"
11 #include "libfrog/paths.h"
12 #include "libfrog/workqueue.h"
13 #include "xfs_scrub.h"
14 #include "common.h"
15 #include "libfrog/bitmap.h"
16 #include "disk.h"
17 #include "filemap.h"
18 #include "fscounters.h"
19 #include "inodes.h"
20 #include "read_verify.h"
21 #include "spacemap.h"
22 #include "vfs.h"
23
24 /*
25 * Phase 6: Verify data file integrity.
26 *
27 * Identify potential data block extents with GETFSMAP, then feed those
28 * extents to the read-verify pool to get the verify commands batched,
29 * issued, and (if there are problems) reported back to us. If there
30 * are errors, we'll record the bad regions and (if available) use rmap
31 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
32 * whole directory tree looking for files that overlap the bad regions
33 * and report the paths of the now corrupt files.
34 */
35
36 /* Verify disk blocks with GETFSMAP */
37
/*
 * State shared by the media verification pass: one read-verify pool
 * slot per device plus one bad-range bitmap each for the data and
 * realtime devices.
 */
struct media_verify_state {
	/* Read-verify pool for the data device. */
	struct read_verify_pool	*rvp_data;
	/* Read-verify pool for the log device; stays NULL if none is set up. */
	struct read_verify_pool	*rvp_log;
	/* Read-verify pool for the realtime device; stays NULL if none is set up. */
	struct read_verify_pool	*rvp_realtime;
	/* Ranges of the data device that failed verification, in bytes. */
	struct bitmap		*d_bad;
	/* Ranges of the realtime device that failed verification, in bytes. */
	struct bitmap		*r_bad;
};
45
46 /* Find the fd for a given device identifier. */
47 static struct read_verify_pool *
48 xfs_dev_to_pool(
49 struct scrub_ctx *ctx,
50 struct media_verify_state *vs,
51 dev_t dev)
52 {
53 if (dev == ctx->fsinfo.fs_datadev)
54 return vs->rvp_data;
55 else if (dev == ctx->fsinfo.fs_logdev)
56 return vs->rvp_log;
57 else if (dev == ctx->fsinfo.fs_rtdev)
58 return vs->rvp_realtime;
59 abort();
60 }
61
62 /* Find the device major/minor for a given file descriptor. */
63 static dev_t
64 xfs_disk_to_dev(
65 struct scrub_ctx *ctx,
66 struct disk *disk)
67 {
68 if (disk == ctx->datadev)
69 return ctx->fsinfo.fs_datadev;
70 else if (disk == ctx->logdev)
71 return ctx->fsinfo.fs_logdev;
72 else if (disk == ctx->rtdev)
73 return ctx->fsinfo.fs_rtdev;
74 abort();
75 }
76
/* One row of the special-owner lookup table. */
struct owner_decode {
	/* Owner code to match against. */
	uint64_t	owner;
	/* Human-readable description; NULL marks the end of the table. */
	const char	*descr;
};
81
82 static const struct owner_decode special_owners[] = {
83 {XFS_FMR_OWN_FREE, "free space"},
84 {XFS_FMR_OWN_UNKNOWN, "unknown owner"},
85 {XFS_FMR_OWN_FS, "static FS metadata"},
86 {XFS_FMR_OWN_LOG, "journalling log"},
87 {XFS_FMR_OWN_AG, "per-AG metadata"},
88 {XFS_FMR_OWN_INOBT, "inode btree blocks"},
89 {XFS_FMR_OWN_INODES, "inodes"},
90 {XFS_FMR_OWN_REFC, "refcount btree"},
91 {XFS_FMR_OWN_COW, "CoW staging"},
92 {XFS_FMR_OWN_DEFECTIVE, "bad blocks"},
93 {0, NULL},
94 };
95
96 /* Decode a special owner. */
97 static const char *
98 xfs_decode_special_owner(
99 uint64_t owner)
100 {
101 const struct owner_decode *od = special_owners;
102
103 while (od->descr) {
104 if (od->owner == owner)
105 return od->descr;
106 od++;
107 }
108
109 return NULL;
110 }
111
112 /* Routines to translate bad physical extents into file paths and offsets. */
113
114 /* Report if this extent overlaps a bad region. */
115 static bool
116 xfs_report_verify_inode_bmap(
117 struct scrub_ctx *ctx,
118 const char *descr,
119 int fd,
120 int whichfork,
121 struct fsxattr *fsx,
122 struct xfs_bmap *bmap,
123 void *arg)
124 {
125 struct media_verify_state *vs = arg;
126 struct bitmap *bmp;
127
128 /* Only report errors for real extents. */
129 if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
130 return true;
131
132 if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
133 bmp = vs->r_bad;
134 else
135 bmp = vs->d_bad;
136
137 if (!bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
138 return true;
139
140 str_error(ctx, descr,
141 _("offset %llu failed read verification."), bmap->bm_offset);
142 return true;
143 }
144
145 /* Iterate the extent mappings of a file to report errors. */
146 static bool
147 xfs_report_verify_fd(
148 struct scrub_ctx *ctx,
149 const char *descr,
150 int fd,
151 void *arg)
152 {
153 struct xfs_bmap key = {0};
154 bool moveon;
155
156 /* data fork */
157 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_DATA_FORK, &key,
158 xfs_report_verify_inode_bmap, arg);
159 if (!moveon)
160 return false;
161
162 /* attr fork */
163 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
164 xfs_report_verify_inode_bmap, arg);
165 if (!moveon)
166 return false;
167 return true;
168 }
169
170 /* Report read verify errors in unlinked (but still open) files. */
171 static int
172 xfs_report_verify_inode(
173 struct scrub_ctx *ctx,
174 struct xfs_handle *handle,
175 struct xfs_bulkstat *bstat,
176 void *arg)
177 {
178 char descr[DESCR_BUFSZ];
179 bool moveon;
180 int fd;
181 int error;
182
183 snprintf(descr, DESCR_BUFSZ, _("inode %"PRIu64" (unlinked)"),
184 (uint64_t)bstat->bs_ino);
185
186 /* Ignore linked files and things we can't open. */
187 if (bstat->bs_nlink != 0)
188 return 0;
189 if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode))
190 return 0;
191
192 /* Try to open the inode. */
193 fd = xfs_open_handle(handle);
194 if (fd < 0) {
195 error = errno;
196 if (error == ESTALE)
197 return error;
198
199 str_info(ctx, descr,
200 _("Disappeared during read error reporting."));
201 return error;
202 }
203
204 /* Go find the badness. */
205 moveon = xfs_report_verify_fd(ctx, descr, fd, arg);
206 error = close(fd);
207 if (error)
208 str_errno(ctx, descr);
209
210 return moveon ? 0 : XFS_ITERATE_INODES_ABORT;
211 }
212
/*
 * Directory callback for scan_fs_tree(): compare the directory's own
 * extent mappings against the read verify error list.  Returns whatever
 * xfs_report_verify_fd() returns for the directory fd.
 */
static bool
xfs_report_verify_dir(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	bool			keep_going;

	keep_going = xfs_report_verify_fd(ctx, path, dir_fd, arg);
	return keep_going;
}
223
224 /*
225 * Scan the inode associated with a directory entry for matches with
226 * the read verify error list.
227 */
228 static bool
229 xfs_report_verify_dirent(
230 struct scrub_ctx *ctx,
231 const char *path,
232 int dir_fd,
233 struct dirent *dirent,
234 struct stat *sb,
235 void *arg)
236 {
237 bool moveon;
238 int fd;
239 int error;
240
241 /* Ignore things we can't open. */
242 if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode))
243 return true;
244
245 /* Ignore . and .. */
246 if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name))
247 return true;
248
249 /*
250 * If we were given a dirent, open the associated file under
251 * dir_fd for badblocks scanning. If dirent is NULL, then it's
252 * the directory itself we want to scan.
253 */
254 fd = openat(dir_fd, dirent->d_name,
255 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
256 if (fd < 0)
257 return true;
258
259 /* Go find the badness. */
260 moveon = xfs_report_verify_fd(ctx, path, fd, arg);
261 if (moveon)
262 goto out;
263
264 out:
265 error = close(fd);
266 if (error)
267 str_errno(ctx, path);
268 return moveon;
269 }
270
271 /* Given bad extent lists for the data & rtdev, find bad files. */
272 static bool
273 xfs_report_verify_errors(
274 struct scrub_ctx *ctx,
275 struct media_verify_state *vs)
276 {
277 bool moveon;
278
279 /* Scan the directory tree to get file paths. */
280 moveon = scan_fs_tree(ctx, xfs_report_verify_dir,
281 xfs_report_verify_dirent, vs);
282 if (!moveon)
283 return false;
284
285 /* Scan for unlinked files. */
286 return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, vs);
287 }
288
289 /* Report an IO error resulting from read-verify based off getfsmap. */
290 static bool
291 xfs_check_rmap_error_report(
292 struct scrub_ctx *ctx,
293 const char *descr,
294 struct fsmap *map,
295 void *arg)
296 {
297 const char *type;
298 char buf[32];
299 uint64_t err_physical = *(uint64_t *)arg;
300 uint64_t err_off;
301
302 if (err_physical > map->fmr_physical)
303 err_off = err_physical - map->fmr_physical;
304 else
305 err_off = 0;
306
307 snprintf(buf, 32, _("disk offset %"PRIu64),
308 (uint64_t)BTOBB(map->fmr_physical + err_off));
309
310 if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) {
311 type = xfs_decode_special_owner(map->fmr_owner);
312 str_error(ctx, buf,
313 _("%s failed read verification."),
314 type);
315 }
316
317 /*
318 * XXX: If we had a getparent() call we could report IO errors
319 * efficiently. Until then, we'll have to scan the dir tree
320 * to find the bad file's pathname.
321 */
322
323 return true;
324 }
325
326 /*
327 * Remember a read error for later, and see if rmap will tell us about the
328 * owner ahead of time.
329 */
330 static void
331 xfs_check_rmap_ioerr(
332 struct scrub_ctx *ctx,
333 struct disk *disk,
334 uint64_t start,
335 uint64_t length,
336 int error,
337 void *arg)
338 {
339 struct fsmap keys[2];
340 char descr[DESCR_BUFSZ];
341 struct media_verify_state *vs = arg;
342 struct bitmap *tree;
343 dev_t dev;
344 int ret;
345
346 dev = xfs_disk_to_dev(ctx, disk);
347
348 /*
349 * If we don't have parent pointers, save the bad extent for
350 * later rescanning.
351 */
352 if (dev == ctx->fsinfo.fs_datadev)
353 tree = vs->d_bad;
354 else if (dev == ctx->fsinfo.fs_rtdev)
355 tree = vs->r_bad;
356 else
357 tree = NULL;
358 if (tree) {
359 ret = bitmap_set(tree, start, length);
360 if (ret)
361 str_liberror(ctx, ret, _("setting bad block bitmap"));
362 }
363
364 snprintf(descr, DESCR_BUFSZ, _("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "),
365 major(dev), minor(dev), start, length);
366
367 /* Go figure out which blocks are bad from the fsmap. */
368 memset(keys, 0, sizeof(struct fsmap) * 2);
369 keys->fmr_device = dev;
370 keys->fmr_physical = start;
371 (keys + 1)->fmr_device = dev;
372 (keys + 1)->fmr_physical = start + length - 1;
373 (keys + 1)->fmr_owner = ULLONG_MAX;
374 (keys + 1)->fmr_offset = ULLONG_MAX;
375 (keys + 1)->fmr_flags = UINT_MAX;
376 xfs_iterate_fsmap(ctx, descr, keys, xfs_check_rmap_error_report,
377 &start);
378 }
379
380 /* Schedule a read-verify of a (data block) extent. */
381 static bool
382 xfs_check_rmap(
383 struct scrub_ctx *ctx,
384 const char *descr,
385 struct fsmap *map,
386 void *arg)
387 {
388 struct media_verify_state *vs = arg;
389 struct read_verify_pool *rvp;
390
391 rvp = xfs_dev_to_pool(ctx, vs, map->fmr_device);
392
393 dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
394 " offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
395 major(map->fmr_device), minor(map->fmr_device),
396 (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner,
397 (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length,
398 map->fmr_flags);
399
400 /* "Unknown" extents should be verified; they could be data. */
401 if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
402 map->fmr_owner == XFS_FMR_OWN_UNKNOWN)
403 map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER;
404
405 /*
406 * We only care about read-verifying data extents that have been
407 * written to disk. This means we can skip "special" owners
408 * (metadata), xattr blocks, unwritten extents, and extent maps.
409 * These should all get checked elsewhere in the scrubber.
410 */
411 if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK |
412 FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER))
413 goto out;
414
415 /* XXX: Filter out directory data blocks. */
416
417 /* Schedule the read verify command for (eventual) running. */
418 read_verify_schedule_io(rvp, map->fmr_physical, map->fmr_length, vs);
419
420 out:
421 /* Is this the last extent? Fire off the read. */
422 if (map->fmr_flags & FMR_OF_LAST)
423 read_verify_force_io(rvp);
424
425 return true;
426 }
427
/*
 * Flush a read-verify pool, collect the number of bytes it checked, and
 * destroy it.  A NULL pool is accepted and counts as zero bytes.
 */
static uint64_t
clean_pool(
	struct read_verify_pool	*rvp)
{
	uint64_t		nr_bytes = 0;

	if (rvp) {
		read_verify_pool_flush(rvp);
		/* Read the counter before the pool goes away. */
		nr_bytes = read_verify_bytes(rvp);
		read_verify_pool_destroy(rvp);
	}

	return nr_bytes;
}
443
444 /*
445 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
446 * do data checksums, we trust that the underlying storage will pass back
447 * an IO error if it can't retrieve whatever we previously stored there.
448 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
449 * scan the extent maps of the entire fs tree to figure (and the unlinked
450 * inodes) out which files are now broken.
451 */
452 bool
453 xfs_scan_blocks(
454 struct scrub_ctx *ctx)
455 {
456 struct media_verify_state vs = { NULL };
457 bool moveon = false;
458 int ret;
459
460 ret = bitmap_alloc(&vs.d_bad);
461 if (ret) {
462 str_liberror(ctx, ret, _("creating datadev badblock bitmap"));
463 goto out;
464 }
465
466 ret = bitmap_alloc(&vs.r_bad);
467 if (ret) {
468 str_liberror(ctx, ret, _("creating realtime badblock bitmap"));
469 goto out_dbad;
470 }
471
472 vs.rvp_data = read_verify_pool_init(ctx, ctx->datadev,
473 ctx->mnt.fsgeom.blocksize, xfs_check_rmap_ioerr,
474 scrub_nproc(ctx));
475 if (!vs.rvp_data) {
476 str_info(ctx, ctx->mntpoint,
477 _("Could not create data device media verifier."));
478 goto out_rbad;
479 }
480 if (ctx->logdev) {
481 vs.rvp_log = read_verify_pool_init(ctx, ctx->logdev,
482 ctx->mnt.fsgeom.blocksize, xfs_check_rmap_ioerr,
483 scrub_nproc(ctx));
484 if (!vs.rvp_log) {
485 str_info(ctx, ctx->mntpoint,
486 _("Could not create log device media verifier."));
487 goto out_datapool;
488 }
489 }
490 if (ctx->rtdev) {
491 vs.rvp_realtime = read_verify_pool_init(ctx, ctx->rtdev,
492 ctx->mnt.fsgeom.blocksize, xfs_check_rmap_ioerr,
493 scrub_nproc(ctx));
494 if (!vs.rvp_realtime) {
495 str_info(ctx, ctx->mntpoint,
496 _("Could not create realtime device media verifier."));
497 goto out_logpool;
498 }
499 }
500 moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &vs);
501 if (!moveon)
502 goto out_rtpool;
503 ctx->bytes_checked += clean_pool(vs.rvp_data);
504 ctx->bytes_checked += clean_pool(vs.rvp_log);
505 ctx->bytes_checked += clean_pool(vs.rvp_realtime);
506
507 /* Scan the whole dir tree to see what matches the bad extents. */
508 if (!bitmap_empty(vs.d_bad) || !bitmap_empty(vs.r_bad))
509 moveon = xfs_report_verify_errors(ctx, &vs);
510
511 bitmap_free(&vs.r_bad);
512 bitmap_free(&vs.d_bad);
513 return moveon;
514
515 out_rtpool:
516 if (vs.rvp_realtime) {
517 read_verify_pool_abort(vs.rvp_realtime);
518 read_verify_pool_destroy(vs.rvp_realtime);
519 }
520 out_logpool:
521 if (vs.rvp_log) {
522 read_verify_pool_abort(vs.rvp_log);
523 read_verify_pool_destroy(vs.rvp_log);
524 }
525 out_datapool:
526 read_verify_pool_abort(vs.rvp_data);
527 read_verify_pool_destroy(vs.rvp_data);
528 out_rbad:
529 bitmap_free(&vs.r_bad);
530 out_dbad:
531 bitmap_free(&vs.d_bad);
532 out:
533 return moveon;
534 }
535
536 /* Estimate how much work we're going to do. */
537 bool
538 xfs_estimate_verify_work(
539 struct scrub_ctx *ctx,
540 uint64_t *items,
541 unsigned int *nr_threads,
542 int *rshift)
543 {
544 unsigned long long d_blocks;
545 unsigned long long d_bfree;
546 unsigned long long r_blocks;
547 unsigned long long r_bfree;
548 unsigned long long f_files;
549 unsigned long long f_free;
550 bool moveon;
551
552 moveon = xfs_scan_estimate_blocks(ctx, &d_blocks, &d_bfree,
553 &r_blocks, &r_bfree, &f_files, &f_free);
554 if (!moveon)
555 return moveon;
556
557 *items = cvt_off_fsb_to_b(&ctx->mnt,
558 (d_blocks - d_bfree) + (r_blocks - r_bfree));
559 *nr_threads = disk_heads(ctx->datadev);
560 *rshift = 20;
561 return moveon;
562 }