/*
 * Source: git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - scrub/phase6.c
 * Blob: fe1217696c3656fa26a4f12f8be8bfb55a3f43f2
 * Path: [thirdparty/xfsprogs-dev.git] / scrub / phase6.c
 */
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6 #include "xfs.h"
7 #include <stdint.h>
8 #include <dirent.h>
9 #include <sys/statvfs.h>
10 #include "handle.h"
11 #include "path.h"
12 #include "workqueue.h"
13 #include "xfs_scrub.h"
14 #include "common.h"
15 #include "bitmap.h"
16 #include "disk.h"
17 #include "filemap.h"
18 #include "fscounters.h"
19 #include "inodes.h"
20 #include "read_verify.h"
21 #include "spacemap.h"
22 #include "vfs.h"
23
24 /*
25 * Phase 6: Verify data file integrity.
26 *
27 * Identify potential data block extents with GETFSMAP, then feed those
28 * extents to the read-verify pool to get the verify commands batched,
29 * issued, and (if there are problems) reported back to us. If there
30 * are errors, we'll record the bad regions and (if available) use rmap
31 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
32 * whole directory tree looking for files that overlap the bad regions
33 * and report the paths of the now corrupt files.
34 */
35
36 /* Find the fd for a given device identifier. */
37 static struct disk *
38 xfs_dev_to_disk(
39 struct scrub_ctx *ctx,
40 dev_t dev)
41 {
42 if (dev == ctx->fsinfo.fs_datadev)
43 return ctx->datadev;
44 else if (dev == ctx->fsinfo.fs_logdev)
45 return ctx->logdev;
46 else if (dev == ctx->fsinfo.fs_rtdev)
47 return ctx->rtdev;
48 abort();
49 }
50
51 /* Find the device major/minor for a given file descriptor. */
52 static dev_t
53 xfs_disk_to_dev(
54 struct scrub_ctx *ctx,
55 struct disk *disk)
56 {
57 if (disk == ctx->datadev)
58 return ctx->fsinfo.fs_datadev;
59 else if (disk == ctx->logdev)
60 return ctx->fsinfo.fs_logdev;
61 else if (disk == ctx->rtdev)
62 return ctx->fsinfo.fs_rtdev;
63 abort();
64 }
65
/* One row of the special-owner lookup table: owner code -> description. */
struct owner_decode {
	uint64_t		owner;	/* XFS_FMR_OWN_* owner code */
	const char		*descr;	/* description; NULL ends the table */
};
70
/*
 * Descriptions of the special (non-inode) owners that GETFSMAP can report.
 * The table is searched linearly by xfs_decode_special_owner() and is
 * terminated by an entry whose descr is NULL.
 */
static const struct owner_decode special_owners[] = {
	{XFS_FMR_OWN_FREE,	"free space"},
	{XFS_FMR_OWN_UNKNOWN,	"unknown owner"},
	{XFS_FMR_OWN_FS,	"static FS metadata"},
	{XFS_FMR_OWN_LOG,	"journalling log"},
	{XFS_FMR_OWN_AG,	"per-AG metadata"},
	{XFS_FMR_OWN_INOBT,	"inode btree blocks"},
	{XFS_FMR_OWN_INODES,	"inodes"},
	{XFS_FMR_OWN_REFC,	"refcount btree"},
	{XFS_FMR_OWN_COW,	"CoW staging"},
	{XFS_FMR_OWN_DEFECTIVE,	"bad blocks"},
	{0, NULL},	/* sentinel */
};
84
85 /* Decode a special owner. */
86 static const char *
87 xfs_decode_special_owner(
88 uint64_t owner)
89 {
90 const struct owner_decode *od = special_owners;
91
92 while (od->descr) {
93 if (od->owner == owner)
94 return od->descr;
95 od++;
96 }
97
98 return NULL;
99 }
100
101 /* Routines to translate bad physical extents into file paths and offsets. */
102
/*
 * Bad-region bitmaps handed to the file-reporting callbacks.  Extents of
 * files flagged FS_XFLAG_REALTIME are tested against r_bad; all other
 * extents are tested against d_bad.
 */
struct xfs_verify_error_info {
	struct bitmap			*d_bad;		/* bytes */
	struct bitmap			*r_bad;		/* bytes */
};
107
108 /* Report if this extent overlaps a bad region. */
109 static bool
110 xfs_report_verify_inode_bmap(
111 struct scrub_ctx *ctx,
112 const char *descr,
113 int fd,
114 int whichfork,
115 struct fsxattr *fsx,
116 struct xfs_bmap *bmap,
117 void *arg)
118 {
119 struct xfs_verify_error_info *vei = arg;
120 struct bitmap *bmp;
121
122 /* Only report errors for real extents. */
123 if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
124 return true;
125
126 if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
127 bmp = vei->r_bad;
128 else
129 bmp = vei->d_bad;
130
131 if (!bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
132 return true;
133
134 str_error(ctx, descr,
135 _("offset %llu failed read verification."), bmap->bm_offset);
136 return true;
137 }
138
139 /* Iterate the extent mappings of a file to report errors. */
140 static bool
141 xfs_report_verify_fd(
142 struct scrub_ctx *ctx,
143 const char *descr,
144 int fd,
145 void *arg)
146 {
147 struct xfs_bmap key = {0};
148 bool moveon;
149
150 /* data fork */
151 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_DATA_FORK, &key,
152 xfs_report_verify_inode_bmap, arg);
153 if (!moveon)
154 return false;
155
156 /* attr fork */
157 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
158 xfs_report_verify_inode_bmap, arg);
159 if (!moveon)
160 return false;
161 return true;
162 }
163
164 /* Report read verify errors in unlinked (but still open) files. */
165 static int
166 xfs_report_verify_inode(
167 struct scrub_ctx *ctx,
168 struct xfs_handle *handle,
169 struct xfs_bstat *bstat,
170 void *arg)
171 {
172 char descr[DESCR_BUFSZ];
173 bool moveon;
174 int fd;
175 int error;
176
177 snprintf(descr, DESCR_BUFSZ, _("inode %"PRIu64" (unlinked)"),
178 (uint64_t)bstat->bs_ino);
179
180 /* Ignore linked files and things we can't open. */
181 if (bstat->bs_nlink != 0)
182 return 0;
183 if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode))
184 return 0;
185
186 /* Try to open the inode. */
187 fd = xfs_open_handle(handle);
188 if (fd < 0) {
189 error = errno;
190 if (error == ESTALE)
191 return error;
192
193 str_info(ctx, descr,
194 _("Disappeared during read error reporting."));
195 return error;
196 }
197
198 /* Go find the badness. */
199 moveon = xfs_report_verify_fd(ctx, descr, fd, arg);
200 error = close(fd);
201 if (error)
202 str_errno(ctx, descr);
203
204 return moveon ? 0 : XFS_ITERATE_INODES_ABORT;
205 }
206
/*
 * Directory callback for the tree walk: check the directory's own extent
 * mappings against the read verify error list.
 *
 * Returns whatever xfs_report_verify_fd() returns for dir_fd.
 */
static bool
xfs_report_verify_dir(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	bool			moveon;

	moveon = xfs_report_verify_fd(ctx, path, dir_fd, arg);
	return moveon;
}
217
218 /*
219 * Scan the inode associated with a directory entry for matches with
220 * the read verify error list.
221 */
222 static bool
223 xfs_report_verify_dirent(
224 struct scrub_ctx *ctx,
225 const char *path,
226 int dir_fd,
227 struct dirent *dirent,
228 struct stat *sb,
229 void *arg)
230 {
231 bool moveon;
232 int fd;
233 int error;
234
235 /* Ignore things we can't open. */
236 if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode))
237 return true;
238
239 /* Ignore . and .. */
240 if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name))
241 return true;
242
243 /*
244 * If we were given a dirent, open the associated file under
245 * dir_fd for badblocks scanning. If dirent is NULL, then it's
246 * the directory itself we want to scan.
247 */
248 fd = openat(dir_fd, dirent->d_name,
249 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
250 if (fd < 0)
251 return true;
252
253 /* Go find the badness. */
254 moveon = xfs_report_verify_fd(ctx, path, fd, arg);
255 if (moveon)
256 goto out;
257
258 out:
259 error = close(fd);
260 if (error)
261 str_errno(ctx, path);
262 return moveon;
263 }
264
265 /* Given bad extent lists for the data & rtdev, find bad files. */
266 static bool
267 xfs_report_verify_errors(
268 struct scrub_ctx *ctx,
269 struct bitmap *d_bad,
270 struct bitmap *r_bad)
271 {
272 struct xfs_verify_error_info vei;
273 bool moveon;
274
275 vei.d_bad = d_bad;
276 vei.r_bad = r_bad;
277
278 /* Scan the directory tree to get file paths. */
279 moveon = scan_fs_tree(ctx, xfs_report_verify_dir,
280 xfs_report_verify_dirent, &vei);
281 if (!moveon)
282 return false;
283
284 /* Scan for unlinked files. */
285 return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, &vei);
286 }
287
288 /* Verify disk blocks with GETFSMAP */
289
/*
 * State shared by the GETFSMAP walk and the read-verify IO error callback:
 * the pool that extents are scheduled on, plus the bitmaps in which failed
 * regions of the data and realtime devices are recorded.
 */
struct xfs_verify_extent {
	struct read_verify_pool	*readverify;
	struct bitmap		*d_bad;		/* bytes */
	struct bitmap		*r_bad;		/* bytes */
};
295
296 /* Report an IO error resulting from read-verify based off getfsmap. */
297 static bool
298 xfs_check_rmap_error_report(
299 struct scrub_ctx *ctx,
300 const char *descr,
301 struct fsmap *map,
302 void *arg)
303 {
304 const char *type;
305 char buf[32];
306 uint64_t err_physical = *(uint64_t *)arg;
307 uint64_t err_off;
308
309 if (err_physical > map->fmr_physical)
310 err_off = err_physical - map->fmr_physical;
311 else
312 err_off = 0;
313
314 snprintf(buf, 32, _("disk offset %"PRIu64),
315 (uint64_t)BTOBB(map->fmr_physical + err_off));
316
317 if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) {
318 type = xfs_decode_special_owner(map->fmr_owner);
319 str_error(ctx, buf,
320 _("%s failed read verification."),
321 type);
322 }
323
324 /*
325 * XXX: If we had a getparent() call we could report IO errors
326 * efficiently. Until then, we'll have to scan the dir tree
327 * to find the bad file's pathname.
328 */
329
330 return true;
331 }
332
333 /*
334 * Remember a read error for later, and see if rmap will tell us about the
335 * owner ahead of time.
336 */
337 static void
338 xfs_check_rmap_ioerr(
339 struct scrub_ctx *ctx,
340 struct disk *disk,
341 uint64_t start,
342 uint64_t length,
343 int error,
344 void *arg)
345 {
346 struct fsmap keys[2];
347 char descr[DESCR_BUFSZ];
348 struct xfs_verify_extent *ve = arg;
349 struct bitmap *tree;
350 dev_t dev;
351 bool moveon;
352
353 dev = xfs_disk_to_dev(ctx, disk);
354
355 /*
356 * If we don't have parent pointers, save the bad extent for
357 * later rescanning.
358 */
359 if (dev == ctx->fsinfo.fs_datadev)
360 tree = ve->d_bad;
361 else if (dev == ctx->fsinfo.fs_rtdev)
362 tree = ve->r_bad;
363 else
364 tree = NULL;
365 if (tree) {
366 moveon = bitmap_set(tree, start, length);
367 if (!moveon)
368 str_errno(ctx, ctx->mntpoint);
369 }
370
371 snprintf(descr, DESCR_BUFSZ, _("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "),
372 major(dev), minor(dev), start, length);
373
374 /* Go figure out which blocks are bad from the fsmap. */
375 memset(keys, 0, sizeof(struct fsmap) * 2);
376 keys->fmr_device = dev;
377 keys->fmr_physical = start;
378 (keys + 1)->fmr_device = dev;
379 (keys + 1)->fmr_physical = start + length - 1;
380 (keys + 1)->fmr_owner = ULLONG_MAX;
381 (keys + 1)->fmr_offset = ULLONG_MAX;
382 (keys + 1)->fmr_flags = UINT_MAX;
383 xfs_iterate_fsmap(ctx, descr, keys, xfs_check_rmap_error_report,
384 &start);
385 }
386
387 /* Schedule a read-verify of a (data block) extent. */
388 static bool
389 xfs_check_rmap(
390 struct scrub_ctx *ctx,
391 const char *descr,
392 struct fsmap *map,
393 void *arg)
394 {
395 struct xfs_verify_extent *ve = arg;
396 struct disk *disk;
397
398 dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
399 " offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
400 major(map->fmr_device), minor(map->fmr_device),
401 (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner,
402 (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length,
403 map->fmr_flags);
404
405 /* "Unknown" extents should be verified; they could be data. */
406 if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
407 map->fmr_owner == XFS_FMR_OWN_UNKNOWN)
408 map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER;
409
410 /*
411 * We only care about read-verifying data extents that have been
412 * written to disk. This means we can skip "special" owners
413 * (metadata), xattr blocks, unwritten extents, and extent maps.
414 * These should all get checked elsewhere in the scrubber.
415 */
416 if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK |
417 FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER))
418 goto out;
419
420 /* XXX: Filter out directory data blocks. */
421
422 /* Schedule the read verify command for (eventual) running. */
423 disk = xfs_dev_to_disk(ctx, map->fmr_device);
424
425 read_verify_schedule_io(ve->readverify, disk, map->fmr_physical,
426 map->fmr_length, ve);
427
428 out:
429 /* Is this the last extent? Fire off the read. */
430 if (map->fmr_flags & FMR_OF_LAST)
431 read_verify_force_io(ve->readverify);
432
433 return true;
434 }
435
436 /*
437 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
438 * do data checksums, we trust that the underlying storage will pass back
439 * an IO error if it can't retrieve whatever we previously stored there.
440 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
441 * scan the extent maps of the entire fs tree to figure (and the unlinked
442 * inodes) out which files are now broken.
443 */
444 bool
445 xfs_scan_blocks(
446 struct scrub_ctx *ctx)
447 {
448 struct xfs_verify_extent ve;
449 bool moveon;
450
451 moveon = bitmap_init(&ve.d_bad);
452 if (!moveon) {
453 str_errno(ctx, ctx->mntpoint);
454 goto out;
455 }
456
457 moveon = bitmap_init(&ve.r_bad);
458 if (!moveon) {
459 str_errno(ctx, ctx->mntpoint);
460 goto out_dbad;
461 }
462
463 ve.readverify = read_verify_pool_init(ctx, ctx->geo.blocksize,
464 xfs_check_rmap_ioerr, disk_heads(ctx->datadev),
465 scrub_nproc(ctx));
466 if (!ve.readverify) {
467 moveon = false;
468 str_info(ctx, ctx->mntpoint,
469 _("Could not create media verifier."));
470 goto out_rbad;
471 }
472 moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &ve);
473 if (!moveon)
474 goto out_pool;
475 read_verify_pool_flush(ve.readverify);
476 ctx->bytes_checked += read_verify_bytes(ve.readverify);
477 read_verify_pool_destroy(ve.readverify);
478
479 /* Scan the whole dir tree to see what matches the bad extents. */
480 if (!bitmap_empty(ve.d_bad) || !bitmap_empty(ve.r_bad))
481 moveon = xfs_report_verify_errors(ctx, ve.d_bad, ve.r_bad);
482
483 bitmap_free(&ve.r_bad);
484 bitmap_free(&ve.d_bad);
485 return moveon;
486
487 out_pool:
488 read_verify_pool_destroy(ve.readverify);
489 out_rbad:
490 bitmap_free(&ve.r_bad);
491 out_dbad:
492 bitmap_free(&ve.d_bad);
493 out:
494 return moveon;
495 }
496
497 /* Estimate how much work we're going to do. */
498 bool
499 xfs_estimate_verify_work(
500 struct scrub_ctx *ctx,
501 uint64_t *items,
502 unsigned int *nr_threads,
503 int *rshift)
504 {
505 unsigned long long d_blocks;
506 unsigned long long d_bfree;
507 unsigned long long r_blocks;
508 unsigned long long r_bfree;
509 unsigned long long f_files;
510 unsigned long long f_free;
511 bool moveon;
512
513 moveon = xfs_scan_estimate_blocks(ctx, &d_blocks, &d_bfree,
514 &r_blocks, &r_bfree, &f_files, &f_free);
515 if (!moveon)
516 return moveon;
517
518 *items = ((d_blocks - d_bfree) + (r_blocks - r_bfree)) << ctx->blocklog;
519 *nr_threads = disk_heads(ctx->datadev);
520 *rshift = 20;
521 return moveon;
522 }