]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - scrub/phase6.c
xfs: zero length symlinks are not valid
[thirdparty/xfsprogs-dev.git] / scrub / phase6.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6 #include "xfs.h"
7 #include <stdint.h>
8 #include <dirent.h>
9 #include <sys/statvfs.h>
10 #include "handle.h"
11 #include "path.h"
12 #include "ptvar.h"
13 #include "workqueue.h"
14 #include "xfs_scrub.h"
15 #include "common.h"
16 #include "bitmap.h"
17 #include "disk.h"
18 #include "filemap.h"
19 #include "fscounters.h"
20 #include "inodes.h"
21 #include "read_verify.h"
22 #include "spacemap.h"
23 #include "vfs.h"
24
25 /*
26 * Phase 6: Verify data file integrity.
27 *
28 * Identify potential data block extents with GETFSMAP, then feed those
29 * extents to the read-verify pool to get the verify commands batched,
30 * issued, and (if there are problems) reported back to us. If there
31 * are errors, we'll record the bad regions and (if available) use rmap
32 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
33 * whole directory tree looking for files that overlap the bad regions
34 * and report the paths of the now corrupt files.
35 */
36
37 /* Find the fd for a given device identifier. */
38 static struct disk *
39 xfs_dev_to_disk(
40 struct scrub_ctx *ctx,
41 dev_t dev)
42 {
43 if (dev == ctx->fsinfo.fs_datadev)
44 return ctx->datadev;
45 else if (dev == ctx->fsinfo.fs_logdev)
46 return ctx->logdev;
47 else if (dev == ctx->fsinfo.fs_rtdev)
48 return ctx->rtdev;
49 abort();
50 }
51
52 /* Find the device major/minor for a given file descriptor. */
53 static dev_t
54 xfs_disk_to_dev(
55 struct scrub_ctx *ctx,
56 struct disk *disk)
57 {
58 if (disk == ctx->datadev)
59 return ctx->fsinfo.fs_datadev;
60 else if (disk == ctx->logdev)
61 return ctx->fsinfo.fs_logdev;
62 else if (disk == ctx->rtdev)
63 return ctx->fsinfo.fs_rtdev;
64 abort();
65 }
66
/* One row of the special-owner lookup table: an owner code and its text. */
67 struct owner_decode {
68 uint64_t owner; /* code compared against the owner being decoded */
69 const char *descr; /* text returned on a match; NULL marks the end of the table */
70 };
71
72 static const struct owner_decode special_owners[] = {
73 {XFS_FMR_OWN_FREE, "free space"},
74 {XFS_FMR_OWN_UNKNOWN, "unknown owner"},
75 {XFS_FMR_OWN_FS, "static FS metadata"},
76 {XFS_FMR_OWN_LOG, "journalling log"},
77 {XFS_FMR_OWN_AG, "per-AG metadata"},
78 {XFS_FMR_OWN_INOBT, "inode btree blocks"},
79 {XFS_FMR_OWN_INODES, "inodes"},
80 {XFS_FMR_OWN_REFC, "refcount btree"},
81 {XFS_FMR_OWN_COW, "CoW staging"},
82 {XFS_FMR_OWN_DEFECTIVE, "bad blocks"},
83 {0, NULL},
84 };
85
86 /* Decode a special owner. */
87 static const char *
88 xfs_decode_special_owner(
89 uint64_t owner)
90 {
91 const struct owner_decode *od = special_owners;
92
93 while (od->descr) {
94 if (od->owner == owner)
95 return od->descr;
96 od++;
97 }
98
99 return NULL;
100 }
101
102 /* Routines to translate bad physical extents into file paths and offsets. */
103
/* Bad-region bitmaps handed to the per-extent error reporting callback. */
104 struct xfs_verify_error_info {
105 struct bitmap *d_bad; /* bytes; tested for files without FS_XFLAG_REALTIME */
106 struct bitmap *r_bad; /* bytes; tested for files with FS_XFLAG_REALTIME set */
107 };
108
109 /* Report if this extent overlaps a bad region. */
110 static bool
111 xfs_report_verify_inode_bmap(
112 struct scrub_ctx *ctx,
113 const char *descr,
114 int fd,
115 int whichfork,
116 struct fsxattr *fsx,
117 struct xfs_bmap *bmap,
118 void *arg)
119 {
120 struct xfs_verify_error_info *vei = arg;
121 struct bitmap *bmp;
122
123 /* Only report errors for real extents. */
124 if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
125 return true;
126
127 if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
128 bmp = vei->r_bad;
129 else
130 bmp = vei->d_bad;
131
132 if (!bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
133 return true;
134
135 str_error(ctx, descr,
136 _("offset %llu failed read verification."), bmap->bm_offset);
137 return true;
138 }
139
140 /* Iterate the extent mappings of a file to report errors. */
141 static bool
142 xfs_report_verify_fd(
143 struct scrub_ctx *ctx,
144 const char *descr,
145 int fd,
146 void *arg)
147 {
148 struct xfs_bmap key = {0};
149 bool moveon;
150
151 /* data fork */
152 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_DATA_FORK, &key,
153 xfs_report_verify_inode_bmap, arg);
154 if (!moveon)
155 return false;
156
157 /* attr fork */
158 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
159 xfs_report_verify_inode_bmap, arg);
160 if (!moveon)
161 return false;
162 return true;
163 }
164
165 /* Report read verify errors in unlinked (but still open) files. */
166 static int
167 xfs_report_verify_inode(
168 struct scrub_ctx *ctx,
169 struct xfs_handle *handle,
170 struct xfs_bstat *bstat,
171 void *arg)
172 {
173 char descr[DESCR_BUFSZ];
174 bool moveon;
175 int fd;
176 int error;
177
178 snprintf(descr, DESCR_BUFSZ, _("inode %"PRIu64" (unlinked)"),
179 (uint64_t)bstat->bs_ino);
180
181 /* Ignore linked files and things we can't open. */
182 if (bstat->bs_nlink != 0)
183 return 0;
184 if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode))
185 return 0;
186
187 /* Try to open the inode. */
188 fd = xfs_open_handle(handle);
189 if (fd < 0) {
190 error = errno;
191 if (error == ESTALE)
192 return error;
193
194 str_info(ctx, descr,
195 _("Disappeared during read error reporting."));
196 return error;
197 }
198
199 /* Go find the badness. */
200 moveon = xfs_report_verify_fd(ctx, descr, fd, arg);
201 error = close(fd);
202 if (error)
203 str_errno(ctx, descr);
204
205 return moveon ? 0 : XFS_ITERATE_INODES_ABORT;
206 }
207
/*
 * Directory callback for the tree walk: check the directory's own extent
 * mappings against the read verify error list, reporting under @path.
 */
static bool
xfs_report_verify_dir(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	bool			keep_going;

	keep_going = xfs_report_verify_fd(ctx, path, dir_fd, arg);
	return keep_going;
}
218
219 /*
220 * Scan the inode associated with a directory entry for matches with
221 * the read verify error list.
222 */
223 static bool
224 xfs_report_verify_dirent(
225 struct scrub_ctx *ctx,
226 const char *path,
227 int dir_fd,
228 struct dirent *dirent,
229 struct stat *sb,
230 void *arg)
231 {
232 bool moveon;
233 int fd;
234 int error;
235
236 /* Ignore things we can't open. */
237 if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode))
238 return true;
239
240 /* Ignore . and .. */
241 if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name))
242 return true;
243
244 /*
245 * If we were given a dirent, open the associated file under
246 * dir_fd for badblocks scanning. If dirent is NULL, then it's
247 * the directory itself we want to scan.
248 */
249 fd = openat(dir_fd, dirent->d_name,
250 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
251 if (fd < 0)
252 return true;
253
254 /* Go find the badness. */
255 moveon = xfs_report_verify_fd(ctx, path, fd, arg);
256 if (moveon)
257 goto out;
258
259 out:
260 error = close(fd);
261 if (error)
262 str_errno(ctx, path);
263 return moveon;
264 }
265
266 /* Given bad extent lists for the data & rtdev, find bad files. */
267 static bool
268 xfs_report_verify_errors(
269 struct scrub_ctx *ctx,
270 struct bitmap *d_bad,
271 struct bitmap *r_bad)
272 {
273 struct xfs_verify_error_info vei;
274 bool moveon;
275
276 vei.d_bad = d_bad;
277 vei.r_bad = r_bad;
278
279 /* Scan the directory tree to get file paths. */
280 moveon = scan_fs_tree(ctx, xfs_report_verify_dir,
281 xfs_report_verify_dirent, &vei);
282 if (!moveon)
283 return false;
284
285 /* Scan for unlinked files. */
286 return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, &vei);
287 }
288
289 /* Verify disk blocks with GETFSMAP */
290
/* State shared by the GETFSMAP extent scan and the read-verify IO error callback. */
291 struct xfs_verify_extent {
292 struct read_verify_pool *readverify; /* pool that read-verify IO is scheduled on */
293 struct ptvar *rvstate; /* ptvar created with struct read_verify sized entries */
294 struct bitmap *d_bad; /* bytes; ranges that failed on the data device */
295 struct bitmap *r_bad; /* bytes; ranges that failed on the realtime device */
296 };
297
298 /* Report an IO error resulting from read-verify based off getfsmap. */
299 static bool
300 xfs_check_rmap_error_report(
301 struct scrub_ctx *ctx,
302 const char *descr,
303 struct fsmap *map,
304 void *arg)
305 {
306 const char *type;
307 char buf[32];
308 uint64_t err_physical = *(uint64_t *)arg;
309 uint64_t err_off;
310
311 if (err_physical > map->fmr_physical)
312 err_off = err_physical - map->fmr_physical;
313 else
314 err_off = 0;
315
316 snprintf(buf, 32, _("disk offset %"PRIu64),
317 (uint64_t)BTOBB(map->fmr_physical + err_off));
318
319 if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) {
320 type = xfs_decode_special_owner(map->fmr_owner);
321 str_error(ctx, buf,
322 _("%s failed read verification."),
323 type);
324 }
325
326 /*
327 * XXX: If we had a getparent() call we could report IO errors
328 * efficiently. Until then, we'll have to scan the dir tree
329 * to find the bad file's pathname.
330 */
331
332 return true;
333 }
334
335 /*
336 * Remember a read error for later, and see if rmap will tell us about the
337 * owner ahead of time.
338 */
339 static void
340 xfs_check_rmap_ioerr(
341 struct scrub_ctx *ctx,
342 struct disk *disk,
343 uint64_t start,
344 uint64_t length,
345 int error,
346 void *arg)
347 {
348 struct fsmap keys[2];
349 char descr[DESCR_BUFSZ];
350 struct xfs_verify_extent *ve = arg;
351 struct bitmap *tree;
352 dev_t dev;
353 bool moveon;
354
355 dev = xfs_disk_to_dev(ctx, disk);
356
357 /*
358 * If we don't have parent pointers, save the bad extent for
359 * later rescanning.
360 */
361 if (dev == ctx->fsinfo.fs_datadev)
362 tree = ve->d_bad;
363 else if (dev == ctx->fsinfo.fs_rtdev)
364 tree = ve->r_bad;
365 else
366 tree = NULL;
367 if (tree) {
368 moveon = bitmap_set(tree, start, length);
369 if (!moveon)
370 str_errno(ctx, ctx->mntpoint);
371 }
372
373 snprintf(descr, DESCR_BUFSZ, _("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "),
374 major(dev), minor(dev), start, length);
375
376 /* Go figure out which blocks are bad from the fsmap. */
377 memset(keys, 0, sizeof(struct fsmap) * 2);
378 keys->fmr_device = dev;
379 keys->fmr_physical = start;
380 (keys + 1)->fmr_device = dev;
381 (keys + 1)->fmr_physical = start + length - 1;
382 (keys + 1)->fmr_owner = ULLONG_MAX;
383 (keys + 1)->fmr_offset = ULLONG_MAX;
384 (keys + 1)->fmr_flags = UINT_MAX;
385 xfs_iterate_fsmap(ctx, descr, keys, xfs_check_rmap_error_report,
386 &start);
387 }
388
389 /* Schedule a read-verify of a (data block) extent. */
390 static bool
391 xfs_check_rmap(
392 struct scrub_ctx *ctx,
393 const char *descr,
394 struct fsmap *map,
395 void *arg)
396 {
397 struct xfs_verify_extent *ve = arg;
398 struct disk *disk;
399
400 dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
401 " offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
402 major(map->fmr_device), minor(map->fmr_device),
403 (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner,
404 (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length,
405 map->fmr_flags);
406
407 /* "Unknown" extents should be verified; they could be data. */
408 if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
409 map->fmr_owner == XFS_FMR_OWN_UNKNOWN)
410 map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER;
411
412 /*
413 * We only care about read-verifying data extents that have been
414 * written to disk. This means we can skip "special" owners
415 * (metadata), xattr blocks, unwritten extents, and extent maps.
416 * These should all get checked elsewhere in the scrubber.
417 */
418 if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK |
419 FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER))
420 goto out;
421
422 /* XXX: Filter out directory data blocks. */
423
424 /* Schedule the read verify command for (eventual) running. */
425 disk = xfs_dev_to_disk(ctx, map->fmr_device);
426
427 read_verify_schedule_io(ve->readverify, ptvar_get(ve->rvstate), disk,
428 map->fmr_physical, map->fmr_length, ve);
429
430 out:
431 /* Is this the last extent? Fire off the read. */
432 if (map->fmr_flags & FMR_OF_LAST)
433 read_verify_force_io(ve->readverify, ptvar_get(ve->rvstate));
434
435 return true;
436 }
437
438 /*
439 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
440 * do data checksums, we trust that the underlying storage will pass back
441 * an IO error if it can't retrieve whatever we previously stored there.
442 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
443 * scan the extent maps of the entire fs tree to figure (and the unlinked
444 * inodes) out which files are now broken.
445 */
446 bool
447 xfs_scan_blocks(
448 struct scrub_ctx *ctx)
449 {
450 struct xfs_verify_extent ve;
451 bool moveon;
452
453 ve.rvstate = ptvar_init(scrub_nproc(ctx), sizeof(struct read_verify));
454 if (!ve.rvstate) {
455 str_errno(ctx, ctx->mntpoint);
456 return false;
457 }
458
459 moveon = bitmap_init(&ve.d_bad);
460 if (!moveon) {
461 str_errno(ctx, ctx->mntpoint);
462 goto out_ve;
463 }
464
465 moveon = bitmap_init(&ve.r_bad);
466 if (!moveon) {
467 str_errno(ctx, ctx->mntpoint);
468 goto out_dbad;
469 }
470
471 ve.readverify = read_verify_pool_init(ctx, ctx->geo.blocksize,
472 xfs_check_rmap_ioerr, disk_heads(ctx->datadev));
473 if (!ve.readverify) {
474 moveon = false;
475 str_info(ctx, ctx->mntpoint,
476 _("Could not create media verifier."));
477 goto out_rbad;
478 }
479 moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &ve);
480 if (!moveon)
481 goto out_pool;
482 read_verify_pool_flush(ve.readverify);
483 ctx->bytes_checked += read_verify_bytes(ve.readverify);
484 read_verify_pool_destroy(ve.readverify);
485
486 /* Scan the whole dir tree to see what matches the bad extents. */
487 if (!bitmap_empty(ve.d_bad) || !bitmap_empty(ve.r_bad))
488 moveon = xfs_report_verify_errors(ctx, ve.d_bad, ve.r_bad);
489
490 bitmap_free(&ve.r_bad);
491 bitmap_free(&ve.d_bad);
492 ptvar_free(ve.rvstate);
493 return moveon;
494
495 out_pool:
496 read_verify_pool_destroy(ve.readverify);
497 out_rbad:
498 bitmap_free(&ve.r_bad);
499 out_dbad:
500 bitmap_free(&ve.d_bad);
501 out_ve:
502 ptvar_free(ve.rvstate);
503 return moveon;
504 }
505
506 /* Estimate how much work we're going to do. */
507 bool
508 xfs_estimate_verify_work(
509 struct scrub_ctx *ctx,
510 uint64_t *items,
511 unsigned int *nr_threads,
512 int *rshift)
513 {
514 unsigned long long d_blocks;
515 unsigned long long d_bfree;
516 unsigned long long r_blocks;
517 unsigned long long r_bfree;
518 unsigned long long f_files;
519 unsigned long long f_free;
520 bool moveon;
521
522 moveon = xfs_scan_estimate_blocks(ctx, &d_blocks, &d_bfree,
523 &r_blocks, &r_bfree, &f_files, &f_free);
524 if (!moveon)
525 return moveon;
526
527 *items = ((d_blocks - d_bfree) + (r_blocks - r_bfree)) << ctx->blocklog;
528 *nr_threads = disk_heads(ctx->datadev);
529 *rshift = 20;
530 return moveon;
531 }