]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - repair/phase6.c
repair: convert the dir byaddr hash to a radix tree
[thirdparty/xfsprogs-dev.git] / repair / phase6.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6
7 #include "libxfs.h"
8 #include "threads.h"
9 #include "threads.h"
10 #include "prefetch.h"
11 #include "avl.h"
12 #include "globals.h"
13 #include "agheader.h"
14 #include "incore.h"
15 #include "dir2.h"
16 #include "protos.h"
17 #include "err_protos.h"
18 #include "dinode.h"
19 #include "progress.h"
20 #include "versions.h"
21
22 static struct cred zerocr;
23 static struct fsxattr zerofsx;
24 static xfs_ino_t orphanage_ino;
25
26 static struct xfs_name xfs_name_dot = {(unsigned char *)".",
27 1,
28 XFS_DIR3_FT_DIR};
29
30 /*
31 * Data structures used to keep track of directories where the ".."
32 * entries are updated. These must be rebuilt after the initial pass
33 */
34 typedef struct dotdot_update {
35 struct list_head list;
36 ino_tree_node_t *irec;
37 xfs_agnumber_t agno;
38 int ino_offset;
39 } dotdot_update_t;
40
41 static LIST_HEAD(dotdot_update_list);
42 static int dotdot_update;
43
44 static void
45 add_dotdot_update(
46 xfs_agnumber_t agno,
47 ino_tree_node_t *irec,
48 int ino_offset)
49 {
50 dotdot_update_t *dir = malloc(sizeof(dotdot_update_t));
51
52 if (!dir)
53 do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"),
54 sizeof(dotdot_update_t));
55
56 INIT_LIST_HEAD(&dir->list);
57 dir->irec = irec;
58 dir->agno = agno;
59 dir->ino_offset = ino_offset;
60
61 list_add(&dir->list, &dotdot_update_list);
62 }
63
64 /*
65 * Data structures and routines to keep track of directory entries
66 * and whether their leaf entry has been seen. Also used for name
67 * duplicate checking and rebuilding step if required.
68 */
69 struct dir_hash_ent {
70 struct dir_hash_ent *nextbyhash; /* next in name bucket */
71 struct dir_hash_ent *nextbyorder; /* next in order added */
72 xfs_dahash_t hashval; /* hash value of name */
73 uint32_t address; /* offset of data entry */
74 xfs_ino_t inum; /* inode num of entry */
75 short junkit; /* name starts with / */
76 short seen; /* have seen leaf entry */
77 struct xfs_name name;
78 unsigned char namebuf[];
79 };
80
81 struct dir_hash_tab {
82 int size; /* size of hash tables */
83 struct dir_hash_ent *first; /* ptr to first added entry */
84 struct dir_hash_ent *last; /* ptr to last added entry */
85 struct dir_hash_ent **byhash; /* ptr to name hash buckets */
86 #define HT_UNSEEN 1
87 struct radix_tree_root byaddr;
88 };
89
/* Bytes needed for a dir_hash_tab followed by n name-hash bucket pointers. */
#define DIR_HASH_TAB_SIZE(n)	\
	(sizeof(struct dir_hash_tab) + ((n) * sizeof(struct dir_hash_ent *)))

/* Reduce value a to a bucket index for table t. */
#define DIR_HASH_FUNC(t, a)	((a) % (t)->size)
93
94 /*
95 * Track the contents of the freespace table in a directory.
96 */
97 typedef struct freetab {
98 int naents; /* expected number of data blocks */
99 int nents; /* number of data blocks processed */
100 struct freetab_ent {
101 xfs_dir2_data_off_t v;
102 short s;
103 } ents[1];
104 } freetab_t;
105 #define FREETAB_SIZE(n) \
106 (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
107
/*
 * Result codes of the directory hash checks.  DIR_HASH_CK_TOTAL is the
 * number of codes and sizes the message table in dir_hash_check().
 */
enum {
	DIR_HASH_CK_OK		= 0,	/* no problem found */
	DIR_HASH_CK_DUPLEAF	= 1,	/* leaf entry seen more than once */
	DIR_HASH_CK_BADHASH	= 2,	/* leaf hash differs from name hash */
	DIR_HASH_CK_NODATA	= 3,	/* leaf entry without a data entry */
	DIR_HASH_CK_NOLEAF	= 4,	/* data entry without a leaf entry */
	DIR_HASH_CK_BADSTALE	= 5,	/* stale count does not match */
	DIR_HASH_CK_TOTAL	= 6
};
115
116 /*
117 * Need to handle CRC and validation errors specially here. If there is a
118 * validator error, re-read without the verifier so that we get a buffer we can
119 * check and repair. Re-attach the ops to the buffer after the read so that when
120 * it is rewritten the CRC is recalculated.
121 *
122 * If the buffer was not read, we return an error. If the buffer was read but
123 * had a CRC or corruption error, we reread it without the verifier and if it is
124 * read successfully we increment *crc_error and return 0. Otherwise we
125 * return the read error.
126 */
127 static int
128 dir_read_buf(
129 struct xfs_inode *ip,
130 xfs_dablk_t bno,
131 struct xfs_buf **bpp,
132 const struct xfs_buf_ops *ops,
133 int *crc_error)
134 {
135 int error;
136 int error2;
137
138 error = -libxfs_da_read_buf(NULL, ip, bno, 0, bpp, XFS_DATA_FORK, ops);
139
140 if (error != EFSBADCRC && error != EFSCORRUPTED)
141 return error;
142
143 error2 = -libxfs_da_read_buf(NULL, ip, bno, 0, bpp, XFS_DATA_FORK,
144 NULL);
145 if (error2)
146 return error2;
147
148 (*crc_error)++;
149 (*bpp)->b_ops = ops;
150 return 0;
151 }
152
153 /*
154 * Returns 0 if the name already exists (ie. a duplicate)
155 */
156 static int
157 dir_hash_add(
158 struct xfs_mount *mp,
159 struct dir_hash_tab *hashtab,
160 uint32_t addr,
161 xfs_ino_t inum,
162 int namelen,
163 unsigned char *name,
164 uint8_t ftype)
165 {
166 xfs_dahash_t hash = 0;
167 int byhash = 0;
168 struct dir_hash_ent *p;
169 int dup;
170 short junk;
171 struct xfs_name xname;
172 int error;
173
174 xname.name = name;
175 xname.len = namelen;
176 xname.type = ftype;
177
178 junk = name[0] == '/';
179 dup = 0;
180
181 if (!junk) {
182 hash = libxfs_dir2_hashname(mp, &xname);
183 byhash = DIR_HASH_FUNC(hashtab, hash);
184
185 /*
186 * search hash bucket for existing name.
187 */
188 for (p = hashtab->byhash[byhash]; p; p = p->nextbyhash) {
189 if (p->hashval == hash && p->name.len == namelen) {
190 if (memcmp(p->name.name, name, namelen) == 0) {
191 dup = 1;
192 junk = 1;
193 break;
194 }
195 }
196 }
197 }
198
199 /*
200 * Allocate enough space for the hash entry and the name in a single
201 * allocation so we can store our own copy of the name for later use.
202 */
203 p = calloc(1, sizeof(*p) + namelen + 1);
204 if (!p)
205 do_error(_("malloc failed in dir_hash_add (%zu bytes)\n"),
206 sizeof(*p));
207
208 error = radix_tree_insert(&hashtab->byaddr, addr, p);
209 if (error == EEXIST) {
210 do_warn(_("duplicate addrs %u in directory!\n"), addr);
211 free(p);
212 return 0;
213 }
214 radix_tree_tag_set(&hashtab->byaddr, addr, HT_UNSEEN);
215
216 if (hashtab->last)
217 hashtab->last->nextbyorder = p;
218 else
219 hashtab->first = p;
220 p->nextbyorder = NULL;
221 hashtab->last = p;
222
223 if (!(p->junkit = junk)) {
224 p->hashval = hash;
225 p->nextbyhash = hashtab->byhash[byhash];
226 hashtab->byhash[byhash] = p;
227 }
228 p->address = addr;
229 p->inum = inum;
230 p->seen = 0;
231
232 /* Set up the name in the region trailing the hash entry. */
233 memcpy(p->namebuf, name, namelen);
234 p->name.name = p->namebuf;
235 p->name.len = namelen;
236 p->name.type = ftype;
237 return !dup;
238 }
239
240 static int
241 dir_hash_check(
242 struct dir_hash_tab *hashtab,
243 struct xfs_inode *ip,
244 int seeval)
245 {
246 static char *seevalstr[DIR_HASH_CK_TOTAL];
247 static int done;
248
249 if (!done) {
250 seevalstr[DIR_HASH_CK_OK] = _("ok");
251 seevalstr[DIR_HASH_CK_DUPLEAF] = _("duplicate leaf");
252 seevalstr[DIR_HASH_CK_BADHASH] = _("hash value mismatch");
253 seevalstr[DIR_HASH_CK_NODATA] = _("no data entry");
254 seevalstr[DIR_HASH_CK_NOLEAF] = _("no leaf entry");
255 seevalstr[DIR_HASH_CK_BADSTALE] = _("bad stale count");
256 done = 1;
257 }
258
259 if (seeval == DIR_HASH_CK_OK &&
260 radix_tree_tagged(&hashtab->byaddr, HT_UNSEEN))
261 seeval = DIR_HASH_CK_NOLEAF;
262 if (seeval == DIR_HASH_CK_OK)
263 return 0;
264 do_warn(_("bad hash table for directory inode %" PRIu64 " (%s): "),
265 ip->i_ino, seevalstr[seeval]);
266 if (!no_modify)
267 do_warn(_("rebuilding\n"));
268 else
269 do_warn(_("would rebuild\n"));
270 return 1;
271 }
272
273 static void
274 dir_hash_done(
275 struct dir_hash_tab *hashtab)
276 {
277 int i;
278 struct dir_hash_ent *n;
279 struct dir_hash_ent *p;
280
281 for (i = 0; i < hashtab->size; i++) {
282 for (p = hashtab->byhash[i]; p; p = n) {
283 n = p->nextbyhash;
284 radix_tree_delete(&hashtab->byaddr, p->address);
285 free(p);
286 }
287 }
288 free(hashtab);
289 }
290
291 static struct dir_hash_tab *
292 dir_hash_init(
293 xfs_fsize_t size)
294 {
295 struct dir_hash_tab *hashtab;
296 int hsize;
297
298 hsize = size / (16 * 4);
299 if (hsize > 65536)
300 hsize = 63336;
301 else if (hsize < 16)
302 hsize = 16;
303 if ((hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1)) == NULL)
304 do_error(_("calloc failed in dir_hash_init\n"));
305 hashtab->size = hsize;
306 hashtab->byhash = (struct dir_hash_ent **)((char *)hashtab +
307 sizeof(struct dir_hash_tab));
308 INIT_RADIX_TREE(&hashtab->byaddr, 0);
309 return hashtab;
310 }
311
312 static int
313 dir_hash_see(
314 struct dir_hash_tab *hashtab,
315 xfs_dahash_t hash,
316 xfs_dir2_dataptr_t addr)
317 {
318 struct dir_hash_ent *p;
319
320 p = radix_tree_lookup(&hashtab->byaddr, addr);
321 if (!p)
322 return DIR_HASH_CK_NODATA;
323 if (!radix_tree_tag_get(&hashtab->byaddr, addr, HT_UNSEEN))
324 return DIR_HASH_CK_DUPLEAF;
325 if (p->junkit == 0 && p->hashval != hash)
326 return DIR_HASH_CK_BADHASH;
327 radix_tree_tag_clear(&hashtab->byaddr, addr, HT_UNSEEN);
328 return DIR_HASH_CK_OK;
329 }
330
331 static void
332 dir_hash_update_ftype(
333 struct dir_hash_tab *hashtab,
334 xfs_dir2_dataptr_t addr,
335 uint8_t ftype)
336 {
337 struct dir_hash_ent *p;
338
339 p = radix_tree_lookup(&hashtab->byaddr, addr);
340 if (!p)
341 return;
342 p->name.type = ftype;
343 }
344
345 /*
346 * checks to make sure leafs match a data entry, and that the stale
347 * count is valid.
348 */
349 static int
350 dir_hash_see_all(
351 struct dir_hash_tab *hashtab,
352 xfs_dir2_leaf_entry_t *ents,
353 int count,
354 int stale)
355 {
356 int i;
357 int j;
358 int rval;
359
360 for (i = j = 0; i < count; i++) {
361 if (be32_to_cpu(ents[i].address) == XFS_DIR2_NULL_DATAPTR) {
362 j++;
363 continue;
364 }
365 rval = dir_hash_see(hashtab, be32_to_cpu(ents[i].hashval),
366 be32_to_cpu(ents[i].address));
367 if (rval != DIR_HASH_CK_OK)
368 return rval;
369 }
370 return j == stale ? DIR_HASH_CK_OK : DIR_HASH_CK_BADSTALE;
371 }
372
373 /*
374 * Given a block number in a fork, return the next valid block number (not a
375 * hole). If this is the last block number then NULLFILEOFF is returned.
376 */
377 static int
378 bmap_next_offset(
379 struct xfs_inode *ip,
380 xfs_fileoff_t *bnop)
381 {
382 xfs_fileoff_t bno;
383 int error;
384 struct xfs_bmbt_irec got;
385 struct xfs_iext_cursor icur;
386
387 switch (ip->i_df.if_format) {
388 case XFS_DINODE_FMT_LOCAL:
389 *bnop = NULLFILEOFF;
390 return 0;
391 case XFS_DINODE_FMT_BTREE:
392 case XFS_DINODE_FMT_EXTENTS:
393 break;
394 default:
395 return EIO;
396 }
397
398 if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
399 error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
400 if (error)
401 return error;
402 }
403
404 bno = *bnop + 1;
405 if (!libxfs_iext_lookup_extent(ip, &ip->i_df, bno, &icur, &got))
406 *bnop = NULLFILEOFF;
407 else
408 *bnop = got.br_startoff < bno ? bno : got.br_startoff;
409 return 0;
410 }
411
/*
 * Report a failed transaction reservation through do_error(), with a
 * dedicated message for ENOSPC.
 */
static void
res_failed(
	int	err)
{
	if (err != ENOSPC) {
		do_error(_("xfs_trans_reserve returned %d\n"), err);
		return;
	}
	do_error(_("ran out of disk space!\n"));
}
421
422 static void
423 mk_rbmino(xfs_mount_t *mp)
424 {
425 xfs_trans_t *tp;
426 xfs_inode_t *ip;
427 xfs_bmbt_irec_t *ep;
428 int i;
429 int nmap;
430 int error;
431 xfs_fileoff_t bno;
432 xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
433 int times;
434 uint blocks;
435
436 /*
437 * first set up inode
438 */
439 i = -libxfs_trans_alloc_rollable(mp, 10, &tp);
440 if (i)
441 res_failed(i);
442
443 error = -libxfs_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
444 if (error) {
445 do_error(
446 _("couldn't iget realtime bitmap inode -- error - %d\n"),
447 error);
448 }
449
450 memset(&ip->i_d, 0, sizeof(ip->i_d));
451
452 VFS_I(ip)->i_mode = S_IFREG;
453 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
454 if (ip->i_afp)
455 ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
456
457 set_nlink(VFS_I(ip), 1); /* account for sb ptr */
458
459 times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
460 if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
461 VFS_I(ip)->i_version = 1;
462 ip->i_d.di_flags2 = 0;
463 times |= XFS_ICHGTIME_CREATE;
464 }
465 libxfs_trans_ichgtime(tp, ip, times);
466
467 /*
468 * now the ifork
469 */
470 ip->i_df.if_flags = XFS_IFEXTENTS;
471 ip->i_df.if_bytes = 0;
472 ip->i_df.if_u1.if_root = NULL;
473
474 ip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
475
476 /*
477 * commit changes
478 */
479 libxfs_trans_ijoin(tp, ip, 0);
480 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
481 error = -libxfs_trans_commit(tp);
482 if (error)
483 do_error(_("%s: commit failed, error %d\n"), __func__, error);
484
485 /*
486 * then allocate blocks for file and fill with zeroes (stolen
487 * from mkfs)
488 */
489 blocks = mp->m_sb.sb_rbmblocks +
490 XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
491 error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
492 if (error)
493 res_failed(error);
494
495 libxfs_trans_ijoin(tp, ip, 0);
496 bno = 0;
497 while (bno < mp->m_sb.sb_rbmblocks) {
498 nmap = XFS_BMAP_MAX_NMAP;
499 error = -libxfs_bmapi_write(tp, ip, bno,
500 (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
501 0, mp->m_sb.sb_rbmblocks, map, &nmap);
502 if (error) {
503 do_error(
504 _("couldn't allocate realtime bitmap, error = %d\n"),
505 error);
506 }
507 for (i = 0, ep = map; i < nmap; i++, ep++) {
508 libxfs_device_zero(mp->m_ddev_targp,
509 XFS_FSB_TO_DADDR(mp, ep->br_startblock),
510 XFS_FSB_TO_BB(mp, ep->br_blockcount));
511 bno += ep->br_blockcount;
512 }
513 }
514 error = -libxfs_trans_commit(tp);
515 if (error) {
516 do_error(
517 _("allocation of the realtime bitmap failed, error = %d\n"),
518 error);
519 }
520 libxfs_irele(ip);
521 }
522
523 static int
524 fill_rbmino(xfs_mount_t *mp)
525 {
526 struct xfs_buf *bp;
527 xfs_trans_t *tp;
528 xfs_inode_t *ip;
529 xfs_rtword_t *bmp;
530 int nmap;
531 int error;
532 xfs_fileoff_t bno;
533 xfs_bmbt_irec_t map;
534
535 bmp = btmcompute;
536 bno = 0;
537
538 error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
539 if (error)
540 res_failed(error);
541
542 error = -libxfs_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
543 if (error) {
544 do_error(
545 _("couldn't iget realtime bitmap inode -- error - %d\n"),
546 error);
547 }
548
549 while (bno < mp->m_sb.sb_rbmblocks) {
550 /*
551 * fill the file one block at a time
552 */
553 nmap = 1;
554 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0, 1, &map, &nmap);
555 if (error || nmap != 1) {
556 do_error(
557 _("couldn't map realtime bitmap block %" PRIu64 ", error = %d\n"),
558 bno, error);
559 }
560
561 ASSERT(map.br_startblock != HOLESTARTBLOCK);
562
563 error = -libxfs_trans_read_buf(
564 mp, tp, mp->m_dev,
565 XFS_FSB_TO_DADDR(mp, map.br_startblock),
566 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
567
568 if (error) {
569 do_warn(
570 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime bitmap inode %" PRIu64 "\n"),
571 bno, map.br_startblock, mp->m_sb.sb_rbmino);
572 return(1);
573 }
574
575 memmove(bp->b_addr, bmp, mp->m_sb.sb_blocksize);
576
577 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
578
579 bmp = (xfs_rtword_t *)((intptr_t) bmp + mp->m_sb.sb_blocksize);
580 bno++;
581 }
582
583 libxfs_trans_ijoin(tp, ip, 0);
584 error = -libxfs_trans_commit(tp);
585 if (error)
586 do_error(_("%s: commit failed, error %d\n"), __func__, error);
587 libxfs_irele(ip);
588 return(0);
589 }
590
591 static int
592 fill_rsumino(xfs_mount_t *mp)
593 {
594 struct xfs_buf *bp;
595 xfs_trans_t *tp;
596 xfs_inode_t *ip;
597 xfs_suminfo_t *smp;
598 int nmap;
599 int error;
600 xfs_fileoff_t bno;
601 xfs_fileoff_t end_bno;
602 xfs_bmbt_irec_t map;
603
604 smp = sumcompute;
605 bno = 0;
606 end_bno = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
607
608 error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
609 if (error)
610 res_failed(error);
611
612 error = -libxfs_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
613 if (error) {
614 do_error(
615 _("couldn't iget realtime summary inode -- error - %d\n"),
616 error);
617 }
618
619 while (bno < end_bno) {
620 /*
621 * fill the file one block at a time
622 */
623 nmap = 1;
624 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0, 1, &map, &nmap);
625 if (error || nmap != 1) {
626 do_error(
627 _("couldn't map realtime summary inode block %" PRIu64 ", error = %d\n"),
628 bno, error);
629 }
630
631 ASSERT(map.br_startblock != HOLESTARTBLOCK);
632
633 error = -libxfs_trans_read_buf(
634 mp, tp, mp->m_dev,
635 XFS_FSB_TO_DADDR(mp, map.br_startblock),
636 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
637
638 if (error) {
639 do_warn(
640 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime summary inode %" PRIu64 "\n"),
641 bno, map.br_startblock, mp->m_sb.sb_rsumino);
642 libxfs_irele(ip);
643 return(1);
644 }
645
646 memmove(bp->b_addr, smp, mp->m_sb.sb_blocksize);
647
648 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
649
650 smp = (xfs_suminfo_t *)((intptr_t)smp + mp->m_sb.sb_blocksize);
651 bno++;
652 }
653
654 libxfs_trans_ijoin(tp, ip, 0);
655 error = -libxfs_trans_commit(tp);
656 if (error)
657 do_error(_("%s: commit failed, error %d\n"), __func__, error);
658 libxfs_irele(ip);
659 return(0);
660 }
661
662 static void
663 mk_rsumino(xfs_mount_t *mp)
664 {
665 xfs_trans_t *tp;
666 xfs_inode_t *ip;
667 xfs_bmbt_irec_t *ep;
668 int i;
669 int nmap;
670 int error;
671 int nsumblocks;
672 xfs_fileoff_t bno;
673 xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
674 int times;
675 uint blocks;
676
677 /*
678 * first set up inode
679 */
680 i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
681 if (i)
682 res_failed(i);
683
684 error = -libxfs_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
685 if (error) {
686 do_error(
687 _("couldn't iget realtime summary inode -- error - %d\n"),
688 error);
689 }
690
691 memset(&ip->i_d, 0, sizeof(ip->i_d));
692
693 VFS_I(ip)->i_mode = S_IFREG;
694 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
695 if (ip->i_afp)
696 ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
697
698 set_nlink(VFS_I(ip), 1); /* account for sb ptr */
699
700 times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
701 if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
702 VFS_I(ip)->i_version = 1;
703 ip->i_d.di_flags2 = 0;
704 times |= XFS_ICHGTIME_CREATE;
705 }
706 libxfs_trans_ichgtime(tp, ip, times);
707
708 /*
709 * now the ifork
710 */
711 ip->i_df.if_flags = XFS_IFEXTENTS;
712 ip->i_df.if_bytes = 0;
713 ip->i_df.if_u1.if_root = NULL;
714
715 ip->i_d.di_size = mp->m_rsumsize;
716
717 /*
718 * commit changes
719 */
720 libxfs_trans_ijoin(tp, ip, 0);
721 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
722 error = -libxfs_trans_commit(tp);
723 if (error)
724 do_error(_("%s: commit failed, error %d\n"), __func__, error);
725
726 /*
727 * then allocate blocks for file and fill with zeroes (stolen
728 * from mkfs)
729 */
730 nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
731 blocks = nsumblocks + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
732 error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
733 if (error)
734 res_failed(error);
735
736 libxfs_trans_ijoin(tp, ip, 0);
737 bno = 0;
738 while (bno < nsumblocks) {
739 nmap = XFS_BMAP_MAX_NMAP;
740 error = -libxfs_bmapi_write(tp, ip, bno,
741 (xfs_extlen_t)(nsumblocks - bno),
742 0, nsumblocks, map, &nmap);
743 if (error) {
744 do_error(
745 _("couldn't allocate realtime summary inode, error = %d\n"),
746 error);
747 }
748 for (i = 0, ep = map; i < nmap; i++, ep++) {
749 libxfs_device_zero(mp->m_ddev_targp,
750 XFS_FSB_TO_DADDR(mp, ep->br_startblock),
751 XFS_FSB_TO_BB(mp, ep->br_blockcount));
752 bno += ep->br_blockcount;
753 }
754 }
755 error = -libxfs_trans_commit(tp);
756 if (error) {
757 do_error(
758 _("allocation of the realtime summary ino failed, error = %d\n"),
759 error);
760 }
761 libxfs_irele(ip);
762 }
763
764 /*
765 * makes a new root directory.
766 */
767 static void
768 mk_root_dir(xfs_mount_t *mp)
769 {
770 xfs_trans_t *tp;
771 xfs_inode_t *ip;
772 int i;
773 int error;
774 const mode_t mode = 0755;
775 ino_tree_node_t *irec;
776 int times;
777
778 ip = NULL;
779 i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
780 if (i)
781 res_failed(i);
782
783 error = -libxfs_iget(mp, tp, mp->m_sb.sb_rootino, 0, &ip);
784 if (error) {
785 do_error(_("could not iget root inode -- error - %d\n"), error);
786 }
787
788 /*
789 * take care of the core -- initialization from xfs_ialloc()
790 */
791 memset(&ip->i_d, 0, sizeof(ip->i_d));
792
793 VFS_I(ip)->i_mode = mode|S_IFDIR;
794 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
795 if (ip->i_afp)
796 ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
797
798 set_nlink(VFS_I(ip), 2); /* account for . and .. */
799
800 times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
801 if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
802 VFS_I(ip)->i_version = 1;
803 ip->i_d.di_flags2 = 0;
804 times |= XFS_ICHGTIME_CREATE;
805 }
806 libxfs_trans_ichgtime(tp, ip, times);
807 libxfs_trans_ijoin(tp, ip, 0);
808 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
809
810 /*
811 * now the ifork
812 */
813 ip->i_df.if_flags = XFS_IFEXTENTS;
814 ip->i_df.if_bytes = 0;
815 ip->i_df.if_u1.if_root = NULL;
816
817 /*
818 * initialize the directory
819 */
820 libxfs_dir_init(tp, ip, ip);
821
822 error = -libxfs_trans_commit(tp);
823 if (error)
824 do_error(_("%s: commit failed, error %d\n"), __func__, error);
825
826 libxfs_irele(ip);
827
828 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
829 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
830 set_inode_isadir(irec, XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino) -
831 irec->ino_startnum);
832 }
833
834 /*
835 * orphanage name == lost+found
836 */
837 static xfs_ino_t
838 mk_orphanage(xfs_mount_t *mp)
839 {
840 xfs_ino_t ino;
841 xfs_trans_t *tp;
842 xfs_inode_t *ip;
843 xfs_inode_t *pip;
844 ino_tree_node_t *irec;
845 int ino_offset = 0;
846 int i;
847 int error;
848 const int mode = 0755;
849 int nres;
850 struct xfs_name xname;
851
852 /*
853 * check for an existing lost+found first, if it exists, return
854 * its inode. Otherwise, we can create it. Bad lost+found inodes
855 * would have been cleared in phase3 and phase4.
856 */
857
858 i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip);
859 if (i)
860 do_error(_("%d - couldn't iget root inode to obtain %s\n"),
861 i, ORPHANAGE);
862
863 xname.name = (unsigned char *)ORPHANAGE;
864 xname.len = strlen(ORPHANAGE);
865 xname.type = XFS_DIR3_FT_DIR;
866
867 if (libxfs_dir_lookup(NULL, pip, &xname, &ino, NULL) == 0)
868 return ino;
869
870 /*
871 * could not be found, create it
872 */
873 nres = XFS_MKDIR_SPACE_RES(mp, xname.len);
874 i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir, nres, 0, 0, &tp);
875 if (i)
876 res_failed(i);
877
878 /*
879 * use iget/ijoin instead of trans_iget because the ialloc
880 * wrapper can commit the transaction and start a new one
881 */
882 /* i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip);
883 if (i)
884 do_error(_("%d - couldn't iget root inode to make %s\n"),
885 i, ORPHANAGE);*/
886
887 error = -libxfs_dir_ialloc(&tp, pip, mode|S_IFDIR,
888 1, 0, &zerocr, &zerofsx, &ip);
889 if (error) {
890 do_error(_("%s inode allocation failed %d\n"),
891 ORPHANAGE, error);
892 }
893 inc_nlink(VFS_I(ip)); /* account for . */
894 ino = ip->i_ino;
895
896 irec = find_inode_rec(mp,
897 XFS_INO_TO_AGNO(mp, ino),
898 XFS_INO_TO_AGINO(mp, ino));
899
900 if (irec == NULL) {
901 /*
902 * This inode is allocated from a newly created inode
903 * chunk and therefore did not exist when inode chunks
904 * were processed in phase3. Add this group of inodes to
905 * the entry avl tree as if they were discovered in phase3.
906 */
907 irec = set_inode_free_alloc(mp, XFS_INO_TO_AGNO(mp, ino),
908 XFS_INO_TO_AGINO(mp, ino));
909 alloc_ex_data(irec);
910
911 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)
912 set_inode_free(irec, i);
913 }
914
915 ino_offset = get_inode_offset(mp, ino, irec);
916
917 /*
918 * Mark the inode allocated to lost+found as used in the AVL tree
919 * so it is not skipped in phase 7
920 */
921 set_inode_used(irec, ino_offset);
922 add_inode_ref(irec, ino_offset);
923 add_inode_reached(irec, ino_offset);
924
925 /*
926 * now that we know the transaction will stay around,
927 * add the root inode to it
928 */
929 libxfs_trans_ijoin(tp, pip, 0);
930
931 /*
932 * create the actual entry
933 */
934 error = -libxfs_dir_createname(tp, pip, &xname, ip->i_ino, nres);
935 if (error)
936 do_error(
937 _("can't make %s, createname error %d\n"),
938 ORPHANAGE, error);
939
940 /*
941 * bump up the link count in the root directory to account
942 * for .. in the new directory, and update the irec copy of the
943 * on-disk nlink so we don't fail the link count check later.
944 */
945 inc_nlink(VFS_I(pip));
946 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
947 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
948 add_inode_ref(irec, 0);
949 set_inode_disk_nlinks(irec, 0, get_inode_disk_nlinks(irec, 0) + 1);
950
951 libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
952 libxfs_dir_init(tp, ip, pip);
953 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
954 error = -libxfs_trans_commit(tp);
955 if (error) {
956 do_error(_("%s directory creation failed -- bmapf error %d\n"),
957 ORPHANAGE, error);
958 }
959 libxfs_irele(ip);
960 libxfs_irele(pip);
961
962 return(ino);
963 }
964
965 /*
966 * move a file to the orphange.
967 */
968 static void
969 mv_orphanage(
970 xfs_mount_t *mp,
971 xfs_ino_t ino, /* inode # to be moved */
972 int isa_dir) /* 1 if inode is a directory */
973 {
974 xfs_inode_t *orphanage_ip;
975 xfs_ino_t entry_ino_num;
976 xfs_inode_t *ino_p;
977 xfs_trans_t *tp;
978 int err;
979 unsigned char fname[MAXPATHLEN + 1];
980 int nres;
981 int incr;
982 ino_tree_node_t *irec;
983 int ino_offset = 0;
984 struct xfs_name xname;
985
986 xname.name = fname;
987 xname.len = snprintf((char *)fname, sizeof(fname), "%llu",
988 (unsigned long long)ino);
989
990 err = -libxfs_iget(mp, NULL, orphanage_ino, 0, &orphanage_ip);
991 if (err)
992 do_error(_("%d - couldn't iget orphanage inode\n"), err);
993 /*
994 * Make sure the filename is unique in the lost+found
995 */
996 incr = 0;
997 while (libxfs_dir_lookup(NULL, orphanage_ip, &xname, &entry_ino_num,
998 NULL) == 0)
999 xname.len = snprintf((char *)fname, sizeof(fname), "%llu.%d",
1000 (unsigned long long)ino, ++incr);
1001
1002 /* Orphans may not have a proper parent, so use custom ops here */
1003 err = -libxfs_iget(mp, NULL, ino, 0, &ino_p);
1004 if (err)
1005 do_error(_("%d - couldn't iget disconnected inode\n"), err);
1006
1007 xname.type = libxfs_mode_to_ftype(VFS_I(ino_p)->i_mode);
1008
1009 if (isa_dir) {
1010 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, orphanage_ino),
1011 XFS_INO_TO_AGINO(mp, orphanage_ino));
1012 if (irec)
1013 ino_offset = XFS_INO_TO_AGINO(mp, orphanage_ino) -
1014 irec->ino_startnum;
1015 nres = XFS_DIRENTER_SPACE_RES(mp, fnamelen) +
1016 XFS_DIRENTER_SPACE_RES(mp, 2);
1017 err = -libxfs_dir_lookup(NULL, ino_p, &xfs_name_dotdot,
1018 &entry_ino_num, NULL);
1019 if (err) {
1020 ASSERT(err == ENOENT);
1021
1022 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1023 nres, 0, 0, &tp);
1024 if (err)
1025 do_error(
1026 _("space reservation failed (%d), filesystem may be out of space\n"),
1027 err);
1028
1029 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1030 libxfs_trans_ijoin(tp, ino_p, 0);
1031
1032 err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1033 ino, nres);
1034 if (err)
1035 do_error(
1036 _("name create failed in %s (%d), filesystem may be out of space\n"),
1037 ORPHANAGE, err);
1038
1039 if (irec)
1040 add_inode_ref(irec, ino_offset);
1041 else
1042 inc_nlink(VFS_I(orphanage_ip));
1043 libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1044
1045 err = -libxfs_dir_createname(tp, ino_p, &xfs_name_dotdot,
1046 orphanage_ino, nres);
1047 if (err)
1048 do_error(
1049 _("creation of .. entry failed (%d), filesystem may be out of space\n"),
1050 err);
1051
1052 inc_nlink(VFS_I(ino_p));
1053 libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1054 err = -libxfs_trans_commit(tp);
1055 if (err)
1056 do_error(
1057 _("creation of .. entry failed (%d)\n"), err);
1058 } else {
1059 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1060 nres, 0, 0, &tp);
1061 if (err)
1062 do_error(
1063 _("space reservation failed (%d), filesystem may be out of space\n"),
1064 err);
1065
1066 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1067 libxfs_trans_ijoin(tp, ino_p, 0);
1068
1069
1070 err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1071 ino, nres);
1072 if (err)
1073 do_error(
1074 _("name create failed in %s (%d), filesystem may be out of space\n"),
1075 ORPHANAGE, err);
1076
1077 if (irec)
1078 add_inode_ref(irec, ino_offset);
1079 else
1080 inc_nlink(VFS_I(orphanage_ip));
1081 libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1082
1083 /*
1084 * don't replace .. value if it already points
1085 * to us. that'll pop a libxfs/kernel ASSERT.
1086 */
1087 if (entry_ino_num != orphanage_ino) {
1088 err = -libxfs_dir_replace(tp, ino_p,
1089 &xfs_name_dotdot, orphanage_ino,
1090 nres);
1091 if (err)
1092 do_error(
1093 _("name replace op failed (%d), filesystem may be out of space\n"),
1094 err);
1095 }
1096
1097 err = -libxfs_trans_commit(tp);
1098 if (err)
1099 do_error(
1100 _("orphanage name replace op failed (%d)\n"), err);
1101 }
1102
1103 } else {
1104 /*
1105 * use the remove log reservation as that's
1106 * more accurate. we're only creating the
1107 * links, we're not doing the inode allocation
1108 * also accounted for in the create
1109 */
1110 nres = XFS_DIRENTER_SPACE_RES(mp, xname.len);
1111 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
1112 nres, 0, 0, &tp);
1113 if (err)
1114 do_error(
1115 _("space reservation failed (%d), filesystem may be out of space\n"),
1116 err);
1117
1118 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1119 libxfs_trans_ijoin(tp, ino_p, 0);
1120
1121 err = -libxfs_dir_createname(tp, orphanage_ip, &xname, ino,
1122 nres);
1123 if (err)
1124 do_error(
1125 _("name create failed in %s (%d), filesystem may be out of space\n"),
1126 ORPHANAGE, err);
1127 ASSERT(err == 0);
1128
1129 set_nlink(VFS_I(ino_p), 1);
1130 libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1131 err = -libxfs_trans_commit(tp);
1132 if (err)
1133 do_error(
1134 _("orphanage name create failed (%d)\n"), err);
1135 }
1136 libxfs_irele(ino_p);
1137 libxfs_irele(orphanage_ip);
1138 }
1139
1140 static int
1141 entry_junked(
1142 const char *msg,
1143 const char *iname,
1144 xfs_ino_t ino1,
1145 xfs_ino_t ino2)
1146 {
1147 do_warn(msg, iname, ino1, ino2);
1148 if (!no_modify) {
1149 if (verbose)
1150 do_warn(_(", marking entry to be junked\n"));
1151 else
1152 do_warn("\n");
1153 } else
1154 do_warn(_(", would junk entry\n"));
1155 return !no_modify;
1156 }
1157
1158 /* Find and invalidate all the directory's buffers. */
1159 static int
1160 dir_binval(
1161 struct xfs_trans *tp,
1162 struct xfs_inode *ip,
1163 int whichfork)
1164 {
1165 struct xfs_iext_cursor icur;
1166 struct xfs_bmbt_irec rec;
1167 struct xfs_ifork *ifp;
1168 struct xfs_da_geometry *geo;
1169 struct xfs_buf *bp;
1170 xfs_dablk_t dabno;
1171 int error = 0;
1172
1173 if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
1174 ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
1175 return 0;
1176
1177 geo = tp->t_mountp->m_dir_geo;
1178 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1179 for_each_xfs_iext(ifp, &icur, &rec) {
1180 for (dabno = roundup(rec.br_startoff, geo->fsbcount);
1181 dabno < rec.br_startoff + rec.br_blockcount;
1182 dabno += geo->fsbcount) {
1183 bp = NULL;
1184 error = -libxfs_da_get_buf(tp, ip, dabno, &bp,
1185 whichfork);
1186 if (error)
1187 return error;
1188 if (!bp)
1189 continue;
1190 libxfs_trans_binval(tp, bp);
1191 libxfs_trans_brelse(tp, bp);
1192 }
1193 }
1194
1195 return error;
1196 }
1197
1198 /*
1199 * Unexpected failure during the rebuild will leave the entries in
1200 * lost+found on the next run
1201 */
1202
1203 static void
1204 longform_dir2_rebuild(
1205 struct xfs_mount *mp,
1206 xfs_ino_t ino,
1207 struct xfs_inode *ip,
1208 struct ino_tree_node *irec,
1209 int ino_offset,
1210 struct dir_hash_tab *hashtab)
1211 {
1212 int error;
1213 int nres;
1214 struct xfs_trans *tp;
1215 xfs_fileoff_t lastblock;
1216 struct xfs_inode pip;
1217 struct dir_hash_ent *p;
1218 int done = 0;
1219
1220 /*
1221 * trash directory completely and rebuild from scratch using the
1222 * name/inode pairs in the hash table
1223 */
1224
1225 do_warn(_("rebuilding directory inode %" PRIu64 "\n"), ino);
1226
1227 /*
1228 * first attempt to locate the parent inode, if it can't be
1229 * found, set it to the root inode and it'll be moved to the
1230 * orphanage later (the inode number here needs to be valid
1231 * for the libxfs_dir_init() call).
1232 */
1233 pip.i_ino = get_inode_parent(irec, ino_offset);
1234 if (pip.i_ino == NULLFSINO ||
1235 libxfs_dir_ino_validate(mp, pip.i_ino))
1236 pip.i_ino = mp->m_sb.sb_rootino;
1237
1238 nres = XFS_REMOVE_SPACE_RES(mp);
1239 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1240 if (error)
1241 res_failed(error);
1242 libxfs_trans_ijoin(tp, ip, 0);
1243
1244 error = dir_binval(tp, ip, XFS_DATA_FORK);
1245 if (error)
1246 do_error(_("error %d invalidating directory %llu blocks\n"),
1247 error, (unsigned long long)ip->i_ino);
1248
1249 if ((error = -libxfs_bmap_last_offset(ip, &lastblock, XFS_DATA_FORK)))
1250 do_error(_("xfs_bmap_last_offset failed -- error - %d\n"),
1251 error);
1252
1253 /* free all data, leaf, node and freespace blocks */
1254 while (!done) {
1255 error = -libxfs_bunmapi(tp, ip, 0, lastblock, XFS_BMAPI_METADATA,
1256 0, &done);
1257 if (error) {
1258 do_warn(_("xfs_bunmapi failed -- error - %d\n"), error);
1259 goto out_bmap_cancel;
1260 }
1261 error = -libxfs_defer_finish(&tp);
1262 if (error) {
1263 do_warn(("defer_finish failed -- error - %d\n"), error);
1264 goto out_bmap_cancel;
1265 }
1266 /*
1267 * Close out trans and start the next one in the chain.
1268 */
1269 error = -libxfs_trans_roll_inode(&tp, ip);
1270 if (error)
1271 goto out_bmap_cancel;
1272 }
1273
1274 error = -libxfs_dir_init(tp, ip, &pip);
1275 if (error) {
1276 do_warn(_("xfs_dir_init failed -- error - %d\n"), error);
1277 goto out_bmap_cancel;
1278 }
1279
1280 error = -libxfs_trans_commit(tp);
1281 if (error)
1282 do_error(
1283 _("dir init failed (%d)\n"), error);
1284
1285 if (ino == mp->m_sb.sb_rootino)
1286 need_root_dotdot = 0;
1287
1288 /* go through the hash list and re-add the inodes */
1289
1290 for (p = hashtab->first; p; p = p->nextbyorder) {
1291
1292 if (p->name.name[0] == '/' || (p->name.name[0] == '.' &&
1293 (p->name.len == 1 || (p->name.len == 2 &&
1294 p->name.name[1] == '.'))))
1295 continue;
1296
1297 nres = XFS_CREATE_SPACE_RES(mp, p->name.len);
1298 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_create,
1299 nres, 0, 0, &tp);
1300 if (error)
1301 res_failed(error);
1302
1303 libxfs_trans_ijoin(tp, ip, 0);
1304
1305 error = -libxfs_dir_createname(tp, ip, &p->name, p->inum,
1306 nres);
1307 if (error) {
1308 do_warn(
1309 _("name create failed in ino %" PRIu64 " (%d), filesystem may be out of space\n"),
1310 ino, error);
1311 goto out_bmap_cancel;
1312 }
1313
1314 error = -libxfs_trans_commit(tp);
1315 if (error)
1316 do_error(
1317 _("name create failed (%d) during rebuild\n"), error);
1318 }
1319
1320 return;
1321
1322 out_bmap_cancel:
1323 libxfs_trans_cancel(tp);
1324 return;
1325 }
1326
1327
1328 /*
1329 * Kill a block in a version 2 inode.
1330 * Makes its own transaction.
1331 */
1332 static void
1333 dir2_kill_block(
1334 xfs_mount_t *mp,
1335 xfs_inode_t *ip,
1336 xfs_dablk_t da_bno,
1337 struct xfs_buf *bp)
1338 {
1339 xfs_da_args_t args;
1340 int error;
1341 int nres;
1342 xfs_trans_t *tp;
1343
1344 nres = XFS_REMOVE_SPACE_RES(mp);
1345 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1346 if (error)
1347 res_failed(error);
1348 libxfs_trans_ijoin(tp, ip, 0);
1349 libxfs_trans_bjoin(tp, bp);
1350 libxfs_trans_bhold(tp, bp);
1351 memset(&args, 0, sizeof(args));
1352 args.dp = ip;
1353 args.trans = tp;
1354 args.whichfork = XFS_DATA_FORK;
1355 args.geo = mp->m_dir_geo;
1356 if (da_bno >= mp->m_dir_geo->leafblk && da_bno < mp->m_dir_geo->freeblk)
1357 error = -libxfs_da_shrink_inode(&args, da_bno, bp);
1358 else
1359 error = -libxfs_dir2_shrink_inode(&args,
1360 xfs_dir2_da_to_db(mp->m_dir_geo, da_bno), bp);
1361 if (error)
1362 do_error(_("shrink_inode failed inode %" PRIu64 " block %u\n"),
1363 ip->i_ino, da_bno);
1364 error = -libxfs_trans_commit(tp);
1365 if (error)
1366 do_error(
1367 _("directory shrink failed (%d)\n"), error);
1368 }
1369
1370 /*
1371 * process a data block, also checks for .. entry
1372 * and corrects it to match what we think .. should be
 *
 * The block held in bp is walked twice.  The first walk only validates the
 * free-space and data entry layout (lengths, alignment, tags); if it does
 * not end exactly at the end of the entry area the block is reported as
 * empty or corrupt, killed via dir2_kill_block() unless no_modify is set,
 * its freetab slot is set to NULLDATAOFF and the function returns.
 *
 * The second walk runs inside a transaction.  It fixes the block magic,
 * joins consecutive free entries, and examines every entry.  Entries are
 * junked (first name byte overwritten with '/') when they point to a
 * missing or free inode, when lost+found is not a directory, when the name
 * is a duplicate, when ".." is outside block 0, when "." is not the first
 * entry, or when a child directory is already connected or records a
 * different parent.  Surviving entries are added to hashtab and the incore
 * link/reached state is updated.
 *
 * num_illegal is increased by the number of bad entries found, *need_dot is
 * cleared when a "." entry is seen, and *freetabp may be reallocated when
 * da_bno lies beyond the table.  On normal completion the block's largest
 * free space (bestfree[0]) is recorded in the freetab.
1373 */
1374 static void
1375 longform_dir2_entry_check_data(
1376 struct xfs_mount *mp,
1377 struct xfs_inode *ip,
1378 int *num_illegal,
1379 int *need_dot,
1380 struct ino_tree_node *current_irec,
1381 int current_ino_offset,
1382 struct xfs_buf *bp,
1383 struct dir_hash_tab *hashtab,
1384 freetab_t **freetabp,
1385 xfs_dablk_t da_bno,
1386 int isblock)
1387 {
1388 xfs_dir2_dataptr_t addr;
1389 xfs_dir2_leaf_entry_t *blp;
1390 xfs_dir2_block_tail_t *btp;
1391 struct xfs_dir2_data_hdr *d;
1392 xfs_dir2_db_t db;
1393 xfs_dir2_data_entry_t *dep;
1394 xfs_dir2_data_unused_t *dup;
1395 struct xfs_dir2_data_free *bf;
1396 char *endptr;
1397 int error;
1398 char fname[MAXNAMELEN + 1];
1399 freetab_t *freetab;
1400 int i;
1401 int ino_offset;
1402 xfs_ino_t inum;
1403 ino_tree_node_t *irec;
1404 int junkit;
1405 int lastfree;
1406 int len;
1407 int nbad;
1408 int needlog;
1409 int needscan;
1410 xfs_ino_t parent;
1411 char *ptr;
1412 xfs_trans_t *tp;
1413 int wantmagic;
1414 struct xfs_da_args da = {
1415 .dp = ip,
1416 .geo = mp->m_dir_geo,
1417 };
1418
1419
1420 d = bp->b_addr;
1421 ptr = (char *)d + mp->m_dir_geo->data_entry_offset;
1422 nbad = 0;
1423 needscan = needlog = 0;
1424 junkit = 0;
1425 freetab = *freetabp;
/*
 * Work out where the entry area ends and which magic number is expected:
 * a single-block directory ends at its leaf array (or tail, whichever is
 * lower), a plain data block ends at the end of the directory block.
 */
1426 if (isblock) {
1427 btp = xfs_dir2_block_tail_p(mp->m_dir_geo, d);
1428 blp = xfs_dir2_block_leaf_p(btp);
1429 endptr = (char *)blp;
1430 if (endptr > (char *)btp)
1431 endptr = (char *)btp;
1432 if (xfs_sb_version_hascrc(&mp->m_sb))
1433 wantmagic = XFS_DIR3_BLOCK_MAGIC;
1434 else
1435 wantmagic = XFS_DIR2_BLOCK_MAGIC;
1436 } else {
1437 endptr = (char *)d + mp->m_dir_geo->blksize;
1438 if (xfs_sb_version_hascrc(&mp->m_sb))
1439 wantmagic = XFS_DIR3_DATA_MAGIC;
1440 else
1441 wantmagic = XFS_DIR2_DATA_MAGIC;
1442 }
1443 db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
1444
/*
 * NOTE(review): the loop below initialises slots naents..db-1 only; slot db
 * gets its .v/.s later in this function, but the corrupt-block return path
 * writes only ents[db].v, leaving .s as realloc() returned it -- confirm
 * this is harmless for the later freetab checks.
 */
1445 /* check for data block beyond expected end */
1446 if (freetab->naents <= db) {
1447 struct freetab_ent e;
1448
1449 *freetabp = freetab = realloc(freetab, FREETAB_SIZE(db + 1));
1450 if (!freetab) {
1451 do_error(_("realloc failed in %s (%zu bytes)\n"),
1452 __func__, FREETAB_SIZE(db + 1));
1453 }
1454 e.v = NULLDATAOFF;
1455 e.s = 0;
1456 for (i = freetab->naents; i < db; i++)
1457 freetab->ents[i] = e;
1458 freetab->naents = db + 1;
1459 }
1460
/* First walk: structural validation only, nothing is modified. */
1461 /* check the data block */
1462 while (ptr < endptr) {
1463
1464 /* check for freespace */
1465 dup = (xfs_dir2_data_unused_t *)ptr;
1466 if (XFS_DIR2_DATA_FREE_TAG == be16_to_cpu(dup->freetag)) {
1467
1468 /* check for invalid freespace length */
1469 if (ptr + be16_to_cpu(dup->length) > endptr ||
1470 be16_to_cpu(dup->length) == 0 ||
1471 (be16_to_cpu(dup->length) &
1472 (XFS_DIR2_DATA_ALIGN - 1)))
1473 break;
1474
1475 /* check for invalid tag */
1476 if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
1477 (char *)dup - (char *)d)
1478 break;
1479
1480 /* check for block with no data entries */
1481 if ((ptr == (char *)d + mp->m_dir_geo->data_entry_offset) &&
1482 (ptr + be16_to_cpu(dup->length) >= endptr)) {
1483 junkit = 1;
1484 *num_illegal += 1;
1485 break;
1486 }
1487
1488 /* continue at the end of the freespace */
1489 ptr += be16_to_cpu(dup->length);
1490 if (ptr >= endptr)
1491 break;
1492 }
1493
1494 /* validate data entry size */
1495 dep = (xfs_dir2_data_entry_t *)ptr;
1496 if (ptr + libxfs_dir2_data_entsize(mp, dep->namelen) > endptr)
1497 break;
1498 if (be16_to_cpu(*libxfs_dir2_data_entry_tag_p(mp, dep)) !=
1499 (char *)dep - (char *)d)
1500 break;
1501 ptr += libxfs_dir2_data_entsize(mp, dep->namelen);
1502 }
1503
/*
 * The walk lands exactly on endptr only when every entry checked out; any
 * early break above (including the empty-block case) leaves ptr short.
 */
1504 /* did we find an empty or corrupt block? */
1505 if (ptr != endptr) {
1506 if (junkit) {
1507 do_warn(
1508 _("empty data block %u in directory inode %" PRIu64 ": "),
1509 da_bno, ip->i_ino);
1510 } else {
1511 do_warn(_
1512 ("corrupt block %u in directory inode %" PRIu64 ": "),
1513 da_bno, ip->i_ino);
1514 }
1515 if (!no_modify) {
1516 do_warn(_("junking block\n"));
1517 dir2_kill_block(mp, ip, da_bno, bp);
1518 } else {
1519 do_warn(_("would junk block\n"));
1520 }
1521 freetab->ents[db].v = NULLDATAOFF;
1522 return;
1523 }
1524
1525 /* update number of data blocks processed */
1526 if (freetab->nents < db + 1)
1527 freetab->nents = db + 1;
1528
/*
 * Second walk happens inside a transaction with the inode and buffer joined.
 * NOTE(review): the buffer is also held; presumably so that it stays usable
 * by the caller after the commit at the end of this function -- confirm.
 */
1529 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, &tp);
1530 if (error)
1531 res_failed(error);
1532 da.trans = tp;
1533 libxfs_trans_ijoin(tp, ip, 0);
1534 libxfs_trans_bjoin(tp, bp);
1535 libxfs_trans_bhold(tp, bp);
/*
 * NOTE(review): the message below prints da_bno with %d while the other
 * messages in this function use %u for the same variable.
 */
1536 if (be32_to_cpu(d->magic) != wantmagic) {
1537 do_warn(
1538 _("bad directory block magic # %#x for directory inode %" PRIu64 " block %d: "),
1539 be32_to_cpu(d->magic), ip->i_ino, da_bno);
1540 if (!no_modify) {
1541 do_warn(_("fixing magic # to %#x\n"), wantmagic);
1542 d->magic = cpu_to_be32(wantmagic);
1543 needlog = 1;
1544 } else
1545 do_warn(_("would fix magic # to %#x\n"), wantmagic);
1546 }
1547 lastfree = 0;
1548 ptr = (char *)d + mp->m_dir_geo->data_entry_offset;
1549 /*
1550 * look at each entry. reference inode pointed to by each
1551 * entry in the incore inode tree.
1552 * if not a directory, set reached flag, increment link count
1553 * if a directory and reached, mark entry as to be deleted.
1554 * if a directory, check to see if recorded parent
1555 * matches current inode #,
1556 * if so, then set reached flag, increment link count
1557 * of current and child dir inodes, push the child
1558 * directory inode onto the directory stack.
1559 * if current inode != parent, then mark entry to be deleted.
1560 */
1561 while (ptr < endptr) {
1562 dup = (xfs_dir2_data_unused_t *)ptr;
1563 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
/* lastfree is set when the previous entry was free space too */
1564 if (lastfree) {
1565 do_warn(
1566 _("directory inode %" PRIu64 " block %u has consecutive free entries: "),
1567 ip->i_ino, da_bno);
1568 if (!no_modify) {
1569
1570 do_warn(_("joining together\n"));
1571 len = be16_to_cpu(dup->length);
1572 libxfs_dir2_data_use_free(&da, bp, dup,
1573 ptr - (char *)d, len, &needlog,
1574 &needscan);
1575 libxfs_dir2_data_make_free(&da, bp,
1576 ptr - (char *)d, len, &needlog,
1577 &needscan);
1578 } else
1579 do_warn(_("would join together\n"));
1580 }
1581 ptr += be16_to_cpu(dup->length);
1582 lastfree = 1;
1583 continue;
1584 }
/* a real entry: compute its address and step ptr past it up front */
1585 addr = xfs_dir2_db_off_to_dataptr(mp->m_dir_geo, db,
1586 ptr - (char *)d);
1587 dep = (xfs_dir2_data_entry_t *)ptr;
1588 ptr += libxfs_dir2_data_entsize(mp, dep->namelen);
1589 inum = be64_to_cpu(dep->inumber);
1590 lastfree = 0;
1591 /*
1592 * skip bogus entries (leading '/'). they'll be deleted
1593 * later. must still log it, else we leak references to
1594 * buffers.
1595 */
1596 if (dep->name[0] == '/') {
1597 nbad++;
1598 if (!no_modify)
1599 libxfs_dir2_data_log_entry(&da, bp, dep);
1600 continue;
1601 }
1602
/* NUL-terminated copy of the name, used only for messages and strcmp */
1603 memmove(fname, dep->name, dep->namelen);
1604 fname[dep->namelen] = '\0';
1605 ASSERT(inum != NULLFSINO);
1606
1607 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, inum),
1608 XFS_INO_TO_AGINO(mp, inum));
/*
 * From here on an entry is junked by overwriting the first byte of its name
 * with '/' and logging it; the leading-'/' check above then skips it.
 */
1609 if (irec == NULL) {
1610 nbad++;
1611 if (entry_junked(
1612 _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""),
1613 fname, ip->i_ino, inum)) {
1614 dep->name[0] = '/';
1615 libxfs_dir2_data_log_entry(&da, bp, dep);
1616 }
1617 continue;
1618 }
1619 ino_offset = XFS_INO_TO_AGINO(mp, inum) - irec->ino_startnum;
1620
1621 /*
1622 * if it's a free inode, blow out the entry.
1623 * by now, any inode that we think is free
1624 * really is free.
1625 */
1626 if (is_inode_free(irec, ino_offset)) {
1627 nbad++;
1628 if (entry_junked(
1629 _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64),
1630 fname, ip->i_ino, inum)) {
1631 dep->name[0] = '/';
1632 libxfs_dir2_data_log_entry(&da, bp, dep);
1633 }
1634 continue;
1635 }
1636
1637 /*
1638 * check if this inode is lost+found dir in the root
1639 */
1640 if (inum == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
1641 /*
1642 * if it's not a directory, trash it
1643 */
1644 if (!inode_isadir(irec, ino_offset)) {
1645 nbad++;
1646 if (entry_junked(
1647 _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
1648 ORPHANAGE, inum, ip->i_ino)) {
1649 dep->name[0] = '/';
1650 libxfs_dir2_data_log_entry(&da, bp, dep);
1651 }
1652 continue;
1653 }
1654 /*
1655 * if this is a dup, it will be picked up below,
1656 * otherwise, mark it as the orphanage for later.
1657 */
1658 if (!orphanage_ino)
1659 orphanage_ino = inum;
1660 }
1661
1662 /*
1663 * check for duplicate names in directory.
1664 */
1665 if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen,
1666 dep->name, libxfs_dir2_data_get_ftype(mp, dep))) {
1667 nbad++;
1668 if (entry_junked(
1669 _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
1670 fname, inum, ip->i_ino)) {
1671 dep->name[0] = '/';
1672 libxfs_dir2_data_log_entry(&da, bp, dep);
1673 }
1674 if (inum == orphanage_ino)
1675 orphanage_ino = 0;
1676 continue;
1677 }
1678
1679 /*
1680 * if just scanning to rebuild a directory due to a ".."
1681 * update, just continue
1682 */
1683 if (dotdot_update)
1684 continue;
1685
/*
 * NOTE(review): the message in this branch reads "the the first block";
 * it is a runtime string and is left untouched here.
 */
1686 /*
1687 * skip the '..' entry since it's checked when the
1688 * directory is reached by something else. if it never
1689 * gets reached, it'll be moved to the orphanage and we'll
1690 * take care of it then. If it doesn't exist at all, the
1691 * directory needs to be rebuilt first before being added
1692 * to the orphanage.
1693 */
1694 if (dep->namelen == 2 && dep->name[0] == '.' &&
1695 dep->name[1] == '.') {
1696 if (da_bno != 0) {
1697 /* ".." should be in the first block */
1698 nbad++;
1699 if (entry_junked(
1700 _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname,
1701 inum, ip->i_ino)) {
1702 dep->name[0] = '/';
1703 libxfs_dir2_data_log_entry(&da, bp, dep);
1704 }
1705 }
1706 continue;
1707 }
1708 ASSERT(no_modify || libxfs_verify_dir_ino(mp, inum));
1709 /*
1710 * special case the . entry. we know there's only one
1711 * '.' and only '.' points to itself because bogus entries
1712 * got trashed in phase 3 if there were > 1.
1713 * bump up link count for '.' but don't set reached
1714 * until we're actually reached by another directory
1715 * '..' is already accounted for or will be taken care
1716 * of when directory is moved to orphanage.
1717 */
1718 if (ip->i_ino == inum) {
1719 ASSERT(no_modify ||
1720 (dep->name[0] == '.' && dep->namelen == 1));
1721 add_inode_ref(current_irec, current_ino_offset);
1722 if (da_bno != 0 ||
1723 dep != (void *)d + mp->m_dir_geo->data_entry_offset) {
1724 /* "." should be the first entry */
1725 nbad++;
1726 if (entry_junked(
1727 _("entry \"%s\" in dir %" PRIu64 " is not the first entry"),
1728 fname, inum, ip->i_ino)) {
1729 dep->name[0] = '/';
1730 libxfs_dir2_data_log_entry(&da, bp, dep);
1731 }
1732 }
1733 *need_dot = 0;
1734 continue;
1735 }
1736 /*
1737 * skip entries with bogus inumbers if we're in no modify mode
1738 */
1739 if (no_modify && !libxfs_verify_dir_ino(mp, inum))
1740 continue;
1741
1742 /* validate ftype field if supported */
1743 if (xfs_sb_version_hasftype(&mp->m_sb)) {
1744 uint8_t dir_ftype;
1745 uint8_t ino_ftype;
1746
1747 dir_ftype = libxfs_dir2_data_get_ftype(mp, dep);
1748 ino_ftype = get_inode_ftype(irec, ino_offset);
1749
1750 if (dir_ftype != ino_ftype) {
1751 if (no_modify) {
1752 do_warn(
1753 _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1754 dir_ftype, ino_ftype,
1755 ip->i_ino, inum);
1756 } else {
1757 do_warn(
1758 _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1759 dir_ftype, ino_ftype,
1760 ip->i_ino, inum);
1761 libxfs_dir2_data_put_ftype(mp, dep, ino_ftype);
1762 libxfs_dir2_data_log_entry(&da, bp, dep);
1763 dir_hash_update_ftype(hashtab, addr,
1764 ino_ftype);
1765 }
1766 }
1767 }
1768
1769 /*
1770 * check easy case first, regular inode, just bump
1771 * the link count and continue
1772 */
1773 if (!inode_isadir(irec, ino_offset)) {
1774 add_inode_reached(irec, ino_offset);
1775 continue;
1776 }
1777 parent = get_inode_parent(irec, ino_offset);
1778 ASSERT(parent != 0);
/* junkit is reused here as a per-entry flag for the child-directory checks */
1779 junkit = 0;
1780 /*
1781 * bump up the link counts in parent and child
1782 * directory but if the link doesn't agree with
1783 * the .. in the child, blow out the entry.
1784 * if the directory has already been reached,
1785 * blow away the entry also.
1786 */
1787 if (is_inode_reached(irec, ino_offset)) {
1788 junkit = 1;
1789 do_warn(
1790 _("entry \"%s\" in dir %" PRIu64" points to an already connected directory inode %" PRIu64 "\n"),
1791 fname, ip->i_ino, inum);
1792 } else if (parent == ip->i_ino) {
1793 add_inode_reached(irec, ino_offset);
1794 add_inode_ref(current_irec, current_ino_offset);
1795 } else if (parent == NULLFSINO) {
1796 /* ".." was missing, but this entry refers to it,
1797 so, set it as the parent and mark for rebuild */
1798 do_warn(
1799 _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
1800 fname, ip->i_ino, inum);
1801 set_inode_parent(irec, ino_offset, ip->i_ino);
1802 add_inode_reached(irec, ino_offset);
1803 add_inode_ref(current_irec, current_ino_offset);
1804 add_dotdot_update(XFS_INO_TO_AGNO(mp, inum), irec,
1805 ino_offset);
1806 } else {
1807 junkit = 1;
1808 do_warn(
1809 _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 ") in ino %" PRIu64 "\n"),
1810 fname, ip->i_ino, parent, inum);
1811 }
1812 if (junkit) {
1813 if (inum == orphanage_ino)
1814 orphanage_ino = 0;
1815 nbad++;
1816 if (!no_modify) {
1817 dep->name[0] = '/';
1818 libxfs_dir2_data_log_entry(&da, bp, dep);
1819 if (verbose)
1820 do_warn(
1821 _("\twill clear entry \"%s\"\n"),
1822 fname);
1823 } else {
1824 do_warn(_("\twould clear entry \"%s\"\n"),
1825 fname);
1826 }
1827 }
1828 }
/*
 * Report the bad-entry count to the caller, refresh the free-space summary
 * and log the header if the free-entry joining or magic fix asked for it,
 * then commit the transaction.
 */
1829 *num_illegal += nbad;
1830 if (needscan)
1831 libxfs_dir2_data_freescan(mp, d, &i);
1832 if (needlog)
1833 libxfs_dir2_data_log_header(&da, bp);
1834 error = -libxfs_trans_commit(tp);
1835 if (error)
1836 do_error(
1837 _("directory block fixing failed (%d)\n"), error);
1838
1839 /* record the largest free space in the freetab for later checking */
1840 bf = libxfs_dir2_data_bestfree_p(mp, d);
1841 freetab->ents[db].v = be16_to_cpu(bf[0].length);
1842 freetab->ents[db].s = 0;
1843 }
1844
1845 /* check v5 metadata */
1846 static int
1847 __check_dir3_header(
1848 struct xfs_mount *mp,
1849 struct xfs_buf *bp,
1850 xfs_ino_t ino,
1851 __be64 owner,
1852 __be64 blkno,
1853 uuid_t *uuid)
1854 {
1855
1856 /* verify owner */
1857 if (be64_to_cpu(owner) != ino) {
1858 do_warn(
1859 _("expected owner inode %" PRIu64 ", got %llu, directory block %" PRIu64 "\n"),
1860 ino, (unsigned long long)be64_to_cpu(owner), bp->b_bn);
1861 return 1;
1862 }
1863 /* verify block number */
1864 if (be64_to_cpu(blkno) != bp->b_bn) {
1865 do_warn(
1866 _("expected block %" PRIu64 ", got %llu, directory inode %" PRIu64 "\n"),
1867 bp->b_bn, (unsigned long long)be64_to_cpu(blkno), ino);
1868 return 1;
1869 }
1870 /* verify uuid */
1871 if (platform_uuid_compare(uuid, &mp->m_sb.sb_meta_uuid) != 0) {
1872 do_warn(
1873 _("wrong FS UUID, directory inode %" PRIu64 " block %" PRIu64 "\n"),
1874 ino, bp->b_bn);
1875 return 1;
1876 }
1877
1878 return 0;
1879 }
1880
1881 static int
1882 check_da3_header(
1883 struct xfs_mount *mp,
1884 struct xfs_buf *bp,
1885 xfs_ino_t ino)
1886 {
1887 struct xfs_da3_blkinfo *info = bp->b_addr;
1888
1889 return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
1890 &info->uuid);
1891 }
1892
1893 static int
1894 check_dir3_header(
1895 struct xfs_mount *mp,
1896 struct xfs_buf *bp,
1897 xfs_ino_t ino)
1898 {
1899 struct xfs_dir3_blk_hdr *info = bp->b_addr;
1900
1901 return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
1902 &info->uuid);
1903 }
1904
1905 /*
1906 * Check contents of leaf-form block.
1907 */
1908 static int
1909 longform_dir2_check_leaf(
1910 struct xfs_mount *mp,
1911 struct xfs_inode *ip,
1912 struct dir_hash_tab *hashtab,
1913 struct freetab *freetab)
1914 {
1915 int badtail;
1916 __be16 *bestsp;
1917 struct xfs_buf *bp;
1918 xfs_dablk_t da_bno;
1919 int i;
1920 xfs_dir2_leaf_t *leaf;
1921 xfs_dir2_leaf_tail_t *ltp;
1922 int seeval;
1923 struct xfs_dir2_leaf_entry *ents;
1924 struct xfs_dir3_icleaf_hdr leafhdr;
1925 int error;
1926 int fixit = 0;
1927
1928 da_bno = mp->m_dir_geo->leafblk;
1929 error = dir_read_buf(ip, da_bno, &bp, &xfs_dir3_leaf1_buf_ops, &fixit);
1930 if (error == EFSBADCRC || error == EFSCORRUPTED || fixit) {
1931 do_warn(
1932 _("leaf block %u for directory inode %" PRIu64 " bad CRC\n"),
1933 da_bno, ip->i_ino);
1934 return 1;
1935 } else if (error) {
1936 do_error(
1937 _("can't read block %u for directory inode %" PRIu64 ", error %d\n"),
1938 da_bno, ip->i_ino, error);
1939 /* NOTREACHED */
1940 }
1941
1942 leaf = bp->b_addr;
1943 libxfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
1944 ents = leafhdr.ents;
1945 ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
1946 bestsp = xfs_dir2_leaf_bests_p(ltp);
1947 if (!(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
1948 leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) ||
1949 leafhdr.forw || leafhdr.back ||
1950 leafhdr.count < leafhdr.stale ||
1951 leafhdr.count > mp->m_dir_geo->leaf_max_ents ||
1952 (char *)&ents[leafhdr.count] > (char *)bestsp) {
1953 do_warn(
1954 _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
1955 da_bno, ip->i_ino);
1956 libxfs_buf_relse(bp);
1957 return 1;
1958 }
1959
1960 if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
1961 error = check_da3_header(mp, bp, ip->i_ino);
1962 if (error) {
1963 libxfs_buf_relse(bp);
1964 return error;
1965 }
1966 }
1967
1968 seeval = dir_hash_see_all(hashtab, ents, leafhdr.count, leafhdr.stale);
1969 if (dir_hash_check(hashtab, ip, seeval)) {
1970 libxfs_buf_relse(bp);
1971 return 1;
1972 }
1973 badtail = freetab->nents != be32_to_cpu(ltp->bestcount);
1974 for (i = 0; !badtail && i < be32_to_cpu(ltp->bestcount); i++) {
1975 freetab->ents[i].s = 1;
1976 badtail = freetab->ents[i].v != be16_to_cpu(bestsp[i]);
1977 }
1978 if (badtail) {
1979 do_warn(
1980 _("leaf block %u for directory inode %" PRIu64 " bad tail\n"),
1981 da_bno, ip->i_ino);
1982 libxfs_buf_relse(bp);
1983 return 1;
1984 }
1985 libxfs_buf_relse(bp);
1986 return fixit;
1987 }
1988
1989 /*
1990 * Check contents of the node blocks (leaves)
1991 * Looks for matching hash values for the data entries.
1992 */
1993 static int
1994 longform_dir2_check_node(
1995 struct xfs_mount *mp,
1996 struct xfs_inode *ip,
1997 struct dir_hash_tab *hashtab,
1998 struct freetab *freetab)
1999 {
2000 struct xfs_buf *bp;
2001 xfs_dablk_t da_bno;
2002 xfs_dir2_db_t fdb;
2003 xfs_dir2_free_t *free;
2004 int i;
2005 xfs_dir2_leaf_t *leaf;
2006 xfs_fileoff_t next_da_bno;
2007 int seeval = 0;
2008 int used;
2009 struct xfs_dir2_leaf_entry *ents;
2010 struct xfs_dir3_icleaf_hdr leafhdr;
2011 struct xfs_dir3_icfree_hdr freehdr;
2012 __be16 *bests;
2013 int error;
2014 int fixit = 0;
2015
2016 for (da_bno = mp->m_dir_geo->leafblk, next_da_bno = 0;
2017 next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->freeblk;
2018 da_bno = (xfs_dablk_t)next_da_bno) {
2019 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2020 if (bmap_next_offset(ip, &next_da_bno))
2021 break;
2022
2023 /*
2024 * we need to use the da3 node verifier here as it handles the
2025 * fact that reading the leaf hash tree blocks can return either
2026 * leaf or node blocks and calls the correct verifier. If we get
2027 * a node block, then we'll skip it below based on a magic
2028 * number check.
2029 */
2030 error = dir_read_buf(ip, da_bno, &bp, &xfs_da3_node_buf_ops,
2031 &fixit);
2032 if (error) {
2033 do_warn(
2034 _("can't read leaf block %u for directory inode %" PRIu64 ", error %d\n"),
2035 da_bno, ip->i_ino, error);
2036 return 1;
2037 }
2038 leaf = bp->b_addr;
2039 libxfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
2040 ents = leafhdr.ents;
2041 if (!(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
2042 leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2043 leafhdr.magic == XFS_DA_NODE_MAGIC ||
2044 leafhdr.magic == XFS_DA3_NODE_MAGIC)) {
2045 do_warn(
2046 _("unknown magic number %#x for block %u in directory inode %" PRIu64 "\n"),
2047 leafhdr.magic, da_bno, ip->i_ino);
2048 libxfs_buf_relse(bp);
2049 return 1;
2050 }
2051
2052 /* check v5 metadata */
2053 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2054 leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2055 error = check_da3_header(mp, bp, ip->i_ino);
2056 if (error) {
2057 libxfs_buf_relse(bp);
2058 return error;
2059 }
2060 }
2061
2062 /* ignore nodes */
2063 if (leafhdr.magic == XFS_DA_NODE_MAGIC ||
2064 leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2065 libxfs_buf_relse(bp);
2066 continue;
2067 }
2068
2069 /*
2070 * If there's a validator error, we need to ensure that we got
2071 * the right ops on the buffer for when we write it back out.
2072 */
2073 bp->b_ops = &xfs_dir3_leafn_buf_ops;
2074 if (leafhdr.count > mp->m_dir_geo->leaf_max_ents ||
2075 leafhdr.count < leafhdr.stale) {
2076 do_warn(
2077 _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2078 da_bno, ip->i_ino);
2079 libxfs_buf_relse(bp);
2080 return 1;
2081 }
2082 seeval = dir_hash_see_all(hashtab, ents,
2083 leafhdr.count, leafhdr.stale);
2084 libxfs_buf_relse(bp);
2085 if (seeval != DIR_HASH_CK_OK)
2086 return 1;
2087 }
2088 if (dir_hash_check(hashtab, ip, seeval))
2089 return 1;
2090
2091 for (da_bno = mp->m_dir_geo->freeblk, next_da_bno = 0;
2092 next_da_bno != NULLFILEOFF;
2093 da_bno = (xfs_dablk_t)next_da_bno) {
2094 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2095 if (bmap_next_offset(ip, &next_da_bno))
2096 break;
2097
2098 error = dir_read_buf(ip, da_bno, &bp, &xfs_dir3_free_buf_ops,
2099 &fixit);
2100 if (error) {
2101 do_warn(
2102 _("can't read freespace block %u for directory inode %" PRIu64 ", error %d\n"),
2103 da_bno, ip->i_ino, error);
2104 return 1;
2105 }
2106 free = bp->b_addr;
2107 libxfs_dir2_free_hdr_from_disk(mp, &freehdr, free);
2108 bests = freehdr.bests;
2109 fdb = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
2110 if (!(freehdr.magic == XFS_DIR2_FREE_MAGIC ||
2111 freehdr.magic == XFS_DIR3_FREE_MAGIC) ||
2112 freehdr.firstdb !=
2113 (fdb - xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
2114 mp->m_dir_geo->free_max_bests ||
2115 freehdr.nvalid < freehdr.nused) {
2116 do_warn(
2117 _("free block %u for directory inode %" PRIu64 " bad header\n"),
2118 da_bno, ip->i_ino);
2119 libxfs_buf_relse(bp);
2120 return 1;
2121 }
2122
2123 if (freehdr.magic == XFS_DIR3_FREE_MAGIC) {
2124 error = check_dir3_header(mp, bp, ip->i_ino);
2125 if (error) {
2126 libxfs_buf_relse(bp);
2127 return error;
2128 }
2129 }
2130 for (i = used = 0; i < freehdr.nvalid; i++) {
2131 if (i + freehdr.firstdb >= freetab->nents ||
2132 freetab->ents[i + freehdr.firstdb].v !=
2133 be16_to_cpu(bests[i])) {
2134 do_warn(
2135 _("free block %u entry %i for directory ino %" PRIu64 " bad\n"),
2136 da_bno, i, ip->i_ino);
2137 libxfs_buf_relse(bp);
2138 return 1;
2139 }
2140 used += be16_to_cpu(bests[i]) != NULLDATAOFF;
2141 freetab->ents[i + freehdr.firstdb].s = 1;
2142 }
2143 if (used != freehdr.nused) {
2144 do_warn(
2145 _("free block %u for directory inode %" PRIu64 " bad nused\n"),
2146 da_bno, ip->i_ino);
2147 libxfs_buf_relse(bp);
2148 return 1;
2149 }
2150 libxfs_buf_relse(bp);
2151 }
2152 for (i = 0; i < freetab->nents; i++) {
2153 if ((freetab->ents[i].s == 0) &&
2154 (freetab->ents[i].v != NULLDATAOFF)) {
2155 do_warn(
2156 _("missing freetab entry %u for directory inode %" PRIu64 "\n"),
2157 i, ip->i_ino);
2158 return 1;
2159 }
2160 }
2161 return fixit;
2162 }
2163
2164 /*
2165 * If a directory is corrupt, we need to read in as many entries as possible,
2166 * destroy the entry and create a new one with recovered name/inode pairs.
2167 * (ie. get libxfs to do all the grunt work)
2168 */
2169 static void
2170 longform_dir2_entry_check(
2171 struct xfs_mount *mp,
2172 xfs_ino_t ino,
2173 struct xfs_inode *ip,
2174 int *num_illegal,
2175 int *need_dot,
2176 struct ino_tree_node *irec,
2177 int ino_offset,
2178 struct dir_hash_tab *hashtab)
2179 {
2180 struct xfs_buf *bp;
2181 xfs_dablk_t da_bno;
2182 freetab_t *freetab;
2183 int i;
2184 int isblock;
2185 int isleaf;
2186 xfs_fileoff_t next_da_bno;
2187 int seeval;
2188 int fixit = 0;
2189 struct xfs_da_args args;
2190
2191 *need_dot = 1;
2192 freetab = malloc(FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
2193 if (!freetab) {
2194 do_error(_("malloc failed in %s (%" PRId64 " bytes)\n"),
2195 __func__,
2196 FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
2197 exit(1);
2198 }
2199 freetab->naents = ip->i_d.di_size / mp->m_dir_geo->blksize;
2200 freetab->nents = 0;
2201 for (i = 0; i < freetab->naents; i++) {
2202 freetab->ents[i].v = NULLDATAOFF;
2203 freetab->ents[i].s = 0;
2204 }
2205
2206 /* is this a block, leaf, or node directory? */
2207 args.dp = ip;
2208 args.geo = mp->m_dir_geo;
2209 libxfs_dir2_isblock(&args, &isblock);
2210 libxfs_dir2_isleaf(&args, &isleaf);
2211
2212 /* check directory "data" blocks (ie. name/inode pairs) */
2213 for (da_bno = 0, next_da_bno = 0;
2214 next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->leafblk;
2215 da_bno = (xfs_dablk_t)next_da_bno) {
2216 const struct xfs_buf_ops *ops;
2217 int error;
2218 struct xfs_dir2_data_hdr *d;
2219
2220 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2221 if (bmap_next_offset(ip, &next_da_bno)) {
2222 /*
2223 * if this is the first block, there isn't anything we
2224 * can recover so we just trash it.
2225 */
2226 if (da_bno == 0) {
2227 fixit++;
2228 goto out_fix;
2229 }
2230 break;
2231 }
2232
2233 if (isblock)
2234 ops = &xfs_dir3_block_buf_ops;
2235 else
2236 ops = &xfs_dir3_data_buf_ops;
2237
2238 error = dir_read_buf(ip, da_bno, &bp, ops, &fixit);
2239 if (error) {
2240 do_warn(
2241 _("can't read data block %u for directory inode %" PRIu64 " error %d\n"),
2242 da_bno, ino, error);
2243 *num_illegal += 1;
2244
2245 /*
2246 * we try to read all "data" blocks, but if we are in
2247 * block form and we fail, there isn't anything else to
2248 * read, and nothing we can do but trash it.
2249 */
2250 if (isblock) {
2251 fixit++;
2252 goto out_fix;
2253 }
2254 continue;
2255 }
2256
2257 /* check v5 metadata */
2258 d = bp->b_addr;
2259 if (be32_to_cpu(d->magic) == XFS_DIR3_BLOCK_MAGIC ||
2260 be32_to_cpu(d->magic) == XFS_DIR3_DATA_MAGIC) {
2261 error = check_dir3_header(mp, bp, ino);
2262 if (error) {
2263 fixit++;
2264 if (isblock)
2265 goto out_fix;
2266 continue;
2267 }
2268 }
2269
2270 longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
2271 irec, ino_offset, bp, hashtab,
2272 &freetab, da_bno, isblock);
2273 if (isblock)
2274 break;
2275
2276 libxfs_buf_relse(bp);
2277 }
2278 fixit |= (*num_illegal != 0) || dir2_is_badino(ino) || *need_dot;
2279
2280 if (!dotdot_update) {
2281 /* check btree and freespace */
2282 if (isblock) {
2283 struct xfs_dir2_data_hdr *block;
2284 xfs_dir2_block_tail_t *btp;
2285 xfs_dir2_leaf_entry_t *blp;
2286
2287 block = bp->b_addr;
2288 btp = xfs_dir2_block_tail_p(mp->m_dir_geo, block);
2289 blp = xfs_dir2_block_leaf_p(btp);
2290 seeval = dir_hash_see_all(hashtab, blp,
2291 be32_to_cpu(btp->count),
2292 be32_to_cpu(btp->stale));
2293 if (dir_hash_check(hashtab, ip, seeval))
2294 fixit |= 1;
2295 } else if (isleaf) {
2296 fixit |= longform_dir2_check_leaf(mp, ip, hashtab,
2297 freetab);
2298 } else {
2299 fixit |= longform_dir2_check_node(mp, ip, hashtab,
2300 freetab);
2301 }
2302 }
2303 out_fix:
2304 if (isblock && bp)
2305 libxfs_buf_relse(bp);
2306
2307 if (!no_modify && (fixit || dotdot_update)) {
2308 longform_dir2_rebuild(mp, ino, ip, irec, ino_offset, hashtab);
2309 *num_illegal = 0;
2310 *need_dot = 0;
2311 } else {
2312 if (fixit || dotdot_update)
2313 do_warn(
2314 _("would rebuild directory inode %" PRIu64 "\n"), ino);
2315 }
2316
2317 free(freetab);
2318 }
2319
2320 /*
2321 * shortform directory v2 processing routines -- entry verification and
2322 * bad entry deletion (pruning).
2323 */
2324 static struct xfs_dir2_sf_entry *
2325 shortform_dir2_junk(
2326 struct xfs_mount *mp,
2327 struct xfs_dir2_sf_hdr *sfp,
2328 struct xfs_dir2_sf_entry *sfep,
2329 xfs_ino_t lino,
2330 int *max_size,
2331 int *index,
2332 int *bytes_deleted,
2333 int *ino_dirty)
2334 {
2335 struct xfs_dir2_sf_entry *next_sfep;
2336 int next_len;
2337 int next_elen;
2338
2339 if (lino == orphanage_ino)
2340 orphanage_ino = 0;
2341
2342 next_elen = libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen);
2343 next_sfep = libxfs_dir2_sf_nextentry(mp, sfp, sfep);
2344
2345 /*
2346 * if we are just checking, simply return the pointer to the next entry
2347 * here so that the checking loop can continue.
2348 */
2349 if (no_modify) {
2350 do_warn(_("would junk entry\n"));
2351 return next_sfep;
2352 }
2353
2354 /*
2355 * now move all the remaining entries down over the junked entry and
2356 * clear the newly unused bytes at the tail of the directory region.
2357 */
2358 next_len = *max_size - ((intptr_t)next_sfep - (intptr_t)sfp);
2359 *max_size -= next_elen;
2360 *bytes_deleted += next_elen;
2361
2362 memmove(sfep, next_sfep, next_len);
2363 memset((void *)((intptr_t)sfep + next_len), 0, next_elen);
2364 sfp->count -= 1;
2365 *ino_dirty = 1;
2366
2367 /*
2368 * WARNING: drop the index i by one so it matches the decremented count
2369 * for accurate comparisons in the loop test
2370 */
2371 (*index)--;
2372
2373 if (verbose)
2374 do_warn(_("junking entry\n"));
2375 else
2376 do_warn("\n");
2377 return sfep;
2378 }
2379
2380 static void
2381 shortform_dir2_entry_check(
2382 struct xfs_mount *mp,
2383 xfs_ino_t ino,
2384 struct xfs_inode *ip,
2385 int *ino_dirty,
2386 struct ino_tree_node *current_irec,
2387 int current_ino_offset,
2388 struct dir_hash_tab *hashtab)
2389 {
2390 xfs_ino_t lino;
2391 xfs_ino_t parent;
2392 struct xfs_dir2_sf_hdr *sfp;
2393 struct xfs_dir2_sf_entry *sfep;
2394 struct xfs_dir2_sf_entry *next_sfep;
2395 struct xfs_ifork *ifp;
2396 struct ino_tree_node *irec;
2397 int max_size;
2398 int ino_offset;
2399 int i;
2400 int bad_sfnamelen;
2401 int namelen;
2402 int bytes_deleted;
2403 char fname[MAXNAMELEN + 1];
2404 int i8;
2405
2406 ifp = &ip->i_df;
2407 sfp = (struct xfs_dir2_sf_hdr *) ifp->if_u1.if_data;
2408 *ino_dirty = 0;
2409 bytes_deleted = 0;
2410
2411 max_size = ifp->if_bytes;
2412 ASSERT(ip->i_d.di_size <= ifp->if_bytes);
2413
2414 /*
2415 * if just rebuild a directory due to a "..", update and return
2416 */
2417 if (dotdot_update) {
2418 parent = get_inode_parent(current_irec, current_ino_offset);
2419 if (no_modify) {
2420 do_warn(
2421 _("would set .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2422 ino, parent);
2423 } else {
2424 do_warn(
2425 _("setting .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2426 ino, parent);
2427 libxfs_dir2_sf_put_parent_ino(sfp, parent);
2428 *ino_dirty = 1;
2429 }
2430 return;
2431 }
2432
2433 /*
2434 * no '.' entry in shortform dirs, just bump up ref count by 1
2435 * '..' was already (or will be) accounted for and checked when
2436 * the directory is reached or will be taken care of when the
2437 * directory is moved to orphanage.
2438 */
2439 add_inode_ref(current_irec, current_ino_offset);
2440
2441 /*
2442 * Initialise i8 counter -- the parent inode number counts as well.
2443 */
2444 i8 = libxfs_dir2_sf_get_parent_ino(sfp) > XFS_DIR2_MAX_SHORT_INUM;
2445
2446 /*
2447 * now run through entries, stop at first bad entry, don't need
2448 * to skip over '..' since that's encoded in its own field and
2449 * no need to worry about '.' since it doesn't exist.
2450 */
2451 sfep = next_sfep = xfs_dir2_sf_firstentry(sfp);
2452
2453 for (i = 0; i < sfp->count && max_size >
2454 (intptr_t)next_sfep - (intptr_t)sfp;
2455 sfep = next_sfep, i++) {
2456 bad_sfnamelen = 0;
2457
2458 lino = libxfs_dir2_sf_get_ino(mp, sfp, sfep);
2459
2460 namelen = sfep->namelen;
2461
2462 ASSERT(no_modify || namelen > 0);
2463
2464 if (no_modify && namelen == 0) {
2465 /*
2466 * if we're really lucky, this is
2467 * the last entry in which case we
2468 * can use the dir size to set the
2469 * namelen value. otherwise, forget
2470 * it because we're not going to be
2471 * able to find the next entry.
2472 */
2473 bad_sfnamelen = 1;
2474
2475 if (i == sfp->count - 1) {
2476 namelen = ip->i_d.di_size -
2477 ((intptr_t) &sfep->name[0] -
2478 (intptr_t) sfp);
2479 } else {
2480 /*
2481 * don't process the rest of the directory,
2482 * break out of processing loop
2483 */
2484 break;
2485 }
2486 } else if (no_modify && (intptr_t) sfep - (intptr_t) sfp +
2487 + libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen)
2488 > ip->i_d.di_size) {
2489 bad_sfnamelen = 1;
2490
2491 if (i == sfp->count - 1) {
2492 namelen = ip->i_d.di_size -
2493 ((intptr_t) &sfep->name[0] -
2494 (intptr_t) sfp);
2495 } else {
2496 /*
2497 * don't process the rest of the directory,
2498 * break out of processing loop
2499 */
2500 break;
2501 }
2502 }
2503
2504 memmove(fname, sfep->name, sfep->namelen);
2505 fname[sfep->namelen] = '\0';
2506
2507 ASSERT(no_modify || (lino != NULLFSINO && lino != 0));
2508 ASSERT(no_modify || libxfs_verify_dir_ino(mp, lino));
2509
2510 /*
2511 * Also skip entries with bogus inode numbers if we're
2512 * in no modify mode.
2513 */
2514
2515 if (no_modify && !libxfs_verify_dir_ino(mp, lino)) {
2516 next_sfep = libxfs_dir2_sf_nextentry(mp, sfp, sfep);
2517 continue;
2518 }
2519
2520 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, lino),
2521 XFS_INO_TO_AGINO(mp, lino));
2522
2523 if (irec == NULL) {
2524 do_warn(
2525 _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"),
2526 fname, ino, lino);
2527 next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2528 &max_size, &i, &bytes_deleted,
2529 ino_dirty);
2530 continue;
2531 }
2532
2533 ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
2534
2535 /*
2536 * if it's a free inode, blow out the entry.
2537 * by now, any inode that we think is free
2538 * really is free.
2539 */
2540 if (is_inode_free(irec, ino_offset)) {
2541 do_warn(
2542 _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"),
2543 fname, ino, lino);
2544 next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2545 &max_size, &i, &bytes_deleted,
2546 ino_dirty);
2547 continue;
2548 }
2549 /*
2550 * check if this inode is lost+found dir in the root
2551 */
2552 if (ino == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
2553 /*
2554 * if it's not a directory, trash it
2555 */
2556 if (!inode_isadir(irec, ino_offset)) {
2557 do_warn(
2558 _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
2559 ORPHANAGE, lino, ino);
2560 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2561 lino, &max_size, &i,
2562 &bytes_deleted, ino_dirty);
2563 continue;
2564 }
2565 /*
2566 * if this is a dup, it will be picked up below,
2567 * otherwise, mark it as the orphanage for later.
2568 */
2569 if (!orphanage_ino)
2570 orphanage_ino = lino;
2571 }
2572 /*
2573 * check for duplicate names in directory.
2574 */
2575 if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t)
2576 (sfep - xfs_dir2_sf_firstentry(sfp)),
2577 lino, sfep->namelen, sfep->name,
2578 libxfs_dir2_sf_get_ftype(mp, sfep))) {
2579 do_warn(
2580 _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
2581 fname, lino, ino);
2582 next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2583 &max_size, &i, &bytes_deleted,
2584 ino_dirty);
2585 continue;
2586 }
2587
2588 if (!inode_isadir(irec, ino_offset)) {
2589 /*
2590 * check easy case first, regular inode, just bump
2591 * the link count
2592 */
2593 add_inode_reached(irec, ino_offset);
2594 } else {
2595 parent = get_inode_parent(irec, ino_offset);
2596
2597 /*
2598 * bump up the link counts in parent and child.
2599 * directory but if the link doesn't agree with
2600 * the .. in the child, blow out the entry
2601 */
2602 if (is_inode_reached(irec, ino_offset)) {
2603 do_warn(
2604 _("entry \"%s\" in directory inode %" PRIu64
2605 " references already connected inode %" PRIu64 ".\n"),
2606 fname, ino, lino);
2607 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2608 lino, &max_size, &i,
2609 &bytes_deleted, ino_dirty);
2610 continue;
2611 } else if (parent == ino) {
2612 add_inode_reached(irec, ino_offset);
2613 add_inode_ref(current_irec, current_ino_offset);
2614 } else if (parent == NULLFSINO) {
2615 /* ".." was missing, but this entry refers to it,
2616 so, set it as the parent and mark for rebuild */
2617 do_warn(
2618 _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
2619 fname, ino, lino);
2620 set_inode_parent(irec, ino_offset, ino);
2621 add_inode_reached(irec, ino_offset);
2622 add_inode_ref(current_irec, current_ino_offset);
2623 add_dotdot_update(XFS_INO_TO_AGNO(mp, lino),
2624 irec, ino_offset);
2625 } else {
2626 do_warn(
2627 _("entry \"%s\" in directory inode %" PRIu64
2628 " not consistent with .. value (%" PRIu64
2629 ") in inode %" PRIu64 ",\n"),
2630 fname, ino, parent, lino);
2631 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2632 lino, &max_size, &i,
2633 &bytes_deleted, ino_dirty);
2634 continue;
2635 }
2636 }
2637
2638 /* validate ftype field if supported */
2639 if (xfs_sb_version_hasftype(&mp->m_sb)) {
2640 uint8_t dir_ftype;
2641 uint8_t ino_ftype;
2642
2643 dir_ftype = libxfs_dir2_sf_get_ftype(mp, sfep);
2644 ino_ftype = get_inode_ftype(irec, ino_offset);
2645
2646 if (dir_ftype != ino_ftype) {
2647 if (no_modify) {
2648 do_warn(
2649 _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2650 dir_ftype, ino_ftype,
2651 ino, lino);
2652 } else {
2653 do_warn(
2654 _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2655 dir_ftype, ino_ftype,
2656 ino, lino);
2657 libxfs_dir2_sf_put_ftype(mp, sfep,
2658 ino_ftype);
2659 dir_hash_update_ftype(hashtab,
2660 (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)),
2661 ino_ftype);
2662 *ino_dirty = 1;
2663 }
2664 }
2665 }
2666
2667 if (lino > XFS_DIR2_MAX_SHORT_INUM)
2668 i8++;
2669
2670 /*
2671 * go onto next entry - we have to take entries with bad namelen
2672 * into account in no modify mode since we calculate size based
2673 * on next_sfep.
2674 */
2675 ASSERT(no_modify || bad_sfnamelen == 0);
2676 next_sfep = (struct xfs_dir2_sf_entry *)((intptr_t)sfep +
2677 (bad_sfnamelen
2678 ? libxfs_dir2_sf_entsize(mp, sfp, namelen)
2679 : libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen)));
2680 }
2681
2682 if (sfp->i8count != i8) {
2683 if (no_modify) {
2684 do_warn(_("would fix i8count in inode %" PRIu64 "\n"),
2685 ino);
2686 } else {
2687 if (i8 == 0) {
2688 struct xfs_dir2_sf_entry *tmp_sfep;
2689
2690 tmp_sfep = next_sfep;
2691 process_sf_dir2_fixi8(mp, sfp, &tmp_sfep);
2692 bytes_deleted +=
2693 (intptr_t)next_sfep -
2694 (intptr_t)tmp_sfep;
2695 next_sfep = tmp_sfep;
2696 } else
2697 sfp->i8count = i8;
2698 *ino_dirty = 1;
2699 do_warn(_("fixing i8count in inode %" PRIu64 "\n"),
2700 ino);
2701 }
2702 }
2703
2704 /*
2705 * sync up sizes if required
2706 */
2707 if (*ino_dirty && bytes_deleted > 0) {
2708 ASSERT(!no_modify);
2709 libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
2710 ip->i_d.di_size -= bytes_deleted;
2711 }
2712
2713 if (ip->i_d.di_size != ip->i_df.if_bytes) {
2714 ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
2715 ((intptr_t) next_sfep - (intptr_t) sfp));
2716 ip->i_d.di_size = (xfs_fsize_t)
2717 ((intptr_t) next_sfep - (intptr_t) sfp);
2718 do_warn(
2719 _("setting size to %" PRId64 " bytes to reflect junked entries\n"),
2720 ip->i_d.di_size);
2721 *ino_dirty = 1;
2722 }
2723 }
2724
2725 /*
2726 * processes all reachable inodes in directories
2727 */
2728 static void
2729 process_dir_inode(
2730 struct xfs_mount *mp,
2731 xfs_agnumber_t agno,
2732 struct ino_tree_node *irec,
2733 int ino_offset)
2734 {
2735 xfs_ino_t ino;
2736 struct xfs_inode *ip;
2737 struct xfs_trans *tp;
2738 struct dir_hash_tab *hashtab;
2739 int need_dot;
2740 int dirty, num_illegal, error, nres;
2741
2742 ino = XFS_AGINO_TO_INO(mp, agno, irec->ino_startnum + ino_offset);
2743
2744 /*
2745 * open up directory inode, check all entries,
2746 * then call prune_dir_entries to remove all
2747 * remaining illegal directory entries.
2748 */
2749
2750 ASSERT(!is_inode_refchecked(irec, ino_offset) || dotdot_update);
2751
2752 error = -libxfs_iget(mp, NULL, ino, 0, &ip);
2753 if (error) {
2754 if (!no_modify)
2755 do_error(
2756 _("couldn't map inode %" PRIu64 ", err = %d\n"),
2757 ino, error);
2758 else {
2759 do_warn(
2760 _("couldn't map inode %" PRIu64 ", err = %d\n"),
2761 ino, error);
2762 /*
2763 * see below for what we're doing if this
2764 * is root. Why do we need to do this here?
2765 * to ensure that the root doesn't show up
2766 * as being disconnected in the no_modify case.
2767 */
2768 if (mp->m_sb.sb_rootino == ino) {
2769 add_inode_reached(irec, 0);
2770 add_inode_ref(irec, 0);
2771 }
2772 }
2773
2774 add_inode_refchecked(irec, 0);
2775 return;
2776 }
2777
2778 need_dot = dirty = num_illegal = 0;
2779
2780 if (mp->m_sb.sb_rootino == ino) {
2781 /*
2782 * mark root inode reached and bump up
2783 * link count for root inode to account
2784 * for '..' entry since the root inode is
2785 * never reached by a parent. we know
2786 * that root's '..' is always good --
2787 * guaranteed by phase 3 and/or below.
2788 */
2789 add_inode_reached(irec, ino_offset);
2790 }
2791
2792 add_inode_refchecked(irec, ino_offset);
2793
2794 hashtab = dir_hash_init(ip->i_d.di_size);
2795
2796 /*
2797 * look for bogus entries
2798 */
2799 switch (ip->i_df.if_format) {
2800 case XFS_DINODE_FMT_EXTENTS:
2801 case XFS_DINODE_FMT_BTREE:
2802 /*
2803 * also check for missing '.' in longform dirs.
2804 * missing .. entries are added if required when
2805 * the directory is connected to lost+found. but
2806 * we need to create '.' entries here.
2807 */
2808 longform_dir2_entry_check(mp, ino, ip,
2809 &num_illegal, &need_dot,
2810 irec, ino_offset,
2811 hashtab);
2812 break;
2813
2814 case XFS_DINODE_FMT_LOCAL:
2815 /*
2816 * using the remove reservation is overkill
2817 * since at most we'll only need to log the
2818 * inode but it's easier than wedging a
2819 * new define in ourselves.
2820 */
2821 nres = no_modify ? 0 : XFS_REMOVE_SPACE_RES(mp);
2822 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
2823 nres, 0, 0, &tp);
2824 if (error)
2825 res_failed(error);
2826
2827 libxfs_trans_ijoin(tp, ip, 0);
2828
2829 shortform_dir2_entry_check(mp, ino, ip, &dirty,
2830 irec, ino_offset,
2831 hashtab);
2832
2833 ASSERT(dirty == 0 || (dirty && !no_modify));
2834 if (dirty) {
2835 libxfs_trans_log_inode(tp, ip,
2836 XFS_ILOG_CORE | XFS_ILOG_DDATA);
2837 error = -libxfs_trans_commit(tp);
2838 if (error)
2839 do_error(
2840 _("error %d fixing shortform directory %llu\n"),
2841 error,
2842 (unsigned long long)ip->i_ino);
2843 } else {
2844 libxfs_trans_cancel(tp);
2845 }
2846 break;
2847
2848 default:
2849 break;
2850 }
2851 dir_hash_done(hashtab);
2852
2853 /*
2854 * if we have to create a .. for /, do it now *before*
2855 * we delete the bogus entries, otherwise the directory
2856 * could transform into a shortform dir which would
2857 * probably cause the simulation to choke. Even
2858 * if the illegal entries get shifted around, it's ok
2859 * because the entries are structurally intact and in
2860 * in hash-value order so the simulation won't get confused
2861 * if it has to move them around.
2862 */
2863 if (!no_modify && need_root_dotdot && ino == mp->m_sb.sb_rootino) {
2864 ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_LOCAL);
2865
2866 do_warn(_("recreating root directory .. entry\n"));
2867
2868 nres = XFS_MKDIR_SPACE_RES(mp, 2);
2869 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
2870 nres, 0, 0, &tp);
2871 if (error)
2872 res_failed(error);
2873
2874 libxfs_trans_ijoin(tp, ip, 0);
2875
2876 error = -libxfs_dir_createname(tp, ip, &xfs_name_dotdot,
2877 ip->i_ino, nres);
2878 if (error)
2879 do_error(
2880 _("can't make \"..\" entry in root inode %" PRIu64 ", createname error %d\n"), ino, error);
2881
2882 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2883 error = -libxfs_trans_commit(tp);
2884 if (error)
2885 do_error(
2886 _("root inode \"..\" entry recreation failed (%d)\n"), error);
2887
2888 need_root_dotdot = 0;
2889 } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino) {
2890 do_warn(_("would recreate root directory .. entry\n"));
2891 }
2892
2893 /*
2894 * if we need to create the '.' entry, do so only if
2895 * the directory is a longform dir. if it's been
2896 * turned into a shortform dir, then the inode is ok
2897 * since shortform dirs have no '.' entry and the inode
2898 * has already been committed by prune_lf_dir_entry().
2899 */
2900 if (need_dot) {
2901 /*
2902 * bump up our link count but don't
2903 * bump up the inode link count. chances
2904 * are good that even though we lost '.'
2905 * the inode link counts reflect '.' so
2906 * leave the inode link count alone and if
2907 * it turns out to be wrong, we'll catch
2908 * that in phase 7.
2909 */
2910 add_inode_ref(irec, ino_offset);
2911
2912 if (no_modify) {
2913 do_warn(
2914 _("would create missing \".\" entry in dir ino %" PRIu64 "\n"),
2915 ino);
2916 } else if (ip->i_df.if_format != XFS_DINODE_FMT_LOCAL) {
2917 /*
2918 * need to create . entry in longform dir.
2919 */
2920 do_warn(
2921 _("creating missing \".\" entry in dir ino %" PRIu64 "\n"), ino);
2922
2923 nres = XFS_MKDIR_SPACE_RES(mp, 1);
2924 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
2925 nres, 0, 0, &tp);
2926 if (error)
2927 res_failed(error);
2928
2929 libxfs_trans_ijoin(tp, ip, 0);
2930
2931 error = -libxfs_dir_createname(tp, ip, &xfs_name_dot,
2932 ip->i_ino, nres);
2933 if (error)
2934 do_error(
2935 _("can't make \".\" entry in dir ino %" PRIu64 ", createname error %d\n"),
2936 ino, error);
2937
2938 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2939 error = -libxfs_trans_commit(tp);
2940 if (error)
2941 do_error(
2942 _("root inode \".\" entry recreation failed (%d)\n"), error);
2943 }
2944 }
2945 libxfs_irele(ip);
2946 }
2947
2948 /*
2949 * mark realtime bitmap and summary inodes as reached.
2950 * quota inode will be marked here as well
2951 */
2952 static void
2953 mark_standalone_inodes(xfs_mount_t *mp)
2954 {
2955 ino_tree_node_t *irec;
2956 int offset;
2957
2958 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rbmino),
2959 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino));
2960
2961 offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino) -
2962 irec->ino_startnum;
2963
2964 add_inode_reached(irec, offset);
2965
2966 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rsumino),
2967 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino));
2968
2969 offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino) -
2970 irec->ino_startnum;
2971
2972 add_inode_reached(irec, offset);
2973
2974 if (fs_quotas) {
2975 if (mp->m_sb.sb_uquotino
2976 && mp->m_sb.sb_uquotino != NULLFSINO) {
2977 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
2978 mp->m_sb.sb_uquotino),
2979 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino));
2980 offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino)
2981 - irec->ino_startnum;
2982 add_inode_reached(irec, offset);
2983 }
2984 if (mp->m_sb.sb_gquotino
2985 && mp->m_sb.sb_gquotino != NULLFSINO) {
2986 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
2987 mp->m_sb.sb_gquotino),
2988 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino));
2989 offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino)
2990 - irec->ino_startnum;
2991 add_inode_reached(irec, offset);
2992 }
2993 if (mp->m_sb.sb_pquotino
2994 && mp->m_sb.sb_pquotino != NULLFSINO) {
2995 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
2996 mp->m_sb.sb_pquotino),
2997 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino));
2998 offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino)
2999 - irec->ino_startnum;
3000 add_inode_reached(irec, offset);
3001 }
3002 }
3003 }
3004
3005 static void
3006 check_for_orphaned_inodes(
3007 xfs_mount_t *mp,
3008 xfs_agnumber_t agno,
3009 ino_tree_node_t *irec)
3010 {
3011 int i;
3012 xfs_ino_t ino;
3013
3014 for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
3015 ASSERT(is_inode_confirmed(irec, i));
3016 if (is_inode_free(irec, i))
3017 continue;
3018
3019 if (is_inode_reached(irec, i))
3020 continue;
3021
3022 ASSERT(inode_isadir(irec, i) ||
3023 num_inode_references(irec, i) == 0);
3024
3025 ino = XFS_AGINO_TO_INO(mp, agno, i + irec->ino_startnum);
3026 if (inode_isadir(irec, i))
3027 do_warn(_("disconnected dir inode %" PRIu64 ", "), ino);
3028 else
3029 do_warn(_("disconnected inode %" PRIu64 ", "), ino);
3030 if (!no_modify) {
3031 if (!orphanage_ino)
3032 orphanage_ino = mk_orphanage(mp);
3033 do_warn(_("moving to %s\n"), ORPHANAGE);
3034 mv_orphanage(mp, ino, inode_isadir(irec, i));
3035 } else {
3036 do_warn(_("would move to %s\n"), ORPHANAGE);
3037 }
3038 /*
3039 * for read-only case, even though the inode isn't
3040 * really reachable, set the flag (and bump our link
3041 * count) anyway to fool phase 7
3042 */
3043 add_inode_reached(irec, i);
3044 }
3045 }
3046
3047 static void
3048 do_dir_inode(
3049 struct workqueue *wq,
3050 xfs_agnumber_t agno,
3051 void *arg)
3052 {
3053 struct ino_tree_node *irec = arg;
3054 int i;
3055
3056 for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
3057 if (inode_isadir(irec, i))
3058 process_dir_inode(wq->wq_ctx, agno, irec, i);
3059 }
3060 }
3061
3062 static void
3063 traverse_function(
3064 struct workqueue *wq,
3065 xfs_agnumber_t agno,
3066 void *arg)
3067 {
3068 struct ino_tree_node *irec;
3069 prefetch_args_t *pf_args = arg;
3070 struct workqueue lwq;
3071 struct xfs_mount *mp = wq->wq_ctx;
3072
3073 wait_for_inode_prefetch(pf_args);
3074
3075 if (verbose)
3076 do_log(_(" - agno = %d\n"), agno);
3077
3078 /*
3079 * The more AGs we have in flight at once, the fewer processing threads
3080 * per AG. This means we don't overwhelm the machine with hundreds of
3081 * threads when we start acting on lots of AGs at once. We just want
3082 * enough that we can keep multiple CPUs busy across multiple AGs.
3083 */
3084 workqueue_create_bound(&lwq, mp, ag_stride, 1000);
3085
3086 for (irec = findfirst_inode_rec(agno); irec; irec = next_ino_rec(irec)) {
3087 if (irec->ino_isa_dir == 0)
3088 continue;
3089
3090 if (pf_args) {
3091 sem_post(&pf_args->ra_count);
3092 #ifdef XR_PF_TRACE
3093 {
3094 int i;
3095 sem_getvalue(&pf_args->ra_count, &i);
3096 pftrace(
3097 "processing inode chunk %p in AG %d (sem count = %d)",
3098 irec, agno, i);
3099 }
3100 #endif
3101 }
3102
3103 queue_work(&lwq, do_dir_inode, agno, irec);
3104 }
3105 destroy_work_queue(&lwq);
3106 cleanup_inode_prefetch(pf_args);
3107 }
3108
3109 static void
3110 update_missing_dotdot_entries(
3111 xfs_mount_t *mp)
3112 {
3113 dotdot_update_t *dir;
3114
3115 /*
3116 * these entries parents were updated, rebuild them again
3117 * set dotdot_update flag so processing routines do not count links
3118 */
3119 dotdot_update = 1;
3120 while (!list_empty(&dotdot_update_list)) {
3121 dir = list_entry(dotdot_update_list.prev, struct dotdot_update,
3122 list);
3123 list_del(&dir->list);
3124 process_dir_inode(mp, dir->agno, dir->irec, dir->ino_offset);
3125 free(dir);
3126 }
3127 }
3128
3129 static void
3130 traverse_ags(
3131 struct xfs_mount *mp)
3132 {
3133 do_inode_prefetch(mp, ag_stride, traverse_function, false, true);
3134 }
3135
3136 void
3137 phase6(xfs_mount_t *mp)
3138 {
3139 ino_tree_node_t *irec;
3140 int i;
3141
3142 memset(&zerocr, 0, sizeof(struct cred));
3143 memset(&zerofsx, 0, sizeof(struct fsxattr));
3144 orphanage_ino = 0;
3145
3146 do_log(_("Phase 6 - check inode connectivity...\n"));
3147
3148 incore_ext_teardown(mp);
3149
3150 add_ino_ex_data(mp);
3151
3152 /*
3153 * verify existence of root directory - if we have to
3154 * make one, it's ok for the incore data structs not to
3155 * know about it since everything about it (and the other
3156 * inodes in its chunk if a new chunk was created) are ok
3157 */
3158 if (need_root_inode) {
3159 if (!no_modify) {
3160 do_warn(_("reinitializing root directory\n"));
3161 mk_root_dir(mp);
3162 need_root_inode = 0;
3163 need_root_dotdot = 0;
3164 } else {
3165 do_warn(_("would reinitialize root directory\n"));
3166 }
3167 }
3168
3169 if (need_rbmino) {
3170 if (!no_modify) {
3171 do_warn(_("reinitializing realtime bitmap inode\n"));
3172 mk_rbmino(mp);
3173 need_rbmino = 0;
3174 } else {
3175 do_warn(_("would reinitialize realtime bitmap inode\n"));
3176 }
3177 }
3178
3179 if (need_rsumino) {
3180 if (!no_modify) {
3181 do_warn(_("reinitializing realtime summary inode\n"));
3182 mk_rsumino(mp);
3183 need_rsumino = 0;
3184 } else {
3185 do_warn(_("would reinitialize realtime summary inode\n"));
3186 }
3187 }
3188
3189 if (!no_modify) {
3190 do_log(
3191 _(" - resetting contents of realtime bitmap and summary inodes\n"));
3192 if (fill_rbmino(mp)) {
3193 do_warn(
3194 _("Warning: realtime bitmap may be inconsistent\n"));
3195 }
3196
3197 if (fill_rsumino(mp)) {
3198 do_warn(
3199 _("Warning: realtime bitmap may be inconsistent\n"));
3200 }
3201 }
3202
3203 mark_standalone_inodes(mp);
3204
3205 do_log(_(" - traversing filesystem ...\n"));
3206
3207 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
3208 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
3209
3210 /*
3211 * we always have a root inode, even if it's free...
3212 * if the root is free, forget it, lost+found is already gone
3213 */
3214 if (is_inode_free(irec, 0) || !inode_isadir(irec, 0)) {
3215 need_root_inode = 1;
3216 }
3217
3218 /*
3219 * then process all inodes by walking incore inode tree
3220 */
3221 traverse_ags(mp);
3222
3223 /*
3224 * any directories that had updated ".." entries, rebuild them now
3225 */
3226 update_missing_dotdot_entries(mp);
3227
3228 do_log(_(" - traversal finished ...\n"));
3229 do_log(_(" - moving disconnected inodes to %s ...\n"),
3230 ORPHANAGE);
3231
3232 /*
3233 * move all disconnected inodes to the orphanage
3234 */
3235 for (i = 0; i < glob_agcount; i++) {
3236 irec = findfirst_inode_rec(i);
3237 while (irec != NULL) {
3238 check_for_orphaned_inodes(mp, i, irec);
3239 irec = next_ino_rec(irec);
3240 }
3241 }
3242 }