]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - repair/phase6.c
xfs: convert bp->b_bn references to xfs_buf_daddr()
[thirdparty/xfsprogs-dev.git] / repair / phase6.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6
7 #include "libxfs.h"
8 #include "threads.h"
9 #include "threads.h"
10 #include "prefetch.h"
11 #include "avl.h"
12 #include "globals.h"
13 #include "agheader.h"
14 #include "incore.h"
15 #include "dir2.h"
16 #include "protos.h"
17 #include "err_protos.h"
18 #include "dinode.h"
19 #include "progress.h"
20 #include "versions.h"
21
22 static struct cred zerocr;
23 static struct fsxattr zerofsx;
24 static xfs_ino_t orphanage_ino;
25
26 static struct xfs_name xfs_name_dot = {(unsigned char *)".",
27 1,
28 XFS_DIR3_FT_DIR};
29
30 /*
31 * Data structures used to keep track of directories where the ".."
32 * entries are updated. These must be rebuilt after the initial pass
33 */
34 typedef struct dotdot_update {
35 struct list_head list;
36 ino_tree_node_t *irec;
37 xfs_agnumber_t agno;
38 int ino_offset;
39 } dotdot_update_t;
40
41 static LIST_HEAD(dotdot_update_list);
42 static int dotdot_update;
43
44 static void
45 add_dotdot_update(
46 xfs_agnumber_t agno,
47 ino_tree_node_t *irec,
48 int ino_offset)
49 {
50 dotdot_update_t *dir = malloc(sizeof(dotdot_update_t));
51
52 if (!dir)
53 do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"),
54 sizeof(dotdot_update_t));
55
56 INIT_LIST_HEAD(&dir->list);
57 dir->irec = irec;
58 dir->agno = agno;
59 dir->ino_offset = ino_offset;
60
61 list_add(&dir->list, &dotdot_update_list);
62 }
63
64 /*
65 * Data structures and routines to keep track of directory entries
66 * and whether their leaf entry has been seen. Also used for name
67 * duplicate checking and rebuilding step if required.
68 */
69 struct dir_hash_ent {
70 struct dir_hash_ent *nextbyhash; /* next in name bucket */
71 struct dir_hash_ent *nextbyorder; /* next in order added */
72 xfs_dahash_t hashval; /* hash value of name */
73 uint32_t address; /* offset of data entry */
74 xfs_ino_t inum; /* inode num of entry */
75 short junkit; /* name starts with / */
76 short seen; /* have seen leaf entry */
77 struct xfs_name name;
78 unsigned char namebuf[];
79 };
80
81 struct dir_hash_tab {
82 int size; /* size of hash tables */
83 struct dir_hash_ent *first; /* ptr to first added entry */
84 struct dir_hash_ent *last; /* ptr to last added entry */
85 struct dir_hash_ent **byhash; /* ptr to name hash buckets */
86 #define HT_UNSEEN 1
87 struct radix_tree_root byaddr;
88 };
89
90 #define DIR_HASH_TAB_SIZE(n) \
91 (sizeof(struct dir_hash_tab) + (sizeof(struct dir_hash_ent *) * (n)))
92 #define DIR_HASH_FUNC(t,a) ((a) % (t)->size)
93
94 /*
95 * Track the contents of the freespace table in a directory.
96 */
97 typedef struct freetab {
98 int naents; /* expected number of data blocks */
99 int nents; /* number of data blocks processed */
100 struct freetab_ent {
101 xfs_dir2_data_off_t v;
102 short s;
103 } ents[1];
104 } freetab_t;
105 #define FREETAB_SIZE(n) \
106 (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
107
/*
 * Result codes of the directory hash checks.  DIR_HASH_CK_TOTAL is the
 * number of codes and sizes the message table in dir_hash_check().
 */
enum {
	DIR_HASH_CK_OK		= 0,
	DIR_HASH_CK_DUPLEAF	= 1,
	DIR_HASH_CK_BADHASH	= 2,
	DIR_HASH_CK_NODATA	= 3,
	DIR_HASH_CK_NOLEAF	= 4,
	DIR_HASH_CK_BADSTALE	= 5,
	DIR_HASH_CK_TOTAL	= 6,
};
115
116 /*
117 * Need to handle CRC and validation errors specially here. If there is a
118 * validator error, re-read without the verifier so that we get a buffer we can
119 * check and repair. Re-attach the ops to the buffer after the read so that when
120 * it is rewritten the CRC is recalculated.
121 *
122 * If the buffer was not read, we return an error. If the buffer was read but
123 * had a CRC or corruption error, we reread it without the verifier and if it is
124 * read successfully we increment *crc_error and return 0. Otherwise we
125 * return the read error.
126 */
127 static int
128 dir_read_buf(
129 struct xfs_inode *ip,
130 xfs_dablk_t bno,
131 struct xfs_buf **bpp,
132 const struct xfs_buf_ops *ops,
133 int *crc_error)
134 {
135 int error;
136 int error2;
137
138 error = -libxfs_da_read_buf(NULL, ip, bno, 0, bpp, XFS_DATA_FORK, ops);
139
140 if (error != EFSBADCRC && error != EFSCORRUPTED)
141 return error;
142
143 error2 = -libxfs_da_read_buf(NULL, ip, bno, 0, bpp, XFS_DATA_FORK,
144 NULL);
145 if (error2)
146 return error2;
147
148 (*crc_error)++;
149 (*bpp)->b_ops = ops;
150 return 0;
151 }
152
153 /*
154 * Returns 0 if the name already exists (ie. a duplicate)
155 */
156 static int
157 dir_hash_add(
158 struct xfs_mount *mp,
159 struct dir_hash_tab *hashtab,
160 uint32_t addr,
161 xfs_ino_t inum,
162 int namelen,
163 unsigned char *name,
164 uint8_t ftype)
165 {
166 xfs_dahash_t hash = 0;
167 int byhash = 0;
168 struct dir_hash_ent *p;
169 int dup;
170 short junk;
171 struct xfs_name xname;
172 int error;
173
174 xname.name = name;
175 xname.len = namelen;
176 xname.type = ftype;
177
178 junk = name[0] == '/';
179 dup = 0;
180
181 if (!junk) {
182 hash = libxfs_dir2_hashname(mp, &xname);
183 byhash = DIR_HASH_FUNC(hashtab, hash);
184
185 /*
186 * search hash bucket for existing name.
187 */
188 for (p = hashtab->byhash[byhash]; p; p = p->nextbyhash) {
189 if (p->hashval == hash && p->name.len == namelen) {
190 if (memcmp(p->name.name, name, namelen) == 0) {
191 dup = 1;
192 junk = 1;
193 break;
194 }
195 }
196 }
197 }
198
199 /*
200 * Allocate enough space for the hash entry and the name in a single
201 * allocation so we can store our own copy of the name for later use.
202 */
203 p = calloc(1, sizeof(*p) + namelen + 1);
204 if (!p)
205 do_error(_("malloc failed in dir_hash_add (%zu bytes)\n"),
206 sizeof(*p));
207
208 error = radix_tree_insert(&hashtab->byaddr, addr, p);
209 if (error == EEXIST) {
210 do_warn(_("duplicate addrs %u in directory!\n"), addr);
211 free(p);
212 return 0;
213 }
214 radix_tree_tag_set(&hashtab->byaddr, addr, HT_UNSEEN);
215
216 if (hashtab->last)
217 hashtab->last->nextbyorder = p;
218 else
219 hashtab->first = p;
220 p->nextbyorder = NULL;
221 hashtab->last = p;
222
223 if (!(p->junkit = junk)) {
224 p->hashval = hash;
225 p->nextbyhash = hashtab->byhash[byhash];
226 hashtab->byhash[byhash] = p;
227 }
228 p->address = addr;
229 p->inum = inum;
230 p->seen = 0;
231
232 /* Set up the name in the region trailing the hash entry. */
233 memcpy(p->namebuf, name, namelen);
234 p->name.name = p->namebuf;
235 p->name.len = namelen;
236 p->name.type = ftype;
237 return !dup;
238 }
239
240 /* Mark an existing directory hashtable entry as junk. */
241 static void
242 dir_hash_junkit(
243 struct dir_hash_tab *hashtab,
244 xfs_dir2_dataptr_t addr)
245 {
246 struct dir_hash_ent *p;
247
248 p = radix_tree_lookup(&hashtab->byaddr, addr);
249 assert(p != NULL);
250
251 p->junkit = 1;
252 p->namebuf[0] = '/';
253 }
254
255 static int
256 dir_hash_check(
257 struct dir_hash_tab *hashtab,
258 struct xfs_inode *ip,
259 int seeval)
260 {
261 static char *seevalstr[DIR_HASH_CK_TOTAL];
262 static int done;
263
264 if (!done) {
265 seevalstr[DIR_HASH_CK_OK] = _("ok");
266 seevalstr[DIR_HASH_CK_DUPLEAF] = _("duplicate leaf");
267 seevalstr[DIR_HASH_CK_BADHASH] = _("hash value mismatch");
268 seevalstr[DIR_HASH_CK_NODATA] = _("no data entry");
269 seevalstr[DIR_HASH_CK_NOLEAF] = _("no leaf entry");
270 seevalstr[DIR_HASH_CK_BADSTALE] = _("bad stale count");
271 done = 1;
272 }
273
274 if (seeval == DIR_HASH_CK_OK &&
275 radix_tree_tagged(&hashtab->byaddr, HT_UNSEEN))
276 seeval = DIR_HASH_CK_NOLEAF;
277 if (seeval == DIR_HASH_CK_OK)
278 return 0;
279 do_warn(_("bad hash table for directory inode %" PRIu64 " (%s): "),
280 ip->i_ino, seevalstr[seeval]);
281 if (!no_modify)
282 do_warn(_("rebuilding\n"));
283 else
284 do_warn(_("would rebuild\n"));
285 return 1;
286 }
287
288 static void
289 dir_hash_done(
290 struct dir_hash_tab *hashtab)
291 {
292 int i;
293 struct dir_hash_ent *n;
294 struct dir_hash_ent *p;
295
296 for (i = 0; i < hashtab->size; i++) {
297 for (p = hashtab->byhash[i]; p; p = n) {
298 n = p->nextbyhash;
299 radix_tree_delete(&hashtab->byaddr, p->address);
300 free(p);
301 }
302 }
303 free(hashtab);
304 }
305
306 /*
307 * Create a directory hash index structure based on the size of the directory we
308 * are about to try to repair. The size passed in is the size of the data
309 * segment of the directory in bytes, so we don't really know exactly how many
310 * entries are in it. Hence assume an entry size of around 64 bytes - that's a
311 * name length of 40+ bytes so should cover a most situations with really large
312 * directories.
313 */
314 static struct dir_hash_tab *
315 dir_hash_init(
316 xfs_fsize_t size)
317 {
318 struct dir_hash_tab *hashtab = NULL;
319 int hsize;
320
321 hsize = size / 64;
322 if (hsize < 16)
323 hsize = 16;
324
325 /*
326 * Try to allocate as large a hash table as possible. Failure to
327 * allocate isn't fatal, it will just result in slower performance as we
328 * reduce the size of the table.
329 */
330 while (hsize >= 16) {
331 hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1);
332 if (hashtab)
333 break;
334 hsize /= 2;
335 }
336 if (!hashtab)
337 do_error(_("calloc failed in dir_hash_init\n"));
338 hashtab->size = hsize;
339 hashtab->byhash = (struct dir_hash_ent **)((char *)hashtab +
340 sizeof(struct dir_hash_tab));
341 INIT_RADIX_TREE(&hashtab->byaddr, 0);
342 return hashtab;
343 }
344
345 static int
346 dir_hash_see(
347 struct dir_hash_tab *hashtab,
348 xfs_dahash_t hash,
349 xfs_dir2_dataptr_t addr)
350 {
351 struct dir_hash_ent *p;
352
353 p = radix_tree_lookup(&hashtab->byaddr, addr);
354 if (!p)
355 return DIR_HASH_CK_NODATA;
356 if (!radix_tree_tag_get(&hashtab->byaddr, addr, HT_UNSEEN))
357 return DIR_HASH_CK_DUPLEAF;
358 if (p->junkit == 0 && p->hashval != hash)
359 return DIR_HASH_CK_BADHASH;
360 radix_tree_tag_clear(&hashtab->byaddr, addr, HT_UNSEEN);
361 return DIR_HASH_CK_OK;
362 }
363
364 static void
365 dir_hash_update_ftype(
366 struct dir_hash_tab *hashtab,
367 xfs_dir2_dataptr_t addr,
368 uint8_t ftype)
369 {
370 struct dir_hash_ent *p;
371
372 p = radix_tree_lookup(&hashtab->byaddr, addr);
373 if (!p)
374 return;
375 p->name.type = ftype;
376 }
377
378 /*
379 * checks to make sure leafs match a data entry, and that the stale
380 * count is valid.
381 */
382 static int
383 dir_hash_see_all(
384 struct dir_hash_tab *hashtab,
385 xfs_dir2_leaf_entry_t *ents,
386 int count,
387 int stale)
388 {
389 int i;
390 int j;
391 int rval;
392
393 for (i = j = 0; i < count; i++) {
394 if (be32_to_cpu(ents[i].address) == XFS_DIR2_NULL_DATAPTR) {
395 j++;
396 continue;
397 }
398 rval = dir_hash_see(hashtab, be32_to_cpu(ents[i].hashval),
399 be32_to_cpu(ents[i].address));
400 if (rval != DIR_HASH_CK_OK)
401 return rval;
402 }
403 return j == stale ? DIR_HASH_CK_OK : DIR_HASH_CK_BADSTALE;
404 }
405
406 /*
407 * Given a block number in a fork, return the next valid block number (not a
408 * hole). If this is the last block number then NULLFILEOFF is returned.
409 */
410 static int
411 bmap_next_offset(
412 struct xfs_inode *ip,
413 xfs_fileoff_t *bnop)
414 {
415 xfs_fileoff_t bno;
416 int error;
417 struct xfs_bmbt_irec got;
418 struct xfs_iext_cursor icur;
419
420 switch (ip->i_df.if_format) {
421 case XFS_DINODE_FMT_LOCAL:
422 *bnop = NULLFILEOFF;
423 return 0;
424 case XFS_DINODE_FMT_BTREE:
425 case XFS_DINODE_FMT_EXTENTS:
426 break;
427 default:
428 return EIO;
429 }
430
431 /* Read extent map. */
432 error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
433 if (error)
434 return error;
435
436 bno = *bnop + 1;
437 if (!libxfs_iext_lookup_extent(ip, &ip->i_df, bno, &icur, &got))
438 *bnop = NULLFILEOFF;
439 else
440 *bnop = got.br_startoff < bno ? bno : got.br_startoff;
441 return 0;
442 }
443
/*
 * Report a failed transaction reservation through do_error(), with a
 * dedicated message for ENOSPC.
 */
static void
res_failed(
	int	err)
{
	if (err != ENOSPC)
		do_error(_("xfs_trans_reserve returned %d\n"), err);
	else
		do_error(_("ran out of disk space!\n"));
}
453
454 static inline void
455 reset_inode_fields(struct xfs_inode *ip)
456 {
457 ip->i_projid = 0;
458 ip->i_disk_size = 0;
459 ip->i_nblocks = 0;
460 ip->i_extsize = 0;
461 ip->i_cowextsize = 0;
462 ip->i_flushiter = 0;
463 ip->i_forkoff = 0;
464 ip->i_diflags = 0;
465 ip->i_diflags2 = 0;
466 ip->i_crtime.tv_sec = 0;
467 ip->i_crtime.tv_nsec = 0;
468 }
469
470 static void
471 mk_rbmino(xfs_mount_t *mp)
472 {
473 xfs_trans_t *tp;
474 xfs_inode_t *ip;
475 xfs_bmbt_irec_t *ep;
476 int i;
477 int nmap;
478 int error;
479 xfs_fileoff_t bno;
480 xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
481 int times;
482 uint blocks;
483
484 /*
485 * first set up inode
486 */
487 i = -libxfs_trans_alloc_rollable(mp, 10, &tp);
488 if (i)
489 res_failed(i);
490
491 error = -libxfs_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
492 if (error) {
493 do_error(
494 _("couldn't iget realtime bitmap inode -- error - %d\n"),
495 error);
496 }
497
498 reset_inode_fields(ip);
499
500 VFS_I(ip)->i_mode = S_IFREG;
501 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
502 if (ip->i_afp)
503 ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
504
505 set_nlink(VFS_I(ip), 1); /* account for sb ptr */
506
507 times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
508 if (xfs_has_v3inodes(mp)) {
509 VFS_I(ip)->i_version = 1;
510 ip->i_diflags2 = 0;
511 times |= XFS_ICHGTIME_CREATE;
512 }
513 libxfs_trans_ichgtime(tp, ip, times);
514
515 /*
516 * now the ifork
517 */
518 ip->i_df.if_bytes = 0;
519 ip->i_df.if_u1.if_root = NULL;
520
521 ip->i_disk_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
522
523 /*
524 * commit changes
525 */
526 libxfs_trans_ijoin(tp, ip, 0);
527 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
528 error = -libxfs_trans_commit(tp);
529 if (error)
530 do_error(_("%s: commit failed, error %d\n"), __func__, error);
531
532 /*
533 * then allocate blocks for file and fill with zeroes (stolen
534 * from mkfs)
535 */
536 blocks = mp->m_sb.sb_rbmblocks +
537 XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
538 error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
539 if (error)
540 res_failed(error);
541
542 libxfs_trans_ijoin(tp, ip, 0);
543 bno = 0;
544 while (bno < mp->m_sb.sb_rbmblocks) {
545 nmap = XFS_BMAP_MAX_NMAP;
546 error = -libxfs_bmapi_write(tp, ip, bno,
547 (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
548 0, mp->m_sb.sb_rbmblocks, map, &nmap);
549 if (error) {
550 do_error(
551 _("couldn't allocate realtime bitmap, error = %d\n"),
552 error);
553 }
554 for (i = 0, ep = map; i < nmap; i++, ep++) {
555 libxfs_device_zero(mp->m_ddev_targp,
556 XFS_FSB_TO_DADDR(mp, ep->br_startblock),
557 XFS_FSB_TO_BB(mp, ep->br_blockcount));
558 bno += ep->br_blockcount;
559 }
560 }
561 error = -libxfs_trans_commit(tp);
562 if (error) {
563 do_error(
564 _("allocation of the realtime bitmap failed, error = %d\n"),
565 error);
566 }
567 libxfs_irele(ip);
568 }
569
570 static int
571 fill_rbmino(xfs_mount_t *mp)
572 {
573 struct xfs_buf *bp;
574 xfs_trans_t *tp;
575 xfs_inode_t *ip;
576 xfs_rtword_t *bmp;
577 int nmap;
578 int error;
579 xfs_fileoff_t bno;
580 xfs_bmbt_irec_t map;
581
582 bmp = btmcompute;
583 bno = 0;
584
585 error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
586 if (error)
587 res_failed(error);
588
589 error = -libxfs_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
590 if (error) {
591 do_error(
592 _("couldn't iget realtime bitmap inode -- error - %d\n"),
593 error);
594 }
595
596 while (bno < mp->m_sb.sb_rbmblocks) {
597 /*
598 * fill the file one block at a time
599 */
600 nmap = 1;
601 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0, 1, &map, &nmap);
602 if (error || nmap != 1) {
603 do_error(
604 _("couldn't map realtime bitmap block %" PRIu64 ", error = %d\n"),
605 bno, error);
606 }
607
608 ASSERT(map.br_startblock != HOLESTARTBLOCK);
609
610 error = -libxfs_trans_read_buf(
611 mp, tp, mp->m_dev,
612 XFS_FSB_TO_DADDR(mp, map.br_startblock),
613 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
614
615 if (error) {
616 do_warn(
617 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime bitmap inode %" PRIu64 "\n"),
618 bno, map.br_startblock, mp->m_sb.sb_rbmino);
619 return(1);
620 }
621
622 memmove(bp->b_addr, bmp, mp->m_sb.sb_blocksize);
623
624 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
625
626 bmp = (xfs_rtword_t *)((intptr_t) bmp + mp->m_sb.sb_blocksize);
627 bno++;
628 }
629
630 libxfs_trans_ijoin(tp, ip, 0);
631 error = -libxfs_trans_commit(tp);
632 if (error)
633 do_error(_("%s: commit failed, error %d\n"), __func__, error);
634 libxfs_irele(ip);
635 return(0);
636 }
637
638 static int
639 fill_rsumino(xfs_mount_t *mp)
640 {
641 struct xfs_buf *bp;
642 xfs_trans_t *tp;
643 xfs_inode_t *ip;
644 xfs_suminfo_t *smp;
645 int nmap;
646 int error;
647 xfs_fileoff_t bno;
648 xfs_fileoff_t end_bno;
649 xfs_bmbt_irec_t map;
650
651 smp = sumcompute;
652 bno = 0;
653 end_bno = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
654
655 error = -libxfs_trans_alloc_rollable(mp, 10, &tp);
656 if (error)
657 res_failed(error);
658
659 error = -libxfs_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
660 if (error) {
661 do_error(
662 _("couldn't iget realtime summary inode -- error - %d\n"),
663 error);
664 }
665
666 while (bno < end_bno) {
667 /*
668 * fill the file one block at a time
669 */
670 nmap = 1;
671 error = -libxfs_bmapi_write(tp, ip, bno, 1, 0, 1, &map, &nmap);
672 if (error || nmap != 1) {
673 do_error(
674 _("couldn't map realtime summary inode block %" PRIu64 ", error = %d\n"),
675 bno, error);
676 }
677
678 ASSERT(map.br_startblock != HOLESTARTBLOCK);
679
680 error = -libxfs_trans_read_buf(
681 mp, tp, mp->m_dev,
682 XFS_FSB_TO_DADDR(mp, map.br_startblock),
683 XFS_FSB_TO_BB(mp, 1), 1, &bp, NULL);
684
685 if (error) {
686 do_warn(
687 _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime summary inode %" PRIu64 "\n"),
688 bno, map.br_startblock, mp->m_sb.sb_rsumino);
689 libxfs_irele(ip);
690 return(1);
691 }
692
693 memmove(bp->b_addr, smp, mp->m_sb.sb_blocksize);
694
695 libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
696
697 smp = (xfs_suminfo_t *)((intptr_t)smp + mp->m_sb.sb_blocksize);
698 bno++;
699 }
700
701 libxfs_trans_ijoin(tp, ip, 0);
702 error = -libxfs_trans_commit(tp);
703 if (error)
704 do_error(_("%s: commit failed, error %d\n"), __func__, error);
705 libxfs_irele(ip);
706 return(0);
707 }
708
709 static void
710 mk_rsumino(xfs_mount_t *mp)
711 {
712 xfs_trans_t *tp;
713 xfs_inode_t *ip;
714 xfs_bmbt_irec_t *ep;
715 int i;
716 int nmap;
717 int error;
718 int nsumblocks;
719 xfs_fileoff_t bno;
720 xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
721 int times;
722 uint blocks;
723
724 /*
725 * first set up inode
726 */
727 i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
728 if (i)
729 res_failed(i);
730
731 error = -libxfs_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
732 if (error) {
733 do_error(
734 _("couldn't iget realtime summary inode -- error - %d\n"),
735 error);
736 }
737
738 reset_inode_fields(ip);
739
740 VFS_I(ip)->i_mode = S_IFREG;
741 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
742 if (ip->i_afp)
743 ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
744
745 set_nlink(VFS_I(ip), 1); /* account for sb ptr */
746
747 times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
748 if (xfs_has_v3inodes(mp)) {
749 VFS_I(ip)->i_version = 1;
750 ip->i_diflags2 = 0;
751 times |= XFS_ICHGTIME_CREATE;
752 }
753 libxfs_trans_ichgtime(tp, ip, times);
754
755 /*
756 * now the ifork
757 */
758 ip->i_df.if_bytes = 0;
759 ip->i_df.if_u1.if_root = NULL;
760
761 ip->i_disk_size = mp->m_rsumsize;
762
763 /*
764 * commit changes
765 */
766 libxfs_trans_ijoin(tp, ip, 0);
767 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
768 error = -libxfs_trans_commit(tp);
769 if (error)
770 do_error(_("%s: commit failed, error %d\n"), __func__, error);
771
772 /*
773 * then allocate blocks for file and fill with zeroes (stolen
774 * from mkfs)
775 */
776 nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
777 blocks = nsumblocks + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1;
778 error = -libxfs_trans_alloc_rollable(mp, blocks, &tp);
779 if (error)
780 res_failed(error);
781
782 libxfs_trans_ijoin(tp, ip, 0);
783 bno = 0;
784 while (bno < nsumblocks) {
785 nmap = XFS_BMAP_MAX_NMAP;
786 error = -libxfs_bmapi_write(tp, ip, bno,
787 (xfs_extlen_t)(nsumblocks - bno),
788 0, nsumblocks, map, &nmap);
789 if (error) {
790 do_error(
791 _("couldn't allocate realtime summary inode, error = %d\n"),
792 error);
793 }
794 for (i = 0, ep = map; i < nmap; i++, ep++) {
795 libxfs_device_zero(mp->m_ddev_targp,
796 XFS_FSB_TO_DADDR(mp, ep->br_startblock),
797 XFS_FSB_TO_BB(mp, ep->br_blockcount));
798 bno += ep->br_blockcount;
799 }
800 }
801 error = -libxfs_trans_commit(tp);
802 if (error) {
803 do_error(
804 _("allocation of the realtime summary ino failed, error = %d\n"),
805 error);
806 }
807 libxfs_irele(ip);
808 }
809
810 /*
811 * makes a new root directory.
812 */
813 static void
814 mk_root_dir(xfs_mount_t *mp)
815 {
816 xfs_trans_t *tp;
817 xfs_inode_t *ip;
818 int i;
819 int error;
820 const mode_t mode = 0755;
821 ino_tree_node_t *irec;
822 int times;
823
824 ip = NULL;
825 i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 10, 0, 0, &tp);
826 if (i)
827 res_failed(i);
828
829 error = -libxfs_iget(mp, tp, mp->m_sb.sb_rootino, 0, &ip);
830 if (error) {
831 do_error(_("could not iget root inode -- error - %d\n"), error);
832 }
833
834 /*
835 * take care of the core -- initialization from xfs_ialloc()
836 */
837 reset_inode_fields(ip);
838
839 VFS_I(ip)->i_mode = mode|S_IFDIR;
840 ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
841 if (ip->i_afp)
842 ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
843
844 set_nlink(VFS_I(ip), 2); /* account for . and .. */
845
846 times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
847 if (xfs_has_v3inodes(mp)) {
848 VFS_I(ip)->i_version = 1;
849 ip->i_diflags2 = 0;
850 times |= XFS_ICHGTIME_CREATE;
851 }
852 libxfs_trans_ichgtime(tp, ip, times);
853 libxfs_trans_ijoin(tp, ip, 0);
854 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
855
856 /*
857 * now the ifork
858 */
859 ip->i_df.if_bytes = 0;
860 ip->i_df.if_u1.if_root = NULL;
861
862 /*
863 * initialize the directory
864 */
865 libxfs_dir_init(tp, ip, ip);
866
867 error = -libxfs_trans_commit(tp);
868 if (error)
869 do_error(_("%s: commit failed, error %d\n"), __func__, error);
870
871 libxfs_irele(ip);
872
873 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
874 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
875 set_inode_isadir(irec, XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino) -
876 irec->ino_startnum);
877 }
878
879 /*
880 * orphanage name == lost+found
881 */
882 static xfs_ino_t
883 mk_orphanage(xfs_mount_t *mp)
884 {
885 xfs_ino_t ino;
886 xfs_trans_t *tp;
887 xfs_inode_t *ip;
888 xfs_inode_t *pip;
889 ino_tree_node_t *irec;
890 int ino_offset = 0;
891 int i;
892 int error;
893 const int mode = 0755;
894 int nres;
895 struct xfs_name xname;
896
897 /*
898 * check for an existing lost+found first, if it exists, return
899 * its inode. Otherwise, we can create it. Bad lost+found inodes
900 * would have been cleared in phase3 and phase4.
901 */
902
903 i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip);
904 if (i)
905 do_error(_("%d - couldn't iget root inode to obtain %s\n"),
906 i, ORPHANAGE);
907
908 xname.name = (unsigned char *)ORPHANAGE;
909 xname.len = strlen(ORPHANAGE);
910 xname.type = XFS_DIR3_FT_DIR;
911
912 if (libxfs_dir_lookup(NULL, pip, &xname, &ino, NULL) == 0)
913 return ino;
914
915 /*
916 * could not be found, create it
917 */
918 nres = XFS_MKDIR_SPACE_RES(mp, xname.len);
919 i = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir, nres, 0, 0, &tp);
920 if (i)
921 res_failed(i);
922
923 /*
924 * use iget/ijoin instead of trans_iget because the ialloc
925 * wrapper can commit the transaction and start a new one
926 */
927 /* i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip);
928 if (i)
929 do_error(_("%d - couldn't iget root inode to make %s\n"),
930 i, ORPHANAGE);*/
931
932 error = -libxfs_dir_ialloc(&tp, pip, mode|S_IFDIR,
933 1, 0, &zerocr, &zerofsx, &ip);
934 if (error) {
935 do_error(_("%s inode allocation failed %d\n"),
936 ORPHANAGE, error);
937 }
938 inc_nlink(VFS_I(ip)); /* account for . */
939 ino = ip->i_ino;
940
941 irec = find_inode_rec(mp,
942 XFS_INO_TO_AGNO(mp, ino),
943 XFS_INO_TO_AGINO(mp, ino));
944
945 if (irec == NULL) {
946 /*
947 * This inode is allocated from a newly created inode
948 * chunk and therefore did not exist when inode chunks
949 * were processed in phase3. Add this group of inodes to
950 * the entry avl tree as if they were discovered in phase3.
951 */
952 irec = set_inode_free_alloc(mp, XFS_INO_TO_AGNO(mp, ino),
953 XFS_INO_TO_AGINO(mp, ino));
954 alloc_ex_data(irec);
955
956 for (i = 0; i < XFS_INODES_PER_CHUNK; i++)
957 set_inode_free(irec, i);
958 }
959
960 ino_offset = get_inode_offset(mp, ino, irec);
961
962 /*
963 * Mark the inode allocated to lost+found as used in the AVL tree
964 * so it is not skipped in phase 7
965 */
966 set_inode_used(irec, ino_offset);
967 add_inode_ref(irec, ino_offset);
968 add_inode_reached(irec, ino_offset);
969
970 /*
971 * now that we know the transaction will stay around,
972 * add the root inode to it
973 */
974 libxfs_trans_ijoin(tp, pip, 0);
975
976 /*
977 * create the actual entry
978 */
979 error = -libxfs_dir_createname(tp, pip, &xname, ip->i_ino, nres);
980 if (error)
981 do_error(
982 _("can't make %s, createname error %d\n"),
983 ORPHANAGE, error);
984
985 /*
986 * bump up the link count in the root directory to account
987 * for .. in the new directory, and update the irec copy of the
988 * on-disk nlink so we don't fail the link count check later.
989 */
990 inc_nlink(VFS_I(pip));
991 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
992 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
993 add_inode_ref(irec, 0);
994 set_inode_disk_nlinks(irec, 0, get_inode_disk_nlinks(irec, 0) + 1);
995
996 libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
997 libxfs_dir_init(tp, ip, pip);
998 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
999 error = -libxfs_trans_commit(tp);
1000 if (error) {
1001 do_error(_("%s directory creation failed -- bmapf error %d\n"),
1002 ORPHANAGE, error);
1003 }
1004 libxfs_irele(ip);
1005 libxfs_irele(pip);
1006
1007 return(ino);
1008 }
1009
1010 /*
1011 * move a file to the orphange.
1012 */
1013 static void
1014 mv_orphanage(
1015 xfs_mount_t *mp,
1016 xfs_ino_t ino, /* inode # to be moved */
1017 int isa_dir) /* 1 if inode is a directory */
1018 {
1019 xfs_inode_t *orphanage_ip;
1020 xfs_ino_t entry_ino_num;
1021 xfs_inode_t *ino_p;
1022 xfs_trans_t *tp;
1023 int err;
1024 unsigned char fname[MAXPATHLEN + 1];
1025 int nres;
1026 int incr;
1027 ino_tree_node_t *irec;
1028 int ino_offset = 0;
1029 struct xfs_name xname;
1030
1031 xname.name = fname;
1032 xname.len = snprintf((char *)fname, sizeof(fname), "%llu",
1033 (unsigned long long)ino);
1034
1035 err = -libxfs_iget(mp, NULL, orphanage_ino, 0, &orphanage_ip);
1036 if (err)
1037 do_error(_("%d - couldn't iget orphanage inode\n"), err);
1038 /*
1039 * Make sure the filename is unique in the lost+found
1040 */
1041 incr = 0;
1042 while (libxfs_dir_lookup(NULL, orphanage_ip, &xname, &entry_ino_num,
1043 NULL) == 0)
1044 xname.len = snprintf((char *)fname, sizeof(fname), "%llu.%d",
1045 (unsigned long long)ino, ++incr);
1046
1047 /* Orphans may not have a proper parent, so use custom ops here */
1048 err = -libxfs_iget(mp, NULL, ino, 0, &ino_p);
1049 if (err)
1050 do_error(_("%d - couldn't iget disconnected inode\n"), err);
1051
1052 xname.type = libxfs_mode_to_ftype(VFS_I(ino_p)->i_mode);
1053
1054 if (isa_dir) {
1055 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, orphanage_ino),
1056 XFS_INO_TO_AGINO(mp, orphanage_ino));
1057 if (irec)
1058 ino_offset = XFS_INO_TO_AGINO(mp, orphanage_ino) -
1059 irec->ino_startnum;
1060 nres = XFS_DIRENTER_SPACE_RES(mp, fnamelen) +
1061 XFS_DIRENTER_SPACE_RES(mp, 2);
1062 err = -libxfs_dir_lookup(NULL, ino_p, &xfs_name_dotdot,
1063 &entry_ino_num, NULL);
1064 if (err) {
1065 ASSERT(err == ENOENT);
1066
1067 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1068 nres, 0, 0, &tp);
1069 if (err)
1070 do_error(
1071 _("space reservation failed (%d), filesystem may be out of space\n"),
1072 err);
1073
1074 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1075 libxfs_trans_ijoin(tp, ino_p, 0);
1076
1077 err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1078 ino, nres);
1079 if (err)
1080 do_error(
1081 _("name create failed in %s (%d), filesystem may be out of space\n"),
1082 ORPHANAGE, err);
1083
1084 if (irec)
1085 add_inode_ref(irec, ino_offset);
1086 else
1087 inc_nlink(VFS_I(orphanage_ip));
1088 libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1089
1090 err = -libxfs_dir_createname(tp, ino_p, &xfs_name_dotdot,
1091 orphanage_ino, nres);
1092 if (err)
1093 do_error(
1094 _("creation of .. entry failed (%d), filesystem may be out of space\n"),
1095 err);
1096
1097 inc_nlink(VFS_I(ino_p));
1098 libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1099 err = -libxfs_trans_commit(tp);
1100 if (err)
1101 do_error(
1102 _("creation of .. entry failed (%d)\n"), err);
1103 } else {
1104 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
1105 nres, 0, 0, &tp);
1106 if (err)
1107 do_error(
1108 _("space reservation failed (%d), filesystem may be out of space\n"),
1109 err);
1110
1111 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1112 libxfs_trans_ijoin(tp, ino_p, 0);
1113
1114
1115 err = -libxfs_dir_createname(tp, orphanage_ip, &xname,
1116 ino, nres);
1117 if (err)
1118 do_error(
1119 _("name create failed in %s (%d), filesystem may be out of space\n"),
1120 ORPHANAGE, err);
1121
1122 if (irec)
1123 add_inode_ref(irec, ino_offset);
1124 else
1125 inc_nlink(VFS_I(orphanage_ip));
1126 libxfs_trans_log_inode(tp, orphanage_ip, XFS_ILOG_CORE);
1127
1128 /*
1129 * don't replace .. value if it already points
1130 * to us. that'll pop a libxfs/kernel ASSERT.
1131 */
1132 if (entry_ino_num != orphanage_ino) {
1133 err = -libxfs_dir_replace(tp, ino_p,
1134 &xfs_name_dotdot, orphanage_ino,
1135 nres);
1136 if (err)
1137 do_error(
1138 _("name replace op failed (%d), filesystem may be out of space\n"),
1139 err);
1140 }
1141
1142 err = -libxfs_trans_commit(tp);
1143 if (err)
1144 do_error(
1145 _("orphanage name replace op failed (%d)\n"), err);
1146 }
1147
1148 } else {
1149 /*
1150 * use the remove log reservation as that's
1151 * more accurate. we're only creating the
1152 * links, we're not doing the inode allocation
1153 * also accounted for in the create
1154 */
1155 nres = XFS_DIRENTER_SPACE_RES(mp, xname.len);
1156 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
1157 nres, 0, 0, &tp);
1158 if (err)
1159 do_error(
1160 _("space reservation failed (%d), filesystem may be out of space\n"),
1161 err);
1162
1163 libxfs_trans_ijoin(tp, orphanage_ip, 0);
1164 libxfs_trans_ijoin(tp, ino_p, 0);
1165
1166 err = -libxfs_dir_createname(tp, orphanage_ip, &xname, ino,
1167 nres);
1168 if (err)
1169 do_error(
1170 _("name create failed in %s (%d), filesystem may be out of space\n"),
1171 ORPHANAGE, err);
1172 ASSERT(err == 0);
1173
1174 set_nlink(VFS_I(ino_p), 1);
1175 libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
1176 err = -libxfs_trans_commit(tp);
1177 if (err)
1178 do_error(
1179 _("orphanage name create failed (%d)\n"), err);
1180 }
1181 libxfs_irele(ino_p);
1182 libxfs_irele(orphanage_ip);
1183 }
1184
1185 static int
1186 entry_junked(
1187 const char *msg,
1188 const char *iname,
1189 xfs_ino_t ino1,
1190 xfs_ino_t ino2)
1191 {
1192 do_warn(msg, iname, ino1, ino2);
1193 if (!no_modify) {
1194 if (verbose)
1195 do_warn(_(", marking entry to be junked\n"));
1196 else
1197 do_warn("\n");
1198 } else
1199 do_warn(_(", would junk entry\n"));
1200 return !no_modify;
1201 }
1202
1203 /* Find and invalidate all the directory's buffers. */
1204 static int
1205 dir_binval(
1206 struct xfs_trans *tp,
1207 struct xfs_inode *ip,
1208 int whichfork)
1209 {
1210 struct xfs_iext_cursor icur;
1211 struct xfs_bmbt_irec rec;
1212 struct xfs_ifork *ifp;
1213 struct xfs_da_geometry *geo;
1214 struct xfs_buf *bp;
1215 xfs_dablk_t dabno;
1216 int error = 0;
1217
1218 if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
1219 ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
1220 return 0;
1221
1222 geo = tp->t_mountp->m_dir_geo;
1223 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1224 for_each_xfs_iext(ifp, &icur, &rec) {
1225 for (dabno = roundup(rec.br_startoff, geo->fsbcount);
1226 dabno < rec.br_startoff + rec.br_blockcount;
1227 dabno += geo->fsbcount) {
1228 bp = NULL;
1229 error = -libxfs_da_get_buf(tp, ip, dabno, &bp,
1230 whichfork);
1231 if (error)
1232 return error;
1233 if (!bp)
1234 continue;
1235 libxfs_trans_binval(tp, bp);
1236 libxfs_trans_brelse(tp, bp);
1237 }
1238 }
1239
1240 return error;
1241 }
1242
1243 /*
1244 * Unexpected failure during the rebuild will leave the entries in
1245 * lost+found on the next run
1246 */
1247
1248 static void
1249 longform_dir2_rebuild(
1250 struct xfs_mount *mp,
1251 xfs_ino_t ino,
1252 struct xfs_inode *ip,
1253 struct ino_tree_node *irec,
1254 int ino_offset,
1255 struct dir_hash_tab *hashtab)
1256 {
1257 int error;
1258 int nres;
1259 struct xfs_trans *tp;
1260 xfs_fileoff_t lastblock;
1261 struct xfs_inode pip;
1262 struct dir_hash_ent *p;
1263 int done = 0;
1264
1265 /*
1266 * trash directory completely and rebuild from scratch using the
1267 * name/inode pairs in the hash table
1268 */
1269
1270 do_warn(_("rebuilding directory inode %" PRIu64 "\n"), ino);
1271
1272 /*
1273 * first attempt to locate the parent inode, if it can't be
1274 * found, set it to the root inode and it'll be moved to the
1275 * orphanage later (the inode number here needs to be valid
1276 * for the libxfs_dir_init() call).
1277 */
1278 pip.i_ino = get_inode_parent(irec, ino_offset);
1279 if (pip.i_ino == NULLFSINO ||
1280 libxfs_dir_ino_validate(mp, pip.i_ino))
1281 pip.i_ino = mp->m_sb.sb_rootino;
1282
1283 nres = XFS_REMOVE_SPACE_RES(mp);
1284 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1285 if (error)
1286 res_failed(error);
1287 libxfs_trans_ijoin(tp, ip, 0);
1288
1289 error = dir_binval(tp, ip, XFS_DATA_FORK);
1290 if (error)
1291 do_error(_("error %d invalidating directory %llu blocks\n"),
1292 error, (unsigned long long)ip->i_ino);
1293
1294 if ((error = -libxfs_bmap_last_offset(ip, &lastblock, XFS_DATA_FORK)))
1295 do_error(_("xfs_bmap_last_offset failed -- error - %d\n"),
1296 error);
1297
1298 /* free all data, leaf, node and freespace blocks */
1299 while (!done) {
1300 error = -libxfs_bunmapi(tp, ip, 0, lastblock, XFS_BMAPI_METADATA,
1301 0, &done);
1302 if (error) {
1303 do_warn(_("xfs_bunmapi failed -- error - %d\n"), error);
1304 goto out_bmap_cancel;
1305 }
1306 error = -libxfs_defer_finish(&tp);
1307 if (error) {
1308 do_warn(("defer_finish failed -- error - %d\n"), error);
1309 goto out_bmap_cancel;
1310 }
1311 /*
1312 * Close out trans and start the next one in the chain.
1313 */
1314 error = -libxfs_trans_roll_inode(&tp, ip);
1315 if (error)
1316 goto out_bmap_cancel;
1317 }
1318
1319 error = -libxfs_dir_init(tp, ip, &pip);
1320 if (error) {
1321 do_warn(_("xfs_dir_init failed -- error - %d\n"), error);
1322 goto out_bmap_cancel;
1323 }
1324
1325 error = -libxfs_trans_commit(tp);
1326 if (error)
1327 do_error(
1328 _("dir init failed (%d)\n"), error);
1329
1330 if (ino == mp->m_sb.sb_rootino)
1331 need_root_dotdot = 0;
1332
1333 /* go through the hash list and re-add the inodes */
1334
1335 for (p = hashtab->first; p; p = p->nextbyorder) {
1336
1337 if (p->name.name[0] == '/' || (p->name.name[0] == '.' &&
1338 (p->name.len == 1 || (p->name.len == 2 &&
1339 p->name.name[1] == '.'))))
1340 continue;
1341
1342 nres = XFS_CREATE_SPACE_RES(mp, p->name.len);
1343 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_create,
1344 nres, 0, 0, &tp);
1345 if (error)
1346 res_failed(error);
1347
1348 libxfs_trans_ijoin(tp, ip, 0);
1349
1350 error = -libxfs_dir_createname(tp, ip, &p->name, p->inum,
1351 nres);
1352 if (error) {
1353 do_warn(
1354 _("name create failed in ino %" PRIu64 " (%d), filesystem may be out of space\n"),
1355 ino, error);
1356 goto out_bmap_cancel;
1357 }
1358
1359 error = -libxfs_trans_commit(tp);
1360 if (error)
1361 do_error(
1362 _("name create failed (%d) during rebuild\n"), error);
1363 }
1364
1365 return;
1366
1367 out_bmap_cancel:
1368 libxfs_trans_cancel(tp);
1369 return;
1370 }
1371
1372
1373 /*
1374 * Kill a block in a version 2 inode.
1375 * Makes its own transaction.
1376 */
1377 static void
1378 dir2_kill_block(
1379 xfs_mount_t *mp,
1380 xfs_inode_t *ip,
1381 xfs_dablk_t da_bno,
1382 struct xfs_buf *bp)
1383 {
1384 xfs_da_args_t args;
1385 int error;
1386 int nres;
1387 xfs_trans_t *tp;
1388
1389 nres = XFS_REMOVE_SPACE_RES(mp);
1390 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, nres, 0, 0, &tp);
1391 if (error)
1392 res_failed(error);
1393 libxfs_trans_ijoin(tp, ip, 0);
1394 libxfs_trans_bjoin(tp, bp);
1395 libxfs_trans_bhold(tp, bp);
1396 memset(&args, 0, sizeof(args));
1397 args.dp = ip;
1398 args.trans = tp;
1399 args.whichfork = XFS_DATA_FORK;
1400 args.geo = mp->m_dir_geo;
1401 if (da_bno >= mp->m_dir_geo->leafblk && da_bno < mp->m_dir_geo->freeblk)
1402 error = -libxfs_da_shrink_inode(&args, da_bno, bp);
1403 else
1404 error = -libxfs_dir2_shrink_inode(&args,
1405 xfs_dir2_da_to_db(mp->m_dir_geo, da_bno), bp);
1406 if (error)
1407 do_error(_("shrink_inode failed inode %" PRIu64 " block %u\n"),
1408 ip->i_ino, da_bno);
1409 error = -libxfs_trans_commit(tp);
1410 if (error)
1411 do_error(
1412 _("directory shrink failed (%d)\n"), error);
1413 }
1414
1415 /*
1416 * process a data block, also checks for .. entry
1417 * and corrects it to match what we think .. should be
1418 */
/*
 * Parameters, as used below:
 *   mp, ip               mount and incore directory inode being checked
 *   num_illegal          bumped by 1 for an empty block and by the number
 *                        of bad entries counted in the per-entry pass
 *   need_dot             cleared once the entry pointing back at ip is seen
 *   current_irec,
 *   current_ino_offset   inode-tree slot of this directory, passed to
 *                        add_inode_ref()
 *   bp                   buffer holding the data block; joined to and held
 *                        in the transaction allocated here
 *   hashtab              receives every entry that survives the early checks
 *   freetabp             freetab pointer; replaced when realloc() grows it
 *   da_bno               directory block number of bp
 *   isblock              nonzero for a single-block directory, in which case
 *                        the entry area ends at the block tail/leaf array
 *
 * Two passes are made over the block.  The first only checks that free
 * spaces and entries tile the entry area exactly; if not, the whole block
 * is junked (or would be, under no_modify) and the function returns.  The
 * second examines each entry inside one transaction.  An entry is marked
 * bad by overwriting the first byte of its name with '/'.
 */
1419 static void
1420 longform_dir2_entry_check_data(
1421 struct xfs_mount *mp,
1422 struct xfs_inode *ip,
1423 int *num_illegal,
1424 int *need_dot,
1425 struct ino_tree_node *current_irec,
1426 int current_ino_offset,
1427 struct xfs_buf *bp,
1428 struct dir_hash_tab *hashtab,
1429 freetab_t **freetabp,
1430 xfs_dablk_t da_bno,
1431 int isblock)
1432 {
1433 xfs_dir2_dataptr_t addr;
1434 xfs_dir2_leaf_entry_t *blp;
1435 xfs_dir2_block_tail_t *btp;
1436 struct xfs_dir2_data_hdr *d;
1437 xfs_dir2_db_t db;
1438 xfs_dir2_data_entry_t *dep;
1439 xfs_dir2_data_unused_t *dup;
1440 struct xfs_dir2_data_free *bf;
1441 char *endptr;
1442 int error;
1443 char fname[MAXNAMELEN + 1];
1444 freetab_t *freetab;
1445 int i;
1446 int ino_offset;
1447 xfs_ino_t inum;
1448 ino_tree_node_t *irec;
1449 int junkit;
1450 int lastfree;
1451 int len;
1452 int nbad;
1453 int needlog;
1454 int needscan;
1455 xfs_ino_t parent;
1456 char *ptr;
1457 xfs_trans_t *tp;
1458 int wantmagic;
1459 struct xfs_da_args da = {
1460 .dp = ip,
1461 .geo = mp->m_dir_geo,
1462 };
1463
1464
1465 d = bp->b_addr;
1466 ptr = (char *)d + mp->m_dir_geo->data_entry_offset;
1467 nbad = 0;
1468 needscan = needlog = 0;
1469 junkit = 0;
1470 freetab = *freetabp;
1471 if (isblock) {
1472 btp = xfs_dir2_block_tail_p(mp->m_dir_geo, d);
1473 blp = xfs_dir2_block_leaf_p(btp);
1474 endptr = (char *)blp;
1475 if (endptr > (char *)btp)
1476 endptr = (char *)btp;
1477 if (xfs_has_crc(mp))
1478 wantmagic = XFS_DIR3_BLOCK_MAGIC;
1479 else
1480 wantmagic = XFS_DIR2_BLOCK_MAGIC;
1481 } else {
1482 endptr = (char *)d + mp->m_dir_geo->blksize;
1483 if (xfs_has_crc(mp))
1484 wantmagic = XFS_DIR3_DATA_MAGIC;
1485 else
1486 wantmagic = XFS_DIR2_DATA_MAGIC;
1487 }
1488 db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
1489
1490 /* check for data block beyond expected end */
1491 if (freetab->naents <= db) {
1492 struct freetab_ent e;
1493
1494 *freetabp = freetab = realloc(freetab, FREETAB_SIZE(db + 1));
1495 if (!freetab) {
1496 do_error(_("realloc failed in %s (%zu bytes)\n"),
1497 __func__, FREETAB_SIZE(db + 1));
1498 }
1499 e.v = NULLDATAOFF;
1500 e.s = 0;
1501 for (i = freetab->naents; i < db; i++)
1502 freetab->ents[i] = e;
1503 freetab->naents = db + 1;
1504 }
/*
 * Slot db itself is not initialised by the loop above; it is assigned
 * either on the junk path (only .v) or at the very end of the function
 * (.v and .s).
 * NOTE(review): on the junk path .s of a freshly realloc'd slot is never
 * written - confirm that no later reader depends on it.
 */
1505
/*
 * Structural pass: nothing is modified here.  The block is accepted only
 * if the walk lands exactly on endptr; any early break leaves ptr short.
 */
1506 /* check the data block */
1507 while (ptr < endptr) {
1508
1509 /* check for freespace */
1510 dup = (xfs_dir2_data_unused_t *)ptr;
1511 if (XFS_DIR2_DATA_FREE_TAG == be16_to_cpu(dup->freetag)) {
1512
1513 /* check for invalid freespace length */
1514 if (ptr + be16_to_cpu(dup->length) > endptr ||
1515 be16_to_cpu(dup->length) == 0 ||
1516 (be16_to_cpu(dup->length) &
1517 (XFS_DIR2_DATA_ALIGN - 1)))
1518 break;
1519
1520 /* check for invalid tag */
1521 if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
1522 (char *)dup - (char *)d)
1523 break;
1524
1525 /* check for block with no data entries */
1526 if ((ptr == (char *)d + mp->m_dir_geo->data_entry_offset) &&
1527 (ptr + be16_to_cpu(dup->length) >= endptr)) {
1528 junkit = 1;
1529 *num_illegal += 1;
1530 break;
1531 }
1532
1533 /* continue at the end of the freespace */
1534 ptr += be16_to_cpu(dup->length);
1535 if (ptr >= endptr)
1536 break;
1537 }
1538
1539 /* validate data entry size */
1540 dep = (xfs_dir2_data_entry_t *)ptr;
1541 if (ptr + libxfs_dir2_data_entsize(mp, dep->namelen) > endptr)
1542 break;
1543 if (be16_to_cpu(*libxfs_dir2_data_entry_tag_p(mp, dep)) !=
1544 (char *)dep - (char *)d)
1545 break;
1546 ptr += libxfs_dir2_data_entsize(mp, dep->namelen);
1547 }
1548
1549 /* did we find an empty or corrupt block? */
1550 if (ptr != endptr) {
1551 if (junkit) {
1552 do_warn(
1553 _("empty data block %u in directory inode %" PRIu64 ": "),
1554 da_bno, ip->i_ino);
1555 } else {
1556 do_warn(_
1557 ("corrupt block %u in directory inode %" PRIu64 ": "),
1558 da_bno, ip->i_ino);
1559 }
1560 if (!no_modify) {
1561 do_warn(_("junking block\n"));
1562 dir2_kill_block(mp, ip, da_bno, bp);
1563 } else {
1564 do_warn(_("would junk block\n"));
1565 }
1566 freetab->ents[db].v = NULLDATAOFF;
1567 return;
1568 }
1569
1570 /* update number of data blocks processed */
1571 if (freetab->nents < db + 1)
1572 freetab->nents = db + 1;
1573
/*
 * The block tiles correctly.  Everything from here to the commit near the
 * end of the function happens inside this one transaction, with bp joined
 * and held.
 */
1574 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, &tp);
1575 if (error)
1576 res_failed(error);
1577 da.trans = tp;
1578 libxfs_trans_ijoin(tp, ip, 0);
1579 libxfs_trans_bjoin(tp, bp);
1580 libxfs_trans_bhold(tp, bp);
1581 if (be32_to_cpu(d->magic) != wantmagic) {
1582 do_warn(
1583 _("bad directory block magic # %#x for directory inode %" PRIu64 " block %d: "),
1584 be32_to_cpu(d->magic), ip->i_ino, da_bno);
1585 if (!no_modify) {
1586 do_warn(_("fixing magic # to %#x\n"), wantmagic);
1587 d->magic = cpu_to_be32(wantmagic);
1588 needlog = 1;
1589 } else
1590 do_warn(_("would fix magic # to %#x\n"), wantmagic);
1591 }
1592 lastfree = 0;
1593 ptr = (char *)d + mp->m_dir_geo->data_entry_offset;
1594 /*
1595 * look at each entry. reference inode pointed to by each
1596 * entry in the incore inode tree.
1597 * if not a directory, set reached flag, increment link count
1598 * if a directory and reached, mark entry as to be deleted.
1599 * if a directory, check to see if recorded parent
1600 * matches current inode #,
1601 * if so, then set reached flag, increment link count
1602 * of current and child dir inodes, push the child
1603 * directory inode onto the directory stack.
1604 * if current inode != parent, then mark entry to be deleted.
1605 */
1606 while (ptr < endptr) {
1607 dup = (xfs_dir2_data_unused_t *)ptr;
1608 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
1609 if (lastfree) {
1610 do_warn(
1611 _("directory inode %" PRIu64 " block %u has consecutive free entries: "),
1612 ip->i_ino, da_bno);
1613 if (!no_modify) {
1614
1615 do_warn(_("joining together\n"));
1616 len = be16_to_cpu(dup->length);
1617 libxfs_dir2_data_use_free(&da, bp, dup,
1618 ptr - (char *)d, len, &needlog,
1619 &needscan);
1620 libxfs_dir2_data_make_free(&da, bp,
1621 ptr - (char *)d, len, &needlog,
1622 &needscan);
1623 } else
1624 do_warn(_("would join together\n"));
1625 }
1626 ptr += be16_to_cpu(dup->length);
1627 lastfree = 1;
1628 continue;
1629 }
1630 addr = xfs_dir2_db_off_to_dataptr(mp->m_dir_geo, db,
1631 ptr - (char *)d);
1632 dep = (xfs_dir2_data_entry_t *)ptr;
1633 ptr += libxfs_dir2_data_entsize(mp, dep->namelen);
1634 inum = be64_to_cpu(dep->inumber);
1635 lastfree = 0;
1636 /*
1637 * skip bogus entries (leading '/'). they'll be deleted
1638 * later. must still log it, else we leak references to
1639 * buffers.
1640 */
1641 if (dep->name[0] == '/') {
1642 nbad++;
1643 if (!no_modify)
1644 libxfs_dir2_data_log_entry(&da, bp, dep);
1645 continue;
1646 }
1647
1648 memmove(fname, dep->name, dep->namelen);
1649 fname[dep->namelen] = '\0';
1650 ASSERT(inum != NULLFSINO);
1651
1652 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, inum),
1653 XFS_INO_TO_AGINO(mp, inum));
1654 if (irec == NULL) {
1655 nbad++;
1656 if (entry_junked(
1657 _("entry \"%s\" in directory inode %" PRIu64 " points to non-existent inode %" PRIu64 ""),
1658 fname, ip->i_ino, inum)) {
1659 dep->name[0] = '/';
1660 libxfs_dir2_data_log_entry(&da, bp, dep);
1661 }
1662 continue;
1663 }
1664 ino_offset = XFS_INO_TO_AGINO(mp, inum) - irec->ino_startnum;
1665
1666 /*
1667 * if it's a free inode, blow out the entry.
1668 * by now, any inode that we think is free
1669 * really is free.
1670 */
1671 if (is_inode_free(irec, ino_offset)) {
1672 nbad++;
1673 if (entry_junked(
1674 _("entry \"%s\" in directory inode %" PRIu64 " points to free inode %" PRIu64),
1675 fname, ip->i_ino, inum)) {
1676 dep->name[0] = '/';
1677 libxfs_dir2_data_log_entry(&da, bp, dep);
1678 }
1679 continue;
1680 }
1681
1682 /*
1683 * check if this inode is lost+found dir in the root
1684 */
/*
 * NOTE(review): the test below compares inum (the inode the entry points
 * to) with sb_rootino, while the warning text that follows prints inum as
 * the lost+found inode and ip->i_ino as the root.  Confirm whether
 * ip->i_ino was meant here.
 */
1685 if (inum == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
1686 /*
1687 * if it's not a directory, trash it
1688 */
1689 if (!inode_isadir(irec, ino_offset)) {
1690 nbad++;
1691 if (entry_junked(
1692 _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
1693 ORPHANAGE, inum, ip->i_ino)) {
1694 dep->name[0] = '/';
1695 libxfs_dir2_data_log_entry(&da, bp, dep);
1696 }
1697 continue;
1698 }
1699 /*
1700 * if this is a dup, it will be picked up below,
1701 * otherwise, mark it as the orphanage for later.
1702 */
1703 if (!orphanage_ino)
1704 orphanage_ino = inum;
1705 }
1706
1707 /*
1708 * check for duplicate names in directory.
1709 */
1710 if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen,
1711 dep->name, libxfs_dir2_data_get_ftype(mp, dep))) {
1712 nbad++;
1713 if (entry_junked(
1714 _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
1715 fname, inum, ip->i_ino)) {
1716 dep->name[0] = '/';
1717 libxfs_dir2_data_log_entry(&da, bp, dep);
1718 }
1719 if (inum == orphanage_ino)
1720 orphanage_ino = 0;
1721 continue;
1722 }
1723
1724 /*
1725 * if just scanning to rebuild a directory due to a ".."
1726 * update, just continue
1727 */
1728 if (dotdot_update)
1729 continue;
1730
1731 /*
1732 * skip the '..' entry since it's checked when the
1733 * directory is reached by something else. if it never
1734 * gets reached, it'll be moved to the orphanage and we'll
1735 * take care of it then. If it doesn't exist at all, the
1736 * directory needs to be rebuilt first before being added
1737 * to the orphanage.
1738 */
1739 if (dep->namelen == 2 && dep->name[0] == '.' &&
1740 dep->name[1] == '.') {
1741 if (da_bno != 0) {
1742 /* ".." should be in the first block */
1743 nbad++;
1744 if (entry_junked(
1745 _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname,
1746 inum, ip->i_ino)) {
1747 dir_hash_junkit(hashtab, addr);
1748 dep->name[0] = '/';
1749 libxfs_dir2_data_log_entry(&da, bp, dep);
1750 }
1751 }
1752 continue;
1753 }
1754 ASSERT(no_modify || libxfs_verify_dir_ino(mp, inum));
1755 /*
1756 * special case the . entry. we know there's only one
1757 * '.' and only '.' points to itself because bogus entries
1758 * got trashed in phase 3 if there were > 1.
1759 * bump up link count for '.' but don't set reached
1760 * until we're actually reached by another directory
1761 * '..' is already accounted for or will be taken care
1762 * of when directory is moved to orphanage.
1763 */
1764 if (ip->i_ino == inum) {
1765 ASSERT(no_modify ||
1766 (dep->name[0] == '.' && dep->namelen == 1));
1767 add_inode_ref(current_irec, current_ino_offset);
1768 if (da_bno != 0 ||
1769 dep != (void *)d + mp->m_dir_geo->data_entry_offset) {
1770 /* "." should be the first entry */
1771 nbad++;
/*
 * The format below has two conversions (%s and one inode number); the
 * third argument handed to entry_junked(), ip->i_ino, is not printed.
 */
1772 if (entry_junked(
1773 _("entry \"%s\" in dir %" PRIu64 " is not the first entry"),
1774 fname, inum, ip->i_ino)) {
1775 dir_hash_junkit(hashtab, addr);
1776 dep->name[0] = '/';
1777 libxfs_dir2_data_log_entry(&da, bp, dep);
1778 }
1779 }
1780 *need_dot = 0;
1781 continue;
1782 }
1783 /*
1784 * skip entries with bogus inumbers if we're in no modify mode
1785 */
1786 if (no_modify && !libxfs_verify_dir_ino(mp, inum))
1787 continue;
1788
1789 /* validate ftype field if supported */
1790 if (xfs_has_ftype(mp)) {
1791 uint8_t dir_ftype;
1792 uint8_t ino_ftype;
1793
1794 dir_ftype = libxfs_dir2_data_get_ftype(mp, dep);
1795 ino_ftype = get_inode_ftype(irec, ino_offset);
1796
1797 if (dir_ftype != ino_ftype) {
1798 if (no_modify) {
1799 do_warn(
1800 _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1801 dir_ftype, ino_ftype,
1802 ip->i_ino, inum);
1803 } else {
1804 do_warn(
1805 _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
1806 dir_ftype, ino_ftype,
1807 ip->i_ino, inum);
1808 libxfs_dir2_data_put_ftype(mp, dep, ino_ftype);
1809 libxfs_dir2_data_log_entry(&da, bp, dep);
1810 dir_hash_update_ftype(hashtab, addr,
1811 ino_ftype);
1812 }
1813 }
1814 }
1815
1816 /*
1817 * check easy case first, regular inode, just bump
1818 * the link count and continue
1819 */
1820 if (!inode_isadir(irec, ino_offset)) {
1821 add_inode_reached(irec, ino_offset);
1822 continue;
1823 }
1824 parent = get_inode_parent(irec, ino_offset);
1825 ASSERT(parent != 0);
1826 junkit = 0;
1827 /*
1828 * bump up the link counts in parent and child
1829 * directory but if the link doesn't agree with
1830 * the .. in the child, blow out the entry.
1831 * if the directory has already been reached,
1832 * blow away the entry also.
1833 */
1834 if (is_inode_reached(irec, ino_offset)) {
1835 junkit = 1;
1836 do_warn(
1837 _("entry \"%s\" in dir %" PRIu64" points to an already connected directory inode %" PRIu64 "\n"),
1838 fname, ip->i_ino, inum);
1839 } else if (parent == ip->i_ino) {
1840 add_inode_reached(irec, ino_offset);
1841 add_inode_ref(current_irec, current_ino_offset);
1842 } else if (parent == NULLFSINO) {
1843 /* ".." was missing, but this entry refers to it,
1844 so, set it as the parent and mark for rebuild */
1845 do_warn(
1846 _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
1847 fname, ip->i_ino, inum);
1848 set_inode_parent(irec, ino_offset, ip->i_ino);
1849 add_inode_reached(irec, ino_offset);
1850 add_inode_ref(current_irec, current_ino_offset);
1851 add_dotdot_update(XFS_INO_TO_AGNO(mp, inum), irec,
1852 ino_offset);
1853 } else {
1854 junkit = 1;
1855 do_warn(
1856 _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 ") in ino %" PRIu64 "\n"),
1857 fname, ip->i_ino, parent, inum);
1858 }
1859 if (junkit) {
1860 if (inum == orphanage_ino)
1861 orphanage_ino = 0;
1862 nbad++;
1863 if (!no_modify) {
1864 dir_hash_junkit(hashtab, addr);
1865 dep->name[0] = '/';
1866 libxfs_dir2_data_log_entry(&da, bp, dep);
1867 if (verbose)
1868 do_warn(
1869 _("\twill clear entry \"%s\"\n"),
1870 fname);
1871 } else {
1872 do_warn(_("\twould clear entry \"%s\"\n"),
1873 fname);
1874 }
1875 }
1876 }
1877 *num_illegal += nbad;
/*
 * NOTE(review): whatever libxfs_dir2_data_freescan() stores through &i is
 * not looked at afterwards; the header is logged only when needlog is set.
 * Confirm that this is intended.
 */
1878 if (needscan)
1879 libxfs_dir2_data_freescan(mp, d, &i);
1880 if (needlog)
1881 libxfs_dir2_data_log_header(&da, bp);
1882 error = -libxfs_trans_commit(tp);
1883 if (error)
1884 do_error(
1885 _("directory block fixing failed (%d)\n"), error);
1886
1887 /* record the largest free space in the freetab for later checking */
1888 bf = libxfs_dir2_data_bestfree_p(mp, d);
1889 freetab->ents[db].v = be16_to_cpu(bf[0].length);
1890 freetab->ents[db].s = 0;
1891 }
1892
1893 /* check v5 metadata */
1894 static int
1895 __check_dir3_header(
1896 struct xfs_mount *mp,
1897 struct xfs_buf *bp,
1898 xfs_ino_t ino,
1899 __be64 owner,
1900 __be64 blkno,
1901 uuid_t *uuid)
1902 {
1903
1904 /* verify owner */
1905 if (be64_to_cpu(owner) != ino) {
1906 do_warn(
1907 _("expected owner inode %" PRIu64 ", got %llu, directory block %" PRIu64 "\n"),
1908 ino, (unsigned long long)be64_to_cpu(owner), xfs_buf_daddr(bp));
1909 return 1;
1910 }
1911 /* verify block number */
1912 if (be64_to_cpu(blkno) != xfs_buf_daddr(bp)) {
1913 do_warn(
1914 _("expected block %" PRIu64 ", got %llu, directory inode %" PRIu64 "\n"),
1915 xfs_buf_daddr(bp), (unsigned long long)be64_to_cpu(blkno), ino);
1916 return 1;
1917 }
1918 /* verify uuid */
1919 if (platform_uuid_compare(uuid, &mp->m_sb.sb_meta_uuid) != 0) {
1920 do_warn(
1921 _("wrong FS UUID, directory inode %" PRIu64 " block %" PRIu64 "\n"),
1922 ino, xfs_buf_daddr(bp));
1923 return 1;
1924 }
1925
1926 return 0;
1927 }
1928
1929 static int
1930 check_da3_header(
1931 struct xfs_mount *mp,
1932 struct xfs_buf *bp,
1933 xfs_ino_t ino)
1934 {
1935 struct xfs_da3_blkinfo *info = bp->b_addr;
1936
1937 return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
1938 &info->uuid);
1939 }
1940
1941 static int
1942 check_dir3_header(
1943 struct xfs_mount *mp,
1944 struct xfs_buf *bp,
1945 xfs_ino_t ino)
1946 {
1947 struct xfs_dir3_blk_hdr *info = bp->b_addr;
1948
1949 return __check_dir3_header(mp, bp, ino, info->owner, info->blkno,
1950 &info->uuid);
1951 }
1952
1953 /*
1954 * Check contents of leaf-form block.
1955 */
1956 static int
1957 longform_dir2_check_leaf(
1958 struct xfs_mount *mp,
1959 struct xfs_inode *ip,
1960 struct dir_hash_tab *hashtab,
1961 struct freetab *freetab)
1962 {
1963 int badtail;
1964 __be16 *bestsp;
1965 struct xfs_buf *bp;
1966 xfs_dablk_t da_bno;
1967 int i;
1968 xfs_dir2_leaf_t *leaf;
1969 xfs_dir2_leaf_tail_t *ltp;
1970 int seeval;
1971 struct xfs_dir2_leaf_entry *ents;
1972 struct xfs_dir3_icleaf_hdr leafhdr;
1973 int error;
1974 int fixit = 0;
1975
1976 da_bno = mp->m_dir_geo->leafblk;
1977 error = dir_read_buf(ip, da_bno, &bp, &xfs_dir3_leaf1_buf_ops, &fixit);
1978 if (error == EFSBADCRC || error == EFSCORRUPTED || fixit) {
1979 do_warn(
1980 _("leaf block %u for directory inode %" PRIu64 " bad CRC\n"),
1981 da_bno, ip->i_ino);
1982 return 1;
1983 } else if (error) {
1984 do_error(
1985 _("can't read block %u for directory inode %" PRIu64 ", error %d\n"),
1986 da_bno, ip->i_ino, error);
1987 /* NOTREACHED */
1988 }
1989
1990 leaf = bp->b_addr;
1991 libxfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
1992 ents = leafhdr.ents;
1993 ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
1994 bestsp = xfs_dir2_leaf_bests_p(ltp);
1995 if (!(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
1996 leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) ||
1997 leafhdr.forw || leafhdr.back ||
1998 leafhdr.count < leafhdr.stale ||
1999 leafhdr.count > mp->m_dir_geo->leaf_max_ents ||
2000 (char *)&ents[leafhdr.count] > (char *)bestsp) {
2001 do_warn(
2002 _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2003 da_bno, ip->i_ino);
2004 libxfs_buf_relse(bp);
2005 return 1;
2006 }
2007
2008 if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
2009 error = check_da3_header(mp, bp, ip->i_ino);
2010 if (error) {
2011 libxfs_buf_relse(bp);
2012 return error;
2013 }
2014 }
2015
2016 seeval = dir_hash_see_all(hashtab, ents, leafhdr.count, leafhdr.stale);
2017 if (dir_hash_check(hashtab, ip, seeval)) {
2018 libxfs_buf_relse(bp);
2019 return 1;
2020 }
2021 badtail = freetab->nents != be32_to_cpu(ltp->bestcount);
2022 for (i = 0; !badtail && i < be32_to_cpu(ltp->bestcount); i++) {
2023 freetab->ents[i].s = 1;
2024 badtail = freetab->ents[i].v != be16_to_cpu(bestsp[i]);
2025 }
2026 if (badtail) {
2027 do_warn(
2028 _("leaf block %u for directory inode %" PRIu64 " bad tail\n"),
2029 da_bno, ip->i_ino);
2030 libxfs_buf_relse(bp);
2031 return 1;
2032 }
2033 libxfs_buf_relse(bp);
2034 return fixit;
2035 }
2036
2037 /*
2038 * Check contents of the node blocks (leaves)
2039 * Looks for matching hash values for the data entries.
2040 */
2041 static int
2042 longform_dir2_check_node(
2043 struct xfs_mount *mp,
2044 struct xfs_inode *ip,
2045 struct dir_hash_tab *hashtab,
2046 struct freetab *freetab)
2047 {
2048 struct xfs_buf *bp;
2049 xfs_dablk_t da_bno;
2050 xfs_dir2_db_t fdb;
2051 xfs_dir2_free_t *free;
2052 int i;
2053 xfs_dir2_leaf_t *leaf;
2054 xfs_fileoff_t next_da_bno;
2055 int seeval = 0;
2056 int used;
2057 struct xfs_dir2_leaf_entry *ents;
2058 struct xfs_dir3_icleaf_hdr leafhdr;
2059 struct xfs_dir3_icfree_hdr freehdr;
2060 __be16 *bests;
2061 int error;
2062 int fixit = 0;
2063
2064 for (da_bno = mp->m_dir_geo->leafblk, next_da_bno = 0;
2065 next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->freeblk;
2066 da_bno = (xfs_dablk_t)next_da_bno) {
2067 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2068 if (bmap_next_offset(ip, &next_da_bno))
2069 break;
2070
2071 /*
2072 * we need to use the da3 node verifier here as it handles the
2073 * fact that reading the leaf hash tree blocks can return either
2074 * leaf or node blocks and calls the correct verifier. If we get
2075 * a node block, then we'll skip it below based on a magic
2076 * number check.
2077 */
2078 error = dir_read_buf(ip, da_bno, &bp, &xfs_da3_node_buf_ops,
2079 &fixit);
2080 if (error) {
2081 do_warn(
2082 _("can't read leaf block %u for directory inode %" PRIu64 ", error %d\n"),
2083 da_bno, ip->i_ino, error);
2084 return 1;
2085 }
2086 leaf = bp->b_addr;
2087 libxfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
2088 ents = leafhdr.ents;
2089 if (!(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
2090 leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2091 leafhdr.magic == XFS_DA_NODE_MAGIC ||
2092 leafhdr.magic == XFS_DA3_NODE_MAGIC)) {
2093 do_warn(
2094 _("unknown magic number %#x for block %u in directory inode %" PRIu64 "\n"),
2095 leafhdr.magic, da_bno, ip->i_ino);
2096 libxfs_buf_relse(bp);
2097 return 1;
2098 }
2099
2100 /* check v5 metadata */
2101 if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
2102 leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2103 error = check_da3_header(mp, bp, ip->i_ino);
2104 if (error) {
2105 libxfs_buf_relse(bp);
2106 return error;
2107 }
2108 }
2109
2110 /* ignore nodes */
2111 if (leafhdr.magic == XFS_DA_NODE_MAGIC ||
2112 leafhdr.magic == XFS_DA3_NODE_MAGIC) {
2113 libxfs_buf_relse(bp);
2114 continue;
2115 }
2116
2117 /*
2118 * If there's a validator error, we need to ensure that we got
2119 * the right ops on the buffer for when we write it back out.
2120 */
2121 bp->b_ops = &xfs_dir3_leafn_buf_ops;
2122 if (leafhdr.count > mp->m_dir_geo->leaf_max_ents ||
2123 leafhdr.count < leafhdr.stale) {
2124 do_warn(
2125 _("leaf block %u for directory inode %" PRIu64 " bad header\n"),
2126 da_bno, ip->i_ino);
2127 libxfs_buf_relse(bp);
2128 return 1;
2129 }
2130 seeval = dir_hash_see_all(hashtab, ents,
2131 leafhdr.count, leafhdr.stale);
2132 libxfs_buf_relse(bp);
2133 if (seeval != DIR_HASH_CK_OK)
2134 return 1;
2135 }
2136 if (dir_hash_check(hashtab, ip, seeval))
2137 return 1;
2138
2139 for (da_bno = mp->m_dir_geo->freeblk, next_da_bno = 0;
2140 next_da_bno != NULLFILEOFF;
2141 da_bno = (xfs_dablk_t)next_da_bno) {
2142 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2143 if (bmap_next_offset(ip, &next_da_bno))
2144 break;
2145
2146 error = dir_read_buf(ip, da_bno, &bp, &xfs_dir3_free_buf_ops,
2147 &fixit);
2148 if (error) {
2149 do_warn(
2150 _("can't read freespace block %u for directory inode %" PRIu64 ", error %d\n"),
2151 da_bno, ip->i_ino, error);
2152 return 1;
2153 }
2154 free = bp->b_addr;
2155 libxfs_dir2_free_hdr_from_disk(mp, &freehdr, free);
2156 bests = freehdr.bests;
2157 fdb = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
2158 if (!(freehdr.magic == XFS_DIR2_FREE_MAGIC ||
2159 freehdr.magic == XFS_DIR3_FREE_MAGIC) ||
2160 freehdr.firstdb !=
2161 (fdb - xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
2162 mp->m_dir_geo->free_max_bests ||
2163 freehdr.nvalid < freehdr.nused) {
2164 do_warn(
2165 _("free block %u for directory inode %" PRIu64 " bad header\n"),
2166 da_bno, ip->i_ino);
2167 libxfs_buf_relse(bp);
2168 return 1;
2169 }
2170
2171 if (freehdr.magic == XFS_DIR3_FREE_MAGIC) {
2172 error = check_dir3_header(mp, bp, ip->i_ino);
2173 if (error) {
2174 libxfs_buf_relse(bp);
2175 return error;
2176 }
2177 }
2178 for (i = used = 0; i < freehdr.nvalid; i++) {
2179 if (i + freehdr.firstdb >= freetab->nents ||
2180 freetab->ents[i + freehdr.firstdb].v !=
2181 be16_to_cpu(bests[i])) {
2182 do_warn(
2183 _("free block %u entry %i for directory ino %" PRIu64 " bad\n"),
2184 da_bno, i, ip->i_ino);
2185 libxfs_buf_relse(bp);
2186 return 1;
2187 }
2188 used += be16_to_cpu(bests[i]) != NULLDATAOFF;
2189 freetab->ents[i + freehdr.firstdb].s = 1;
2190 }
2191 if (used != freehdr.nused) {
2192 do_warn(
2193 _("free block %u for directory inode %" PRIu64 " bad nused\n"),
2194 da_bno, ip->i_ino);
2195 libxfs_buf_relse(bp);
2196 return 1;
2197 }
2198 libxfs_buf_relse(bp);
2199 }
2200 for (i = 0; i < freetab->nents; i++) {
2201 if ((freetab->ents[i].s == 0) &&
2202 (freetab->ents[i].v != NULLDATAOFF)) {
2203 do_warn(
2204 _("missing freetab entry %u for directory inode %" PRIu64 "\n"),
2205 i, ip->i_ino);
2206 return 1;
2207 }
2208 }
2209 return fixit;
2210 }
2211
2212 /*
2213 * If a directory is corrupt, we need to read in as many entries as possible,
2214 * destroy the entry and create a new one with recovered name/inode pairs.
2215 * (ie. get libxfs to do all the grunt work)
2216 */
2217 static void
2218 longform_dir2_entry_check(
2219 struct xfs_mount *mp,
2220 xfs_ino_t ino,
2221 struct xfs_inode *ip,
2222 int *num_illegal,
2223 int *need_dot,
2224 struct ino_tree_node *irec,
2225 int ino_offset,
2226 struct dir_hash_tab *hashtab)
2227 {
2228 struct xfs_buf *bp = NULL;
2229 xfs_dablk_t da_bno;
2230 freetab_t *freetab;
2231 int i;
2232 int isblock;
2233 int isleaf;
2234 xfs_fileoff_t next_da_bno;
2235 int seeval;
2236 int fixit = 0;
2237 struct xfs_da_args args;
2238
2239 *need_dot = 1;
2240 freetab = malloc(FREETAB_SIZE(ip->i_disk_size / mp->m_dir_geo->blksize));
2241 if (!freetab) {
2242 do_error(_("malloc failed in %s (%" PRId64 " bytes)\n"),
2243 __func__,
2244 FREETAB_SIZE(ip->i_disk_size / mp->m_dir_geo->blksize));
2245 exit(1);
2246 }
2247 freetab->naents = ip->i_disk_size / mp->m_dir_geo->blksize;
2248 freetab->nents = 0;
2249 for (i = 0; i < freetab->naents; i++) {
2250 freetab->ents[i].v = NULLDATAOFF;
2251 freetab->ents[i].s = 0;
2252 }
2253
2254 /* is this a block, leaf, or node directory? */
2255 args.dp = ip;
2256 args.geo = mp->m_dir_geo;
2257 libxfs_dir2_isblock(&args, &isblock);
2258 libxfs_dir2_isleaf(&args, &isleaf);
2259
2260 /* check directory "data" blocks (ie. name/inode pairs) */
2261 for (da_bno = 0, next_da_bno = 0;
2262 next_da_bno != NULLFILEOFF && da_bno < mp->m_dir_geo->leafblk;
2263 da_bno = (xfs_dablk_t)next_da_bno) {
2264 const struct xfs_buf_ops *ops;
2265 int error;
2266 struct xfs_dir2_data_hdr *d;
2267
2268 next_da_bno = da_bno + mp->m_dir_geo->fsbcount - 1;
2269 if (bmap_next_offset(ip, &next_da_bno)) {
2270 /*
2271 * if this is the first block, there isn't anything we
2272 * can recover so we just trash it.
2273 */
2274 if (da_bno == 0) {
2275 fixit++;
2276 goto out_fix;
2277 }
2278 break;
2279 }
2280
2281 if (isblock)
2282 ops = &xfs_dir3_block_buf_ops;
2283 else
2284 ops = &xfs_dir3_data_buf_ops;
2285
2286 error = dir_read_buf(ip, da_bno, &bp, ops, &fixit);
2287 if (error) {
2288 do_warn(
2289 _("can't read data block %u for directory inode %" PRIu64 " error %d\n"),
2290 da_bno, ino, error);
2291 *num_illegal += 1;
2292
2293 /*
2294 * we try to read all "data" blocks, but if we are in
2295 * block form and we fail, there isn't anything else to
2296 * read, and nothing we can do but trash it.
2297 */
2298 if (isblock) {
2299 fixit++;
2300 goto out_fix;
2301 }
2302 continue;
2303 }
2304
2305 /* check v5 metadata */
2306 d = bp->b_addr;
2307 if (be32_to_cpu(d->magic) == XFS_DIR3_BLOCK_MAGIC ||
2308 be32_to_cpu(d->magic) == XFS_DIR3_DATA_MAGIC) {
2309 error = check_dir3_header(mp, bp, ino);
2310 if (error) {
2311 fixit++;
2312 if (isblock)
2313 goto out_fix;
2314 continue;
2315 }
2316 }
2317
2318 longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
2319 irec, ino_offset, bp, hashtab,
2320 &freetab, da_bno, isblock);
2321 if (isblock)
2322 break;
2323
2324 libxfs_buf_relse(bp);
2325 }
2326 fixit |= (*num_illegal != 0) || dir2_is_badino(ino) || *need_dot;
2327
2328 if (!dotdot_update) {
2329 /* check btree and freespace */
2330 if (isblock) {
2331 struct xfs_dir2_data_hdr *block;
2332 xfs_dir2_block_tail_t *btp;
2333 xfs_dir2_leaf_entry_t *blp;
2334
2335 block = bp->b_addr;
2336 btp = xfs_dir2_block_tail_p(mp->m_dir_geo, block);
2337 blp = xfs_dir2_block_leaf_p(btp);
2338 seeval = dir_hash_see_all(hashtab, blp,
2339 be32_to_cpu(btp->count),
2340 be32_to_cpu(btp->stale));
2341 if (dir_hash_check(hashtab, ip, seeval))
2342 fixit |= 1;
2343 } else if (isleaf) {
2344 fixit |= longform_dir2_check_leaf(mp, ip, hashtab,
2345 freetab);
2346 } else {
2347 fixit |= longform_dir2_check_node(mp, ip, hashtab,
2348 freetab);
2349 }
2350 }
2351 out_fix:
2352 if (isblock && bp)
2353 libxfs_buf_relse(bp);
2354
2355 if (!no_modify && (fixit || dotdot_update)) {
2356 longform_dir2_rebuild(mp, ino, ip, irec, ino_offset, hashtab);
2357 *num_illegal = 0;
2358 *need_dot = 0;
2359 } else {
2360 if (fixit || dotdot_update)
2361 do_warn(
2362 _("would rebuild directory inode %" PRIu64 "\n"), ino);
2363 }
2364
2365 free(freetab);
2366 }
2367
2368 /*
2369 * shortform directory v2 processing routines -- entry verification and
2370 * bad entry deletion (pruning).
2371 */
2372 static struct xfs_dir2_sf_entry *
2373 shortform_dir2_junk(
2374 struct xfs_mount *mp,
2375 struct xfs_dir2_sf_hdr *sfp,
2376 struct xfs_dir2_sf_entry *sfep,
2377 xfs_ino_t lino,
2378 int *max_size,
2379 int *index,
2380 int *bytes_deleted,
2381 int *ino_dirty)
2382 {
2383 struct xfs_dir2_sf_entry *next_sfep;
2384 int next_len;
2385 int next_elen;
2386
2387 if (lino == orphanage_ino)
2388 orphanage_ino = 0;
2389
2390 next_elen = libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen);
2391 next_sfep = libxfs_dir2_sf_nextentry(mp, sfp, sfep);
2392
2393 /*
2394 * if we are just checking, simply return the pointer to the next entry
2395 * here so that the checking loop can continue.
2396 */
2397 if (no_modify) {
2398 do_warn(_("would junk entry\n"));
2399 return next_sfep;
2400 }
2401
2402 /*
2403 * now move all the remaining entries down over the junked entry and
2404 * clear the newly unused bytes at the tail of the directory region.
2405 */
2406 next_len = *max_size - ((intptr_t)next_sfep - (intptr_t)sfp);
2407 *max_size -= next_elen;
2408 *bytes_deleted += next_elen;
2409
2410 memmove(sfep, next_sfep, next_len);
2411 memset((void *)((intptr_t)sfep + next_len), 0, next_elen);
2412 sfp->count -= 1;
2413 *ino_dirty = 1;
2414
2415 /*
2416 * WARNING: drop the index i by one so it matches the decremented count
2417 * for accurate comparisons in the loop test
2418 */
2419 (*index)--;
2420
2421 if (verbose)
2422 do_warn(_("junking entry\n"));
2423 else
2424 do_warn("\n");
2425 return sfep;
2426 }
2427
2428 static void
2429 shortform_dir2_entry_check(
2430 struct xfs_mount *mp,
2431 xfs_ino_t ino,
2432 struct xfs_inode *ip,
2433 int *ino_dirty,
2434 struct ino_tree_node *current_irec,
2435 int current_ino_offset,
2436 struct dir_hash_tab *hashtab)
2437 {
2438 xfs_ino_t lino;
2439 xfs_ino_t parent;
2440 struct xfs_dir2_sf_hdr *sfp;
2441 struct xfs_dir2_sf_entry *sfep;
2442 struct xfs_dir2_sf_entry *next_sfep;
2443 struct xfs_ifork *ifp;
2444 struct ino_tree_node *irec;
2445 int max_size;
2446 int ino_offset;
2447 int i;
2448 int bad_sfnamelen;
2449 int namelen;
2450 int bytes_deleted;
2451 char fname[MAXNAMELEN + 1];
2452 int i8;
2453
2454 ifp = &ip->i_df;
2455 sfp = (struct xfs_dir2_sf_hdr *) ifp->if_u1.if_data;
2456 *ino_dirty = 0;
2457 bytes_deleted = 0;
2458
2459 max_size = ifp->if_bytes;
2460 ASSERT(ip->i_disk_size <= ifp->if_bytes);
2461
2462 /*
2463 * if just rebuild a directory due to a "..", update and return
2464 */
2465 if (dotdot_update) {
2466 parent = get_inode_parent(current_irec, current_ino_offset);
2467 if (no_modify) {
2468 do_warn(
2469 _("would set .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2470 ino, parent);
2471 } else {
2472 do_warn(
2473 _("setting .. in sf dir inode %" PRIu64 " to %" PRIu64 "\n"),
2474 ino, parent);
2475 libxfs_dir2_sf_put_parent_ino(sfp, parent);
2476 *ino_dirty = 1;
2477 }
2478 return;
2479 }
2480
2481 /*
2482 * no '.' entry in shortform dirs, just bump up ref count by 1
2483 * '..' was already (or will be) accounted for and checked when
2484 * the directory is reached or will be taken care of when the
2485 * directory is moved to orphanage.
2486 */
2487 add_inode_ref(current_irec, current_ino_offset);
2488
2489 /*
2490 * Initialise i8 counter -- the parent inode number counts as well.
2491 */
2492 i8 = libxfs_dir2_sf_get_parent_ino(sfp) > XFS_DIR2_MAX_SHORT_INUM;
2493
2494 /*
2495 * now run through entries, stop at first bad entry, don't need
2496 * to skip over '..' since that's encoded in its own field and
2497 * no need to worry about '.' since it doesn't exist.
2498 */
2499 sfep = next_sfep = xfs_dir2_sf_firstentry(sfp);
2500
2501 for (i = 0; i < sfp->count && max_size >
2502 (intptr_t)next_sfep - (intptr_t)sfp;
2503 sfep = next_sfep, i++) {
2504 bad_sfnamelen = 0;
2505
2506 lino = libxfs_dir2_sf_get_ino(mp, sfp, sfep);
2507
2508 namelen = sfep->namelen;
2509
2510 ASSERT(no_modify || namelen > 0);
2511
2512 if (no_modify && namelen == 0) {
2513 /*
2514 * if we're really lucky, this is
2515 * the last entry in which case we
2516 * can use the dir size to set the
2517 * namelen value. otherwise, forget
2518 * it because we're not going to be
2519 * able to find the next entry.
2520 */
2521 bad_sfnamelen = 1;
2522
2523 if (i == sfp->count - 1) {
2524 namelen = ip->i_disk_size -
2525 ((intptr_t) &sfep->name[0] -
2526 (intptr_t) sfp);
2527 } else {
2528 /*
2529 * don't process the rest of the directory,
2530 * break out of processing loop
2531 */
2532 break;
2533 }
2534 } else if (no_modify && (intptr_t) sfep - (intptr_t) sfp +
2535 + libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen)
2536 > ip->i_disk_size) {
2537 bad_sfnamelen = 1;
2538
2539 if (i == sfp->count - 1) {
2540 namelen = ip->i_disk_size -
2541 ((intptr_t) &sfep->name[0] -
2542 (intptr_t) sfp);
2543 } else {
2544 /*
2545 * don't process the rest of the directory,
2546 * break out of processing loop
2547 */
2548 break;
2549 }
2550 }
2551
2552 memmove(fname, sfep->name, sfep->namelen);
2553 fname[sfep->namelen] = '\0';
2554
2555 ASSERT(no_modify || (lino != NULLFSINO && lino != 0));
2556 ASSERT(no_modify || libxfs_verify_dir_ino(mp, lino));
2557
2558 /*
2559 * Also skip entries with bogus inode numbers if we're
2560 * in no modify mode.
2561 */
2562
2563 if (no_modify && !libxfs_verify_dir_ino(mp, lino)) {
2564 next_sfep = libxfs_dir2_sf_nextentry(mp, sfp, sfep);
2565 continue;
2566 }
2567
2568 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, lino),
2569 XFS_INO_TO_AGINO(mp, lino));
2570
2571 if (irec == NULL) {
2572 do_warn(
2573 _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"),
2574 fname, ino, lino);
2575 next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2576 &max_size, &i, &bytes_deleted,
2577 ino_dirty);
2578 continue;
2579 }
2580
2581 ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
2582
2583 /*
2584 * if it's a free inode, blow out the entry.
2585 * by now, any inode that we think is free
2586 * really is free.
2587 */
2588 if (is_inode_free(irec, ino_offset)) {
2589 do_warn(
2590 _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"),
2591 fname, ino, lino);
2592 next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2593 &max_size, &i, &bytes_deleted,
2594 ino_dirty);
2595 continue;
2596 }
2597 /*
2598 * check if this inode is lost+found dir in the root
2599 */
2600 if (ino == mp->m_sb.sb_rootino && strcmp(fname, ORPHANAGE) == 0) {
2601 /*
2602 * if it's not a directory, trash it
2603 */
2604 if (!inode_isadir(irec, ino_offset)) {
2605 do_warn(
2606 _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"),
2607 ORPHANAGE, lino, ino);
2608 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2609 lino, &max_size, &i,
2610 &bytes_deleted, ino_dirty);
2611 continue;
2612 }
2613 /*
2614 * if this is a dup, it will be picked up below,
2615 * otherwise, mark it as the orphanage for later.
2616 */
2617 if (!orphanage_ino)
2618 orphanage_ino = lino;
2619 }
2620 /*
2621 * check for duplicate names in directory.
2622 */
2623 if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t)
2624 (sfep - xfs_dir2_sf_firstentry(sfp)),
2625 lino, sfep->namelen, sfep->name,
2626 libxfs_dir2_sf_get_ftype(mp, sfep))) {
2627 do_warn(
2628 _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
2629 fname, lino, ino);
2630 next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino,
2631 &max_size, &i, &bytes_deleted,
2632 ino_dirty);
2633 continue;
2634 }
2635
2636 if (!inode_isadir(irec, ino_offset)) {
2637 /*
2638 * check easy case first, regular inode, just bump
2639 * the link count
2640 */
2641 add_inode_reached(irec, ino_offset);
2642 } else {
2643 parent = get_inode_parent(irec, ino_offset);
2644
2645 /*
2646 * bump up the link counts in parent and child.
2647 * directory but if the link doesn't agree with
2648 * the .. in the child, blow out the entry
2649 */
2650 if (is_inode_reached(irec, ino_offset)) {
2651 do_warn(
2652 _("entry \"%s\" in directory inode %" PRIu64
2653 " references already connected inode %" PRIu64 ".\n"),
2654 fname, ino, lino);
2655 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2656 lino, &max_size, &i,
2657 &bytes_deleted, ino_dirty);
2658 continue;
2659 } else if (parent == ino) {
2660 add_inode_reached(irec, ino_offset);
2661 add_inode_ref(current_irec, current_ino_offset);
2662 } else if (parent == NULLFSINO) {
2663 /* ".." was missing, but this entry refers to it,
2664 so, set it as the parent and mark for rebuild */
2665 do_warn(
2666 _("entry \"%s\" in dir ino %" PRIu64 " doesn't have a .. entry, will set it in ino %" PRIu64 ".\n"),
2667 fname, ino, lino);
2668 set_inode_parent(irec, ino_offset, ino);
2669 add_inode_reached(irec, ino_offset);
2670 add_inode_ref(current_irec, current_ino_offset);
2671 add_dotdot_update(XFS_INO_TO_AGNO(mp, lino),
2672 irec, ino_offset);
2673 } else {
2674 do_warn(
2675 _("entry \"%s\" in directory inode %" PRIu64
2676 " not consistent with .. value (%" PRIu64
2677 ") in inode %" PRIu64 ",\n"),
2678 fname, ino, parent, lino);
2679 next_sfep = shortform_dir2_junk(mp, sfp, sfep,
2680 lino, &max_size, &i,
2681 &bytes_deleted, ino_dirty);
2682 continue;
2683 }
2684 }
2685
2686 /* validate ftype field if supported */
2687 if (xfs_has_ftype(mp)) {
2688 uint8_t dir_ftype;
2689 uint8_t ino_ftype;
2690
2691 dir_ftype = libxfs_dir2_sf_get_ftype(mp, sfep);
2692 ino_ftype = get_inode_ftype(irec, ino_offset);
2693
2694 if (dir_ftype != ino_ftype) {
2695 if (no_modify) {
2696 do_warn(
2697 _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2698 dir_ftype, ino_ftype,
2699 ino, lino);
2700 } else {
2701 do_warn(
2702 _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
2703 dir_ftype, ino_ftype,
2704 ino, lino);
2705 libxfs_dir2_sf_put_ftype(mp, sfep,
2706 ino_ftype);
2707 dir_hash_update_ftype(hashtab,
2708 (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)),
2709 ino_ftype);
2710 *ino_dirty = 1;
2711 }
2712 }
2713 }
2714
2715 if (lino > XFS_DIR2_MAX_SHORT_INUM)
2716 i8++;
2717
2718 /*
2719 * go onto next entry - we have to take entries with bad namelen
2720 * into account in no modify mode since we calculate size based
2721 * on next_sfep.
2722 */
2723 ASSERT(no_modify || bad_sfnamelen == 0);
2724 next_sfep = (struct xfs_dir2_sf_entry *)((intptr_t)sfep +
2725 (bad_sfnamelen
2726 ? libxfs_dir2_sf_entsize(mp, sfp, namelen)
2727 : libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen)));
2728 }
2729
2730 if (sfp->i8count != i8) {
2731 if (no_modify) {
2732 do_warn(_("would fix i8count in inode %" PRIu64 "\n"),
2733 ino);
2734 } else {
2735 if (i8 == 0) {
2736 struct xfs_dir2_sf_entry *tmp_sfep;
2737
2738 tmp_sfep = next_sfep;
2739 process_sf_dir2_fixi8(mp, sfp, &tmp_sfep);
2740 bytes_deleted +=
2741 (intptr_t)next_sfep -
2742 (intptr_t)tmp_sfep;
2743 next_sfep = tmp_sfep;
2744 } else
2745 sfp->i8count = i8;
2746 *ino_dirty = 1;
2747 do_warn(_("fixing i8count in inode %" PRIu64 "\n"),
2748 ino);
2749 }
2750 }
2751
2752 /*
2753 * sync up sizes if required
2754 */
2755 if (*ino_dirty && bytes_deleted > 0) {
2756 ASSERT(!no_modify);
2757 libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
2758 ip->i_disk_size -= bytes_deleted;
2759 }
2760
2761 if (ip->i_disk_size != ip->i_df.if_bytes) {
2762 ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
2763 ((intptr_t) next_sfep - (intptr_t) sfp));
2764 ip->i_disk_size = (xfs_fsize_t)
2765 ((intptr_t) next_sfep - (intptr_t) sfp);
2766 do_warn(
2767 _("setting size to %" PRId64 " bytes to reflect junked entries\n"),
2768 ip->i_disk_size);
2769 *ino_dirty = 1;
2770 }
2771 }
2772
2773 /*
2774 * processes all reachable inodes in directories
2775 */
2776 static void
2777 process_dir_inode(
2778 struct xfs_mount *mp,
2779 xfs_agnumber_t agno,
2780 struct ino_tree_node *irec,
2781 int ino_offset)
2782 {
2783 xfs_ino_t ino;
2784 struct xfs_inode *ip;
2785 struct xfs_trans *tp;
2786 struct dir_hash_tab *hashtab;
2787 int need_dot;
2788 int dirty, num_illegal, error, nres;
2789
2790 ino = XFS_AGINO_TO_INO(mp, agno, irec->ino_startnum + ino_offset);
2791
2792 /*
2793 * open up directory inode, check all entries,
2794 * then call prune_dir_entries to remove all
2795 * remaining illegal directory entries.
2796 */
2797
2798 ASSERT(!is_inode_refchecked(irec, ino_offset) || dotdot_update);
2799
2800 error = -libxfs_iget(mp, NULL, ino, 0, &ip);
2801 if (error) {
2802 if (!no_modify)
2803 do_error(
2804 _("couldn't map inode %" PRIu64 ", err = %d\n"),
2805 ino, error);
2806 else {
2807 do_warn(
2808 _("couldn't map inode %" PRIu64 ", err = %d\n"),
2809 ino, error);
2810 /*
2811 * see below for what we're doing if this
2812 * is root. Why do we need to do this here?
2813 * to ensure that the root doesn't show up
2814 * as being disconnected in the no_modify case.
2815 */
2816 if (mp->m_sb.sb_rootino == ino) {
2817 add_inode_reached(irec, 0);
2818 add_inode_ref(irec, 0);
2819 }
2820 }
2821
2822 add_inode_refchecked(irec, 0);
2823 return;
2824 }
2825
2826 need_dot = dirty = num_illegal = 0;
2827
2828 if (mp->m_sb.sb_rootino == ino) {
2829 /*
2830 * mark root inode reached and bump up
2831 * link count for root inode to account
2832 * for '..' entry since the root inode is
2833 * never reached by a parent. we know
2834 * that root's '..' is always good --
2835 * guaranteed by phase 3 and/or below.
2836 */
2837 add_inode_reached(irec, ino_offset);
2838 }
2839
2840 add_inode_refchecked(irec, ino_offset);
2841
2842 hashtab = dir_hash_init(ip->i_disk_size);
2843
2844 /*
2845 * look for bogus entries
2846 */
2847 switch (ip->i_df.if_format) {
2848 case XFS_DINODE_FMT_EXTENTS:
2849 case XFS_DINODE_FMT_BTREE:
2850 /*
2851 * also check for missing '.' in longform dirs.
2852 * missing .. entries are added if required when
2853 * the directory is connected to lost+found. but
2854 * we need to create '.' entries here.
2855 */
2856 longform_dir2_entry_check(mp, ino, ip,
2857 &num_illegal, &need_dot,
2858 irec, ino_offset,
2859 hashtab);
2860 break;
2861
2862 case XFS_DINODE_FMT_LOCAL:
2863 /*
2864 * using the remove reservation is overkill
2865 * since at most we'll only need to log the
2866 * inode but it's easier than wedging a
2867 * new define in ourselves.
2868 */
2869 nres = no_modify ? 0 : XFS_REMOVE_SPACE_RES(mp);
2870 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
2871 nres, 0, 0, &tp);
2872 if (error)
2873 res_failed(error);
2874
2875 libxfs_trans_ijoin(tp, ip, 0);
2876
2877 shortform_dir2_entry_check(mp, ino, ip, &dirty,
2878 irec, ino_offset,
2879 hashtab);
2880
2881 ASSERT(dirty == 0 || (dirty && !no_modify));
2882 if (dirty) {
2883 libxfs_trans_log_inode(tp, ip,
2884 XFS_ILOG_CORE | XFS_ILOG_DDATA);
2885 error = -libxfs_trans_commit(tp);
2886 if (error)
2887 do_error(
2888 _("error %d fixing shortform directory %llu\n"),
2889 error,
2890 (unsigned long long)ip->i_ino);
2891 } else {
2892 libxfs_trans_cancel(tp);
2893 }
2894 break;
2895
2896 default:
2897 break;
2898 }
2899 dir_hash_done(hashtab);
2900
2901 /*
2902 * if we have to create a .. for /, do it now *before*
2903 * we delete the bogus entries, otherwise the directory
2904 * could transform into a shortform dir which would
2905 * probably cause the simulation to choke. Even
2906 * if the illegal entries get shifted around, it's ok
2907 * because the entries are structurally intact and in
2908 * in hash-value order so the simulation won't get confused
2909 * if it has to move them around.
2910 */
2911 if (!no_modify && need_root_dotdot && ino == mp->m_sb.sb_rootino) {
2912 ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_LOCAL);
2913
2914 do_warn(_("recreating root directory .. entry\n"));
2915
2916 nres = XFS_MKDIR_SPACE_RES(mp, 2);
2917 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
2918 nres, 0, 0, &tp);
2919 if (error)
2920 res_failed(error);
2921
2922 libxfs_trans_ijoin(tp, ip, 0);
2923
2924 error = -libxfs_dir_createname(tp, ip, &xfs_name_dotdot,
2925 ip->i_ino, nres);
2926 if (error)
2927 do_error(
2928 _("can't make \"..\" entry in root inode %" PRIu64 ", createname error %d\n"), ino, error);
2929
2930 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2931 error = -libxfs_trans_commit(tp);
2932 if (error)
2933 do_error(
2934 _("root inode \"..\" entry recreation failed (%d)\n"), error);
2935
2936 need_root_dotdot = 0;
2937 } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino) {
2938 do_warn(_("would recreate root directory .. entry\n"));
2939 }
2940
2941 /*
2942 * if we need to create the '.' entry, do so only if
2943 * the directory is a longform dir. if it's been
2944 * turned into a shortform dir, then the inode is ok
2945 * since shortform dirs have no '.' entry and the inode
2946 * has already been committed by prune_lf_dir_entry().
2947 */
2948 if (need_dot) {
2949 /*
2950 * bump up our link count but don't
2951 * bump up the inode link count. chances
2952 * are good that even though we lost '.'
2953 * the inode link counts reflect '.' so
2954 * leave the inode link count alone and if
2955 * it turns out to be wrong, we'll catch
2956 * that in phase 7.
2957 */
2958 add_inode_ref(irec, ino_offset);
2959
2960 if (no_modify) {
2961 do_warn(
2962 _("would create missing \".\" entry in dir ino %" PRIu64 "\n"),
2963 ino);
2964 } else if (ip->i_df.if_format != XFS_DINODE_FMT_LOCAL) {
2965 /*
2966 * need to create . entry in longform dir.
2967 */
2968 do_warn(
2969 _("creating missing \".\" entry in dir ino %" PRIu64 "\n"), ino);
2970
2971 nres = XFS_MKDIR_SPACE_RES(mp, 1);
2972 error = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_mkdir,
2973 nres, 0, 0, &tp);
2974 if (error)
2975 res_failed(error);
2976
2977 libxfs_trans_ijoin(tp, ip, 0);
2978
2979 error = -libxfs_dir_createname(tp, ip, &xfs_name_dot,
2980 ip->i_ino, nres);
2981 if (error)
2982 do_error(
2983 _("can't make \".\" entry in dir ino %" PRIu64 ", createname error %d\n"),
2984 ino, error);
2985
2986 libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2987 error = -libxfs_trans_commit(tp);
2988 if (error)
2989 do_error(
2990 _("root inode \".\" entry recreation failed (%d)\n"), error);
2991 }
2992 }
2993 libxfs_irele(ip);
2994 }
2995
2996 /*
2997 * mark realtime bitmap and summary inodes as reached.
2998 * quota inode will be marked here as well
2999 */
3000 static void
3001 mark_standalone_inodes(xfs_mount_t *mp)
3002 {
3003 ino_tree_node_t *irec;
3004 int offset;
3005
3006 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rbmino),
3007 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino));
3008
3009 offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino) -
3010 irec->ino_startnum;
3011
3012 add_inode_reached(irec, offset);
3013
3014 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rsumino),
3015 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino));
3016
3017 offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino) -
3018 irec->ino_startnum;
3019
3020 add_inode_reached(irec, offset);
3021
3022 if (fs_quotas) {
3023 if (mp->m_sb.sb_uquotino
3024 && mp->m_sb.sb_uquotino != NULLFSINO) {
3025 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3026 mp->m_sb.sb_uquotino),
3027 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino));
3028 offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino)
3029 - irec->ino_startnum;
3030 add_inode_reached(irec, offset);
3031 }
3032 if (mp->m_sb.sb_gquotino
3033 && mp->m_sb.sb_gquotino != NULLFSINO) {
3034 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3035 mp->m_sb.sb_gquotino),
3036 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino));
3037 offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_gquotino)
3038 - irec->ino_startnum;
3039 add_inode_reached(irec, offset);
3040 }
3041 if (mp->m_sb.sb_pquotino
3042 && mp->m_sb.sb_pquotino != NULLFSINO) {
3043 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp,
3044 mp->m_sb.sb_pquotino),
3045 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino));
3046 offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino)
3047 - irec->ino_startnum;
3048 add_inode_reached(irec, offset);
3049 }
3050 }
3051 }
3052
3053 static void
3054 check_for_orphaned_inodes(
3055 xfs_mount_t *mp,
3056 xfs_agnumber_t agno,
3057 ino_tree_node_t *irec)
3058 {
3059 int i;
3060 xfs_ino_t ino;
3061
3062 for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
3063 ASSERT(is_inode_confirmed(irec, i));
3064 if (is_inode_free(irec, i))
3065 continue;
3066
3067 if (is_inode_reached(irec, i))
3068 continue;
3069
3070 ASSERT(inode_isadir(irec, i) ||
3071 num_inode_references(irec, i) == 0);
3072
3073 ino = XFS_AGINO_TO_INO(mp, agno, i + irec->ino_startnum);
3074 if (inode_isadir(irec, i))
3075 do_warn(_("disconnected dir inode %" PRIu64 ", "), ino);
3076 else
3077 do_warn(_("disconnected inode %" PRIu64 ", "), ino);
3078 if (!no_modify) {
3079 if (!orphanage_ino)
3080 orphanage_ino = mk_orphanage(mp);
3081 do_warn(_("moving to %s\n"), ORPHANAGE);
3082 mv_orphanage(mp, ino, inode_isadir(irec, i));
3083 } else {
3084 do_warn(_("would move to %s\n"), ORPHANAGE);
3085 }
3086 /*
3087 * for read-only case, even though the inode isn't
3088 * really reachable, set the flag (and bump our link
3089 * count) anyway to fool phase 7
3090 */
3091 add_inode_reached(irec, i);
3092 }
3093 }
3094
3095 static void
3096 do_dir_inode(
3097 struct workqueue *wq,
3098 xfs_agnumber_t agno,
3099 void *arg)
3100 {
3101 struct ino_tree_node *irec = arg;
3102 int i;
3103
3104 for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
3105 if (inode_isadir(irec, i))
3106 process_dir_inode(wq->wq_ctx, agno, irec, i);
3107 }
3108 }
3109
3110 static void
3111 traverse_function(
3112 struct workqueue *wq,
3113 xfs_agnumber_t agno,
3114 void *arg)
3115 {
3116 struct ino_tree_node *irec;
3117 prefetch_args_t *pf_args = arg;
3118 struct workqueue lwq;
3119 struct xfs_mount *mp = wq->wq_ctx;
3120
3121 wait_for_inode_prefetch(pf_args);
3122
3123 if (verbose)
3124 do_log(_(" - agno = %d\n"), agno);
3125
3126 /*
3127 * The more AGs we have in flight at once, the fewer processing threads
3128 * per AG. This means we don't overwhelm the machine with hundreds of
3129 * threads when we start acting on lots of AGs at once. We just want
3130 * enough that we can keep multiple CPUs busy across multiple AGs.
3131 */
3132 workqueue_create_bound(&lwq, mp, ag_stride, 1000);
3133
3134 for (irec = findfirst_inode_rec(agno); irec; irec = next_ino_rec(irec)) {
3135 if (irec->ino_isa_dir == 0)
3136 continue;
3137
3138 if (pf_args) {
3139 sem_post(&pf_args->ra_count);
3140 #ifdef XR_PF_TRACE
3141 {
3142 int i;
3143 sem_getvalue(&pf_args->ra_count, &i);
3144 pftrace(
3145 "processing inode chunk %p in AG %d (sem count = %d)",
3146 irec, agno, i);
3147 }
3148 #endif
3149 }
3150
3151 queue_work(&lwq, do_dir_inode, agno, irec);
3152 }
3153 destroy_work_queue(&lwq);
3154 cleanup_inode_prefetch(pf_args);
3155 }
3156
3157 static void
3158 update_missing_dotdot_entries(
3159 xfs_mount_t *mp)
3160 {
3161 dotdot_update_t *dir;
3162
3163 /*
3164 * these entries parents were updated, rebuild them again
3165 * set dotdot_update flag so processing routines do not count links
3166 */
3167 dotdot_update = 1;
3168 while (!list_empty(&dotdot_update_list)) {
3169 dir = list_entry(dotdot_update_list.prev, struct dotdot_update,
3170 list);
3171 list_del(&dir->list);
3172 process_dir_inode(mp, dir->agno, dir->irec, dir->ino_offset);
3173 free(dir);
3174 }
3175 }
3176
3177 static void
3178 traverse_ags(
3179 struct xfs_mount *mp)
3180 {
3181 do_inode_prefetch(mp, ag_stride, traverse_function, false, true);
3182 }
3183
3184 void
3185 phase6(xfs_mount_t *mp)
3186 {
3187 ino_tree_node_t *irec;
3188 int i;
3189
3190 memset(&zerocr, 0, sizeof(struct cred));
3191 memset(&zerofsx, 0, sizeof(struct fsxattr));
3192 orphanage_ino = 0;
3193
3194 do_log(_("Phase 6 - check inode connectivity...\n"));
3195
3196 incore_ext_teardown(mp);
3197
3198 add_ino_ex_data(mp);
3199
3200 /*
3201 * verify existence of root directory - if we have to
3202 * make one, it's ok for the incore data structs not to
3203 * know about it since everything about it (and the other
3204 * inodes in its chunk if a new chunk was created) are ok
3205 */
3206 if (need_root_inode) {
3207 if (!no_modify) {
3208 do_warn(_("reinitializing root directory\n"));
3209 mk_root_dir(mp);
3210 need_root_inode = 0;
3211 need_root_dotdot = 0;
3212 } else {
3213 do_warn(_("would reinitialize root directory\n"));
3214 }
3215 }
3216
3217 if (need_rbmino) {
3218 if (!no_modify) {
3219 do_warn(_("reinitializing realtime bitmap inode\n"));
3220 mk_rbmino(mp);
3221 need_rbmino = 0;
3222 } else {
3223 do_warn(_("would reinitialize realtime bitmap inode\n"));
3224 }
3225 }
3226
3227 if (need_rsumino) {
3228 if (!no_modify) {
3229 do_warn(_("reinitializing realtime summary inode\n"));
3230 mk_rsumino(mp);
3231 need_rsumino = 0;
3232 } else {
3233 do_warn(_("would reinitialize realtime summary inode\n"));
3234 }
3235 }
3236
3237 if (!no_modify) {
3238 do_log(
3239 _(" - resetting contents of realtime bitmap and summary inodes\n"));
3240 if (fill_rbmino(mp)) {
3241 do_warn(
3242 _("Warning: realtime bitmap may be inconsistent\n"));
3243 }
3244
3245 if (fill_rsumino(mp)) {
3246 do_warn(
3247 _("Warning: realtime bitmap may be inconsistent\n"));
3248 }
3249 }
3250
3251 mark_standalone_inodes(mp);
3252
3253 do_log(_(" - traversing filesystem ...\n"));
3254
3255 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
3256 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
3257
3258 /*
3259 * we always have a root inode, even if it's free...
3260 * if the root is free, forget it, lost+found is already gone
3261 */
3262 if (is_inode_free(irec, 0) || !inode_isadir(irec, 0)) {
3263 need_root_inode = 1;
3264 }
3265
3266 /*
3267 * then process all inodes by walking incore inode tree
3268 */
3269 traverse_ags(mp);
3270
3271 /*
3272 * any directories that had updated ".." entries, rebuild them now
3273 */
3274 update_missing_dotdot_entries(mp);
3275
3276 do_log(_(" - traversal finished ...\n"));
3277 do_log(_(" - moving disconnected inodes to %s ...\n"),
3278 ORPHANAGE);
3279
3280 /*
3281 * move all disconnected inodes to the orphanage
3282 */
3283 for (i = 0; i < glob_agcount; i++) {
3284 irec = findfirst_inode_rec(i);
3285 while (irec != NULL) {
3286 check_for_orphaned_inodes(mp, i, irec);
3287 irec = next_ino_rec(irec);
3288 }
3289 }
3290 }