1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
17 #include "err_protos.h"
22 static struct cred zerocr
;
23 static struct fsxattr zerofsx
;
24 static xfs_ino_t orphanage_ino
;
26 static struct xfs_name xfs_name_dot
= {(unsigned char *)".",
31 * Data structures used to keep track of directories where the ".."
32 * entries are updated. These must be rebuilt after the initial pass
34 typedef struct dotdot_update
{
35 struct list_head list
;
36 ino_tree_node_t
*irec
;
41 static LIST_HEAD(dotdot_update_list
);
42 static int dotdot_update
;
47 ino_tree_node_t
*irec
,
50 dotdot_update_t
*dir
= malloc(sizeof(dotdot_update_t
));
53 do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"),
54 sizeof(dotdot_update_t
));
56 INIT_LIST_HEAD(&dir
->list
);
59 dir
->ino_offset
= ino_offset
;
61 list_add(&dir
->list
, &dotdot_update_list
);
65 * Data structures and routines to keep track of directory entries
66 * and whether their leaf entry has been seen. Also used for name
67 * duplicate checking and rebuilding step if required.
70 struct dir_hash_ent
*nextbyhash
; /* next in name bucket */
71 struct dir_hash_ent
*nextbyorder
; /* next in order added */
72 xfs_dahash_t hashval
; /* hash value of name */
73 uint32_t address
; /* offset of data entry */
74 xfs_ino_t inum
; /* inode num of entry */
75 short junkit
; /* name starts with / */
76 short seen
; /* have seen leaf entry */
78 unsigned char namebuf
[];
82 int size
; /* size of hash tables */
83 struct dir_hash_ent
*first
; /* ptr to first added entry */
84 struct dir_hash_ent
*last
; /* ptr to last added entry */
85 struct dir_hash_ent
**byhash
; /* ptr to name hash buckets */
87 struct radix_tree_root byaddr
;
/*
 * DIR_HASH_TAB_SIZE(n): number of bytes needed for one struct dir_hash_tab
 * immediately followed by n bucket pointers (struct dir_hash_ent *).  The
 * hash table allocation uses this so the bucket array can live directly
 * behind the table header in the same allocation.
 */
90 #define DIR_HASH_TAB_SIZE(n) \
91 (sizeof(struct dir_hash_tab) + (sizeof(struct dir_hash_ent *) * (n)))
/*
 * DIR_HASH_FUNC(t,a): map hash value a to a bucket index of table t by
 * taking it modulo the table's size field.
 */
92 #define DIR_HASH_FUNC(t,a) ((a) % (t)->size)
95 * Track the contents of the freespace table in a directory.
97 typedef struct freetab
{
98 int naents
; /* expected number of data blocks */
99 int nents
; /* number of data blocks processed */
101 xfs_dir2_data_off_t v
;
/*
 * FREETAB_SIZE(n): number of bytes needed for a freetab_t whose trailing
 * ents array holds n struct freetab_ent elements, i.e. the offset of ents
 * within freetab_t plus n entries.
 */
105 #define FREETAB_SIZE(n) \
106 (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
/*
 * Result codes for the directory hash table checks.  Each code doubles as
 * an index into the table of human-readable descriptions used when a bad
 * hash table is reported ("ok", "duplicate leaf", "hash value mismatch",
 * "no data entry", "no leaf entry", "bad stale count").
 * DIR_HASH_CK_TOTAL is the number of codes and sizes that table.
 */
108 #define DIR_HASH_CK_OK 0
109 #define DIR_HASH_CK_DUPLEAF 1
110 #define DIR_HASH_CK_BADHASH 2
111 #define DIR_HASH_CK_NODATA 3
112 #define DIR_HASH_CK_NOLEAF 4
113 #define DIR_HASH_CK_BADSTALE 5
114 #define DIR_HASH_CK_TOTAL 6
117 * Need to handle CRC and validation errors specially here. If there is a
118 * validator error, re-read without the verifier so that we get a buffer we can
119 * check and repair. Re-attach the ops to the buffer after the read so that when
120 * it is rewritten the CRC is recalculated.
122 * If the buffer was not read, we return an error. If the buffer was read but
123 * had a CRC or corruption error, we reread it without the verifier and if it is
124 * read successfully we increment *crc_error and return 0. Otherwise we
125 * return the read error.
129 struct xfs_inode
*ip
,
131 struct xfs_buf
**bpp
,
132 const struct xfs_buf_ops
*ops
,
138 error
= -libxfs_da_read_buf(NULL
, ip
, bno
, 0, bpp
, XFS_DATA_FORK
, ops
);
140 if (error
!= EFSBADCRC
&& error
!= EFSCORRUPTED
)
143 error2
= -libxfs_da_read_buf(NULL
, ip
, bno
, 0, bpp
, XFS_DATA_FORK
,
154 * Returns 0 if the name already exists (ie. a duplicate)
158 struct xfs_mount
*mp
,
159 struct dir_hash_tab
*hashtab
,
166 xfs_dahash_t hash
= 0;
168 struct dir_hash_ent
*p
;
171 struct xfs_name xname
;
178 junk
= name
[0] == '/';
182 hash
= libxfs_dir2_hashname(mp
, &xname
);
183 byhash
= DIR_HASH_FUNC(hashtab
, hash
);
186 * search hash bucket for existing name.
188 for (p
= hashtab
->byhash
[byhash
]; p
; p
= p
->nextbyhash
) {
189 if (p
->hashval
== hash
&& p
->name
.len
== namelen
) {
190 if (memcmp(p
->name
.name
, name
, namelen
) == 0) {
200 * Allocate enough space for the hash entry and the name in a single
201 * allocation so we can store our own copy of the name for later use.
203 p
= calloc(1, sizeof(*p
) + namelen
+ 1);
205 do_error(_("malloc failed in dir_hash_add (%zu bytes)\n"),
208 error
= radix_tree_insert(&hashtab
->byaddr
, addr
, p
);
209 if (error
== EEXIST
) {
210 do_warn(_("duplicate addrs %u in directory!\n"), addr
);
214 radix_tree_tag_set(&hashtab
->byaddr
, addr
, HT_UNSEEN
);
217 hashtab
->last
->nextbyorder
= p
;
220 p
->nextbyorder
= NULL
;
223 if (!(p
->junkit
= junk
)) {
225 p
->nextbyhash
= hashtab
->byhash
[byhash
];
226 hashtab
->byhash
[byhash
] = p
;
232 /* Set up the name in the region trailing the hash entry. */
233 memcpy(p
->namebuf
, name
, namelen
);
234 p
->name
.name
= p
->namebuf
;
235 p
->name
.len
= namelen
;
236 p
->name
.type
= ftype
;
240 /* Mark an existing directory hashtable entry as junk. */
243 struct dir_hash_tab
*hashtab
,
244 xfs_dir2_dataptr_t addr
)
246 struct dir_hash_ent
*p
;
248 p
= radix_tree_lookup(&hashtab
->byaddr
, addr
);
257 struct dir_hash_tab
*hashtab
,
258 struct xfs_inode
*ip
,
261 static char *seevalstr
[DIR_HASH_CK_TOTAL
];
265 seevalstr
[DIR_HASH_CK_OK
] = _("ok");
266 seevalstr
[DIR_HASH_CK_DUPLEAF
] = _("duplicate leaf");
267 seevalstr
[DIR_HASH_CK_BADHASH
] = _("hash value mismatch");
268 seevalstr
[DIR_HASH_CK_NODATA
] = _("no data entry");
269 seevalstr
[DIR_HASH_CK_NOLEAF
] = _("no leaf entry");
270 seevalstr
[DIR_HASH_CK_BADSTALE
] = _("bad stale count");
274 if (seeval
== DIR_HASH_CK_OK
&&
275 radix_tree_tagged(&hashtab
->byaddr
, HT_UNSEEN
))
276 seeval
= DIR_HASH_CK_NOLEAF
;
277 if (seeval
== DIR_HASH_CK_OK
)
279 do_warn(_("bad hash table for directory inode %" PRIu64
" (%s): "),
280 ip
->i_ino
, seevalstr
[seeval
]);
282 do_warn(_("rebuilding\n"));
284 do_warn(_("would rebuild\n"));
290 struct dir_hash_tab
*hashtab
)
293 struct dir_hash_ent
*n
;
294 struct dir_hash_ent
*p
;
296 for (i
= 0; i
< hashtab
->size
; i
++) {
297 for (p
= hashtab
->byhash
[i
]; p
; p
= n
) {
299 radix_tree_delete(&hashtab
->byaddr
, p
->address
);
307 * Create a directory hash index structure based on the size of the directory we
308 * are about to try to repair. The size passed in is the size of the data
309 * segment of the directory in bytes, so we don't really know exactly how many
310 * entries are in it. Hence assume an entry size of around 64 bytes - that's a
311 * name length of 40+ bytes so should cover most situations with really large
314 static struct dir_hash_tab
*
318 struct dir_hash_tab
*hashtab
= NULL
;
326 * Try to allocate as large a hash table as possible. Failure to
327 * allocate isn't fatal, it will just result in slower performance as we
328 * reduce the size of the table.
330 while (hsize
>= 16) {
331 hashtab
= calloc(DIR_HASH_TAB_SIZE(hsize
), 1);
337 do_error(_("calloc failed in dir_hash_init\n"));
338 hashtab
->size
= hsize
;
339 hashtab
->byhash
= (struct dir_hash_ent
**)((char *)hashtab
+
340 sizeof(struct dir_hash_tab
));
341 INIT_RADIX_TREE(&hashtab
->byaddr
, 0);
347 struct dir_hash_tab
*hashtab
,
349 xfs_dir2_dataptr_t addr
)
351 struct dir_hash_ent
*p
;
353 p
= radix_tree_lookup(&hashtab
->byaddr
, addr
);
355 return DIR_HASH_CK_NODATA
;
356 if (!radix_tree_tag_get(&hashtab
->byaddr
, addr
, HT_UNSEEN
))
357 return DIR_HASH_CK_DUPLEAF
;
358 if (p
->junkit
== 0 && p
->hashval
!= hash
)
359 return DIR_HASH_CK_BADHASH
;
360 radix_tree_tag_clear(&hashtab
->byaddr
, addr
, HT_UNSEEN
);
361 return DIR_HASH_CK_OK
;
365 dir_hash_update_ftype(
366 struct dir_hash_tab
*hashtab
,
367 xfs_dir2_dataptr_t addr
,
370 struct dir_hash_ent
*p
;
372 p
= radix_tree_lookup(&hashtab
->byaddr
, addr
);
375 p
->name
.type
= ftype
;
379 * checks to make sure leafs match a data entry, and that the stale
384 struct dir_hash_tab
*hashtab
,
385 xfs_dir2_leaf_entry_t
*ents
,
393 for (i
= j
= 0; i
< count
; i
++) {
394 if (be32_to_cpu(ents
[i
].address
) == XFS_DIR2_NULL_DATAPTR
) {
398 rval
= dir_hash_see(hashtab
, be32_to_cpu(ents
[i
].hashval
),
399 be32_to_cpu(ents
[i
].address
));
400 if (rval
!= DIR_HASH_CK_OK
)
403 return j
== stale
? DIR_HASH_CK_OK
: DIR_HASH_CK_BADSTALE
;
407 * Given a block number in a fork, return the next valid block number (not a
408 * hole). If this is the last block number then NULLFILEOFF is returned.
412 struct xfs_inode
*ip
,
417 struct xfs_bmbt_irec got
;
418 struct xfs_iext_cursor icur
;
420 switch (ip
->i_df
.if_format
) {
421 case XFS_DINODE_FMT_LOCAL
:
424 case XFS_DINODE_FMT_BTREE
:
425 case XFS_DINODE_FMT_EXTENTS
:
431 /* Read extent map. */
432 error
= -libxfs_iread_extents(NULL
, ip
, XFS_DATA_FORK
);
437 if (!libxfs_iext_lookup_extent(ip
, &ip
->i_df
, bno
, &icur
, &got
))
440 *bnop
= got
.br_startoff
< bno
? bno
: got
.br_startoff
;
449 do_error(_("ran out of disk space!\n"));
451 do_error(_("xfs_trans_reserve returned %d\n"), err
);
455 reset_inode_fields(struct xfs_inode
*ip
)
461 ip
->i_cowextsize
= 0;
466 ip
->i_crtime
.tv_sec
= 0;
467 ip
->i_crtime
.tv_nsec
= 0;
471 mk_rbmino(xfs_mount_t
*mp
)
480 xfs_bmbt_irec_t map
[XFS_BMAP_MAX_NMAP
];
487 i
= -libxfs_trans_alloc_rollable(mp
, 10, &tp
);
491 error
= -libxfs_iget(mp
, tp
, mp
->m_sb
.sb_rbmino
, 0, &ip
);
494 _("couldn't iget realtime bitmap inode -- error - %d\n"),
498 reset_inode_fields(ip
);
500 VFS_I(ip
)->i_mode
= S_IFREG
;
501 ip
->i_df
.if_format
= XFS_DINODE_FMT_EXTENTS
;
503 ip
->i_afp
->if_format
= XFS_DINODE_FMT_EXTENTS
;
505 set_nlink(VFS_I(ip
), 1); /* account for sb ptr */
507 times
= XFS_ICHGTIME_CHG
| XFS_ICHGTIME_MOD
;
508 if (xfs_has_v3inodes(mp
)) {
509 VFS_I(ip
)->i_version
= 1;
511 times
|= XFS_ICHGTIME_CREATE
;
513 libxfs_trans_ichgtime(tp
, ip
, times
);
518 ip
->i_df
.if_bytes
= 0;
519 ip
->i_df
.if_u1
.if_root
= NULL
;
521 ip
->i_disk_size
= mp
->m_sb
.sb_rbmblocks
* mp
->m_sb
.sb_blocksize
;
526 libxfs_trans_ijoin(tp
, ip
, 0);
527 libxfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
528 error
= -libxfs_trans_commit(tp
);
530 do_error(_("%s: commit failed, error %d\n"), __func__
, error
);
533 * then allocate blocks for file and fill with zeroes (stolen
536 blocks
= mp
->m_sb
.sb_rbmblocks
+
537 XFS_BM_MAXLEVELS(mp
, XFS_DATA_FORK
) - 1;
538 error
= -libxfs_trans_alloc_rollable(mp
, blocks
, &tp
);
542 libxfs_trans_ijoin(tp
, ip
, 0);
544 while (bno
< mp
->m_sb
.sb_rbmblocks
) {
545 nmap
= XFS_BMAP_MAX_NMAP
;
546 error
= -libxfs_bmapi_write(tp
, ip
, bno
,
547 (xfs_extlen_t
)(mp
->m_sb
.sb_rbmblocks
- bno
),
548 0, mp
->m_sb
.sb_rbmblocks
, map
, &nmap
);
551 _("couldn't allocate realtime bitmap, error = %d\n"),
554 for (i
= 0, ep
= map
; i
< nmap
; i
++, ep
++) {
555 libxfs_device_zero(mp
->m_ddev_targp
,
556 XFS_FSB_TO_DADDR(mp
, ep
->br_startblock
),
557 XFS_FSB_TO_BB(mp
, ep
->br_blockcount
));
558 bno
+= ep
->br_blockcount
;
561 error
= -libxfs_trans_commit(tp
);
564 _("allocation of the realtime bitmap failed, error = %d\n"),
571 fill_rbmino(xfs_mount_t
*mp
)
585 error
= -libxfs_trans_alloc_rollable(mp
, 10, &tp
);
589 error
= -libxfs_iget(mp
, tp
, mp
->m_sb
.sb_rbmino
, 0, &ip
);
592 _("couldn't iget realtime bitmap inode -- error - %d\n"),
596 while (bno
< mp
->m_sb
.sb_rbmblocks
) {
598 * fill the file one block at a time
601 error
= -libxfs_bmapi_write(tp
, ip
, bno
, 1, 0, 1, &map
, &nmap
);
602 if (error
|| nmap
!= 1) {
604 _("couldn't map realtime bitmap block %" PRIu64
", error = %d\n"),
608 ASSERT(map
.br_startblock
!= HOLESTARTBLOCK
);
610 error
= -libxfs_trans_read_buf(
612 XFS_FSB_TO_DADDR(mp
, map
.br_startblock
),
613 XFS_FSB_TO_BB(mp
, 1), 1, &bp
, NULL
);
617 _("can't access block %" PRIu64
" (fsbno %" PRIu64
") of realtime bitmap inode %" PRIu64
"\n"),
618 bno
, map
.br_startblock
, mp
->m_sb
.sb_rbmino
);
622 memmove(bp
->b_addr
, bmp
, mp
->m_sb
.sb_blocksize
);
624 libxfs_trans_log_buf(tp
, bp
, 0, mp
->m_sb
.sb_blocksize
- 1);
626 bmp
= (xfs_rtword_t
*)((intptr_t) bmp
+ mp
->m_sb
.sb_blocksize
);
630 libxfs_trans_ijoin(tp
, ip
, 0);
631 error
= -libxfs_trans_commit(tp
);
633 do_error(_("%s: commit failed, error %d\n"), __func__
, error
);
639 fill_rsumino(xfs_mount_t
*mp
)
648 xfs_fileoff_t end_bno
;
653 end_bno
= mp
->m_rsumsize
>> mp
->m_sb
.sb_blocklog
;
655 error
= -libxfs_trans_alloc_rollable(mp
, 10, &tp
);
659 error
= -libxfs_iget(mp
, tp
, mp
->m_sb
.sb_rsumino
, 0, &ip
);
662 _("couldn't iget realtime summary inode -- error - %d\n"),
666 while (bno
< end_bno
) {
668 * fill the file one block at a time
671 error
= -libxfs_bmapi_write(tp
, ip
, bno
, 1, 0, 1, &map
, &nmap
);
672 if (error
|| nmap
!= 1) {
674 _("couldn't map realtime summary inode block %" PRIu64
", error = %d\n"),
678 ASSERT(map
.br_startblock
!= HOLESTARTBLOCK
);
680 error
= -libxfs_trans_read_buf(
682 XFS_FSB_TO_DADDR(mp
, map
.br_startblock
),
683 XFS_FSB_TO_BB(mp
, 1), 1, &bp
, NULL
);
687 _("can't access block %" PRIu64
" (fsbno %" PRIu64
") of realtime summary inode %" PRIu64
"\n"),
688 bno
, map
.br_startblock
, mp
->m_sb
.sb_rsumino
);
693 memmove(bp
->b_addr
, smp
, mp
->m_sb
.sb_blocksize
);
695 libxfs_trans_log_buf(tp
, bp
, 0, mp
->m_sb
.sb_blocksize
- 1);
697 smp
= (xfs_suminfo_t
*)((intptr_t)smp
+ mp
->m_sb
.sb_blocksize
);
701 libxfs_trans_ijoin(tp
, ip
, 0);
702 error
= -libxfs_trans_commit(tp
);
704 do_error(_("%s: commit failed, error %d\n"), __func__
, error
);
710 mk_rsumino(xfs_mount_t
*mp
)
720 xfs_bmbt_irec_t map
[XFS_BMAP_MAX_NMAP
];
727 i
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_ichange
, 10, 0, 0, &tp
);
731 error
= -libxfs_iget(mp
, tp
, mp
->m_sb
.sb_rsumino
, 0, &ip
);
734 _("couldn't iget realtime summary inode -- error - %d\n"),
738 reset_inode_fields(ip
);
740 VFS_I(ip
)->i_mode
= S_IFREG
;
741 ip
->i_df
.if_format
= XFS_DINODE_FMT_EXTENTS
;
743 ip
->i_afp
->if_format
= XFS_DINODE_FMT_EXTENTS
;
745 set_nlink(VFS_I(ip
), 1); /* account for sb ptr */
747 times
= XFS_ICHGTIME_CHG
| XFS_ICHGTIME_MOD
;
748 if (xfs_has_v3inodes(mp
)) {
749 VFS_I(ip
)->i_version
= 1;
751 times
|= XFS_ICHGTIME_CREATE
;
753 libxfs_trans_ichgtime(tp
, ip
, times
);
758 ip
->i_df
.if_bytes
= 0;
759 ip
->i_df
.if_u1
.if_root
= NULL
;
761 ip
->i_disk_size
= mp
->m_rsumsize
;
766 libxfs_trans_ijoin(tp
, ip
, 0);
767 libxfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
768 error
= -libxfs_trans_commit(tp
);
770 do_error(_("%s: commit failed, error %d\n"), __func__
, error
);
773 * then allocate blocks for file and fill with zeroes (stolen
776 nsumblocks
= mp
->m_rsumsize
>> mp
->m_sb
.sb_blocklog
;
777 blocks
= nsumblocks
+ XFS_BM_MAXLEVELS(mp
, XFS_DATA_FORK
) - 1;
778 error
= -libxfs_trans_alloc_rollable(mp
, blocks
, &tp
);
782 libxfs_trans_ijoin(tp
, ip
, 0);
784 while (bno
< nsumblocks
) {
785 nmap
= XFS_BMAP_MAX_NMAP
;
786 error
= -libxfs_bmapi_write(tp
, ip
, bno
,
787 (xfs_extlen_t
)(nsumblocks
- bno
),
788 0, nsumblocks
, map
, &nmap
);
791 _("couldn't allocate realtime summary inode, error = %d\n"),
794 for (i
= 0, ep
= map
; i
< nmap
; i
++, ep
++) {
795 libxfs_device_zero(mp
->m_ddev_targp
,
796 XFS_FSB_TO_DADDR(mp
, ep
->br_startblock
),
797 XFS_FSB_TO_BB(mp
, ep
->br_blockcount
));
798 bno
+= ep
->br_blockcount
;
801 error
= -libxfs_trans_commit(tp
);
804 _("allocation of the realtime summary ino failed, error = %d\n"),
811 * makes a new root directory.
814 mk_root_dir(xfs_mount_t
*mp
)
820 const mode_t mode
= 0755;
821 ino_tree_node_t
*irec
;
825 i
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_ichange
, 10, 0, 0, &tp
);
829 error
= -libxfs_iget(mp
, tp
, mp
->m_sb
.sb_rootino
, 0, &ip
);
831 do_error(_("could not iget root inode -- error - %d\n"), error
);
835 * take care of the core -- initialization from xfs_ialloc()
837 reset_inode_fields(ip
);
839 VFS_I(ip
)->i_mode
= mode
|S_IFDIR
;
840 ip
->i_df
.if_format
= XFS_DINODE_FMT_EXTENTS
;
842 ip
->i_afp
->if_format
= XFS_DINODE_FMT_EXTENTS
;
844 set_nlink(VFS_I(ip
), 2); /* account for . and .. */
846 times
= XFS_ICHGTIME_CHG
| XFS_ICHGTIME_MOD
;
847 if (xfs_has_v3inodes(mp
)) {
848 VFS_I(ip
)->i_version
= 1;
850 times
|= XFS_ICHGTIME_CREATE
;
852 libxfs_trans_ichgtime(tp
, ip
, times
);
853 libxfs_trans_ijoin(tp
, ip
, 0);
854 libxfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
859 ip
->i_df
.if_bytes
= 0;
860 ip
->i_df
.if_u1
.if_root
= NULL
;
863 * initialize the directory
865 libxfs_dir_init(tp
, ip
, ip
);
867 error
= -libxfs_trans_commit(tp
);
869 do_error(_("%s: commit failed, error %d\n"), __func__
, error
);
873 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, mp
->m_sb
.sb_rootino
),
874 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rootino
));
875 set_inode_isadir(irec
, XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rootino
) -
880 * orphanage name == lost+found
883 mk_orphanage(xfs_mount_t
*mp
)
889 ino_tree_node_t
*irec
;
893 const int mode
= 0755;
895 struct xfs_name xname
;
898 * check for an existing lost+found first, if it exists, return
899 * its inode. Otherwise, we can create it. Bad lost+found inodes
900 * would have been cleared in phase3 and phase4.
903 i
= -libxfs_iget(mp
, NULL
, mp
->m_sb
.sb_rootino
, 0, &pip
);
905 do_error(_("%d - couldn't iget root inode to obtain %s\n"),
908 xname
.name
= (unsigned char *)ORPHANAGE
;
909 xname
.len
= strlen(ORPHANAGE
);
910 xname
.type
= XFS_DIR3_FT_DIR
;
912 if (libxfs_dir_lookup(NULL
, pip
, &xname
, &ino
, NULL
) == 0)
916 * could not be found, create it
918 nres
= XFS_MKDIR_SPACE_RES(mp
, xname
.len
);
919 i
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_mkdir
, nres
, 0, 0, &tp
);
924 * use iget/ijoin instead of trans_iget because the ialloc
925 * wrapper can commit the transaction and start a new one
927 /* i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip);
929 do_error(_("%d - couldn't iget root inode to make %s\n"),
932 error
= -libxfs_dir_ialloc(&tp
, pip
, mode
|S_IFDIR
,
933 1, 0, &zerocr
, &zerofsx
, &ip
);
935 do_error(_("%s inode allocation failed %d\n"),
938 inc_nlink(VFS_I(ip
)); /* account for . */
941 irec
= find_inode_rec(mp
,
942 XFS_INO_TO_AGNO(mp
, ino
),
943 XFS_INO_TO_AGINO(mp
, ino
));
947 * This inode is allocated from a newly created inode
948 * chunk and therefore did not exist when inode chunks
949 * were processed in phase3. Add this group of inodes to
950 * the entry avl tree as if they were discovered in phase3.
952 irec
= set_inode_free_alloc(mp
, XFS_INO_TO_AGNO(mp
, ino
),
953 XFS_INO_TO_AGINO(mp
, ino
));
956 for (i
= 0; i
< XFS_INODES_PER_CHUNK
; i
++)
957 set_inode_free(irec
, i
);
960 ino_offset
= get_inode_offset(mp
, ino
, irec
);
963 * Mark the inode allocated to lost+found as used in the AVL tree
964 * so it is not skipped in phase 7
966 set_inode_used(irec
, ino_offset
);
967 add_inode_ref(irec
, ino_offset
);
968 add_inode_reached(irec
, ino_offset
);
971 * now that we know the transaction will stay around,
972 * add the root inode to it
974 libxfs_trans_ijoin(tp
, pip
, 0);
977 * create the actual entry
979 error
= -libxfs_dir_createname(tp
, pip
, &xname
, ip
->i_ino
, nres
);
982 _("can't make %s, createname error %d\n"),
986 * bump up the link count in the root directory to account
987 * for .. in the new directory, and update the irec copy of the
988 * on-disk nlink so we don't fail the link count check later.
990 inc_nlink(VFS_I(pip
));
991 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, mp
->m_sb
.sb_rootino
),
992 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rootino
));
993 add_inode_ref(irec
, 0);
994 set_inode_disk_nlinks(irec
, 0, get_inode_disk_nlinks(irec
, 0) + 1);
996 libxfs_trans_log_inode(tp
, pip
, XFS_ILOG_CORE
);
997 libxfs_dir_init(tp
, ip
, pip
);
998 libxfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
999 error
= -libxfs_trans_commit(tp
);
1001 do_error(_("%s directory creation failed -- bmapf error %d\n"),
1011 * move a file to the orphanage.
1016 xfs_ino_t ino
, /* inode # to be moved */
1017 int isa_dir
) /* 1 if inode is a directory */
1019 xfs_inode_t
*orphanage_ip
;
1020 xfs_ino_t entry_ino_num
;
1024 unsigned char fname
[MAXPATHLEN
+ 1];
1027 ino_tree_node_t
*irec
;
1029 struct xfs_name xname
;
1032 xname
.len
= snprintf((char *)fname
, sizeof(fname
), "%llu",
1033 (unsigned long long)ino
);
1035 err
= -libxfs_iget(mp
, NULL
, orphanage_ino
, 0, &orphanage_ip
);
1037 do_error(_("%d - couldn't iget orphanage inode\n"), err
);
1039 * Make sure the filename is unique in the lost+found
1042 while (libxfs_dir_lookup(NULL
, orphanage_ip
, &xname
, &entry_ino_num
,
1044 xname
.len
= snprintf((char *)fname
, sizeof(fname
), "%llu.%d",
1045 (unsigned long long)ino
, ++incr
);
1047 /* Orphans may not have a proper parent, so use custom ops here */
1048 err
= -libxfs_iget(mp
, NULL
, ino
, 0, &ino_p
);
1050 do_error(_("%d - couldn't iget disconnected inode\n"), err
);
1052 xname
.type
= libxfs_mode_to_ftype(VFS_I(ino_p
)->i_mode
);
1055 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, orphanage_ino
),
1056 XFS_INO_TO_AGINO(mp
, orphanage_ino
));
1058 ino_offset
= XFS_INO_TO_AGINO(mp
, orphanage_ino
) -
1060 nres
= XFS_DIRENTER_SPACE_RES(mp
, fnamelen
) +
1061 XFS_DIRENTER_SPACE_RES(mp
, 2);
1062 err
= -libxfs_dir_lookup(NULL
, ino_p
, &xfs_name_dotdot
,
1063 &entry_ino_num
, NULL
);
1065 ASSERT(err
== ENOENT
);
1067 err
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_rename
,
1072 libxfs_trans_ijoin(tp
, orphanage_ip
, 0);
1073 libxfs_trans_ijoin(tp
, ino_p
, 0);
1075 err
= -libxfs_dir_createname(tp
, orphanage_ip
, &xname
,
1079 _("name create failed in %s (%d)\n"), ORPHANAGE
, err
);
1082 add_inode_ref(irec
, ino_offset
);
1084 inc_nlink(VFS_I(orphanage_ip
));
1085 libxfs_trans_log_inode(tp
, orphanage_ip
, XFS_ILOG_CORE
);
1087 err
= -libxfs_dir_createname(tp
, ino_p
, &xfs_name_dotdot
,
1088 orphanage_ino
, nres
);
1091 _("creation of .. entry failed (%d)\n"), err
);
1093 inc_nlink(VFS_I(ino_p
));
1094 libxfs_trans_log_inode(tp
, ino_p
, XFS_ILOG_CORE
);
1095 err
= -libxfs_trans_commit(tp
);
1098 _("creation of .. entry failed (%d)\n"), err
);
1100 err
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_rename
,
1105 libxfs_trans_ijoin(tp
, orphanage_ip
, 0);
1106 libxfs_trans_ijoin(tp
, ino_p
, 0);
1109 err
= -libxfs_dir_createname(tp
, orphanage_ip
, &xname
,
1113 _("name create failed in %s (%d)\n"), ORPHANAGE
, err
);
1116 add_inode_ref(irec
, ino_offset
);
1118 inc_nlink(VFS_I(orphanage_ip
));
1119 libxfs_trans_log_inode(tp
, orphanage_ip
, XFS_ILOG_CORE
);
1122 * don't replace .. value if it already points
1123 * to us. that'll pop a libxfs/kernel ASSERT.
1125 if (entry_ino_num
!= orphanage_ino
) {
1126 err
= -libxfs_dir_replace(tp
, ino_p
,
1127 &xfs_name_dotdot
, orphanage_ino
,
1131 _("name replace op failed (%d)\n"), err
);
1134 err
= -libxfs_trans_commit(tp
);
1137 _("orphanage name replace op failed (%d)\n"), err
);
1142 * use the remove log reservation as that's
1143 * more accurate. we're only creating the
1144 * links, we're not doing the inode allocation
1145 * also accounted for in the create
1147 nres
= XFS_DIRENTER_SPACE_RES(mp
, xname
.len
);
1148 err
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_remove
,
1153 libxfs_trans_ijoin(tp
, orphanage_ip
, 0);
1154 libxfs_trans_ijoin(tp
, ino_p
, 0);
1156 err
= -libxfs_dir_createname(tp
, orphanage_ip
, &xname
, ino
,
1160 _("name create failed in %s (%d)\n"), ORPHANAGE
, err
);
1163 set_nlink(VFS_I(ino_p
), 1);
1164 libxfs_trans_log_inode(tp
, ino_p
, XFS_ILOG_CORE
);
1165 err
= -libxfs_trans_commit(tp
);
1168 _("orphanage name create failed (%d)\n"), err
);
1170 libxfs_irele(ino_p
);
1171 libxfs_irele(orphanage_ip
);
1181 do_warn(msg
, iname
, ino1
, ino2
);
1184 do_warn(_(", marking entry to be junked\n"));
1188 do_warn(_(", would junk entry\n"));
1192 /* Find and invalidate all the directory's buffers. */
1195 struct xfs_trans
*tp
,
1196 struct xfs_inode
*ip
,
1199 struct xfs_iext_cursor icur
;
1200 struct xfs_bmbt_irec rec
;
1201 struct xfs_ifork
*ifp
;
1202 struct xfs_da_geometry
*geo
;
1207 if (ip
->i_df
.if_format
!= XFS_DINODE_FMT_EXTENTS
&&
1208 ip
->i_df
.if_format
!= XFS_DINODE_FMT_BTREE
)
1211 geo
= tp
->t_mountp
->m_dir_geo
;
1212 ifp
= XFS_IFORK_PTR(ip
, XFS_DATA_FORK
);
1213 for_each_xfs_iext(ifp
, &icur
, &rec
) {
1214 for (dabno
= roundup(rec
.br_startoff
, geo
->fsbcount
);
1215 dabno
< rec
.br_startoff
+ rec
.br_blockcount
;
1216 dabno
+= geo
->fsbcount
) {
1218 error
= -libxfs_da_get_buf(tp
, ip
, dabno
, &bp
,
1224 libxfs_trans_binval(tp
, bp
);
1225 libxfs_trans_brelse(tp
, bp
);
1233 * Unexpected failure during the rebuild will leave the entries in
1234 * lost+found on the next run
1238 longform_dir2_rebuild(
1239 struct xfs_mount
*mp
,
1241 struct xfs_inode
*ip
,
1242 struct ino_tree_node
*irec
,
1244 struct dir_hash_tab
*hashtab
)
1248 struct xfs_trans
*tp
;
1249 xfs_fileoff_t lastblock
;
1250 struct xfs_inode pip
;
1251 struct dir_hash_ent
*p
;
1255 * trash directory completely and rebuild from scratch using the
1256 * name/inode pairs in the hash table
1259 do_warn(_("rebuilding directory inode %" PRIu64
"\n"), ino
);
1262 * first attempt to locate the parent inode, if it can't be
1263 * found, set it to the root inode and it'll be moved to the
1264 * orphanage later (the inode number here needs to be valid
1265 * for the libxfs_dir_init() call).
1267 pip
.i_ino
= get_inode_parent(irec
, ino_offset
);
1268 if (pip
.i_ino
== NULLFSINO
||
1269 libxfs_dir_ino_validate(mp
, pip
.i_ino
))
1270 pip
.i_ino
= mp
->m_sb
.sb_rootino
;
1272 nres
= XFS_REMOVE_SPACE_RES(mp
);
1273 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_remove
, nres
, 0, 0, &tp
);
1276 libxfs_trans_ijoin(tp
, ip
, 0);
1278 error
= dir_binval(tp
, ip
, XFS_DATA_FORK
);
1280 do_error(_("error %d invalidating directory %llu blocks\n"),
1281 error
, (unsigned long long)ip
->i_ino
);
1283 if ((error
= -libxfs_bmap_last_offset(ip
, &lastblock
, XFS_DATA_FORK
)))
1284 do_error(_("xfs_bmap_last_offset failed -- error - %d\n"),
1287 /* free all data, leaf, node and freespace blocks */
1289 error
= -libxfs_bunmapi(tp
, ip
, 0, lastblock
, XFS_BMAPI_METADATA
,
1292 do_warn(_("xfs_bunmapi failed -- error - %d\n"), error
);
1293 goto out_bmap_cancel
;
1295 error
= -libxfs_defer_finish(&tp
);
1297 do_warn(("defer_finish failed -- error - %d\n"), error
);
1298 goto out_bmap_cancel
;
1301 * Close out trans and start the next one in the chain.
1303 error
= -libxfs_trans_roll_inode(&tp
, ip
);
1305 goto out_bmap_cancel
;
1308 error
= -libxfs_dir_init(tp
, ip
, &pip
);
1310 do_warn(_("xfs_dir_init failed -- error - %d\n"), error
);
1311 goto out_bmap_cancel
;
1314 error
= -libxfs_trans_commit(tp
);
1317 _("dir init failed (%d)\n"), error
);
1319 if (ino
== mp
->m_sb
.sb_rootino
)
1320 need_root_dotdot
= 0;
1322 /* go through the hash list and re-add the inodes */
1324 for (p
= hashtab
->first
; p
; p
= p
->nextbyorder
) {
1326 if (p
->name
.name
[0] == '/' || (p
->name
.name
[0] == '.' &&
1327 (p
->name
.len
== 1 || (p
->name
.len
== 2 &&
1328 p
->name
.name
[1] == '.'))))
1331 nres
= XFS_CREATE_SPACE_RES(mp
, p
->name
.len
);
1332 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_create
,
1337 libxfs_trans_ijoin(tp
, ip
, 0);
1339 error
= -libxfs_dir_createname(tp
, ip
, &p
->name
, p
->inum
,
1343 _("name create failed in ino %" PRIu64
" (%d)\n"), ino
, error
);
1344 goto out_bmap_cancel
;
1347 error
= -libxfs_trans_commit(tp
);
1350 _("name create failed (%d) during rebuild\n"), error
);
1356 libxfs_trans_cancel(tp
);
1362 * Kill a block in a version 2 inode.
1363 * Makes its own transaction.
1377 nres
= XFS_REMOVE_SPACE_RES(mp
);
1378 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_remove
, nres
, 0, 0, &tp
);
1381 libxfs_trans_ijoin(tp
, ip
, 0);
1382 libxfs_trans_bjoin(tp
, bp
);
1383 libxfs_trans_bhold(tp
, bp
);
1384 memset(&args
, 0, sizeof(args
));
1387 args
.whichfork
= XFS_DATA_FORK
;
1388 args
.geo
= mp
->m_dir_geo
;
1389 if (da_bno
>= mp
->m_dir_geo
->leafblk
&& da_bno
< mp
->m_dir_geo
->freeblk
)
1390 error
= -libxfs_da_shrink_inode(&args
, da_bno
, bp
);
1392 error
= -libxfs_dir2_shrink_inode(&args
,
1393 xfs_dir2_da_to_db(mp
->m_dir_geo
, da_bno
), bp
);
1395 do_error(_("shrink_inode failed inode %" PRIu64
" block %u\n"),
1397 error
= -libxfs_trans_commit(tp
);
1400 _("directory shrink failed (%d)\n"), error
);
1404 check_longform_ftype(
1405 struct xfs_mount
*mp
,
1406 struct xfs_inode
*ip
,
1407 xfs_dir2_data_entry_t
*dep
,
1408 ino_tree_node_t
*irec
,
1410 struct dir_hash_tab
*hashtab
,
1411 xfs_dir2_dataptr_t addr
,
1412 struct xfs_da_args
*da
,
1415 xfs_ino_t inum
= be64_to_cpu(dep
->inumber
);
1419 if (!xfs_has_ftype(mp
))
1422 dir_ftype
= libxfs_dir2_data_get_ftype(mp
, dep
);
1423 ino_ftype
= get_inode_ftype(irec
, ino_offset
);
1425 if (dir_ftype
== ino_ftype
)
1430 _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64
"/%" PRIu64
"\n"),
1431 dir_ftype
, ino_ftype
,
1437 _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64
"/%" PRIu64
"\n"),
1438 dir_ftype
, ino_ftype
,
1440 libxfs_dir2_data_put_ftype(mp
, dep
, ino_ftype
);
1441 libxfs_dir2_data_log_entry(da
, bp
, dep
);
1442 dir_hash_update_ftype(hashtab
, addr
, ino_ftype
);
1446 * process a data block, also checks for .. entry
1447 * and corrects it to match what we think .. should be
1450 longform_dir2_entry_check_data(
1451 struct xfs_mount
*mp
,
1452 struct xfs_inode
*ip
,
1455 struct ino_tree_node
*current_irec
,
1456 int current_ino_offset
,
1458 struct dir_hash_tab
*hashtab
,
1459 freetab_t
**freetabp
,
1463 xfs_dir2_dataptr_t addr
;
1464 xfs_dir2_leaf_entry_t
*blp
;
1465 xfs_dir2_block_tail_t
*btp
;
1466 struct xfs_dir2_data_hdr
*d
;
1468 xfs_dir2_data_entry_t
*dep
;
1469 xfs_dir2_data_unused_t
*dup
;
1470 struct xfs_dir2_data_free
*bf
;
1473 char fname
[MAXNAMELEN
+ 1];
1478 ino_tree_node_t
*irec
;
1489 struct xfs_da_args da
= {
1491 .geo
= mp
->m_dir_geo
,
1496 ptr
= (char *)d
+ mp
->m_dir_geo
->data_entry_offset
;
1498 needscan
= needlog
= 0;
1500 freetab
= *freetabp
;
1502 btp
= xfs_dir2_block_tail_p(mp
->m_dir_geo
, d
);
1503 blp
= xfs_dir2_block_leaf_p(btp
);
1504 endptr
= (char *)blp
;
1505 if (endptr
> (char *)btp
)
1506 endptr
= (char *)btp
;
1507 if (xfs_has_crc(mp
))
1508 wantmagic
= XFS_DIR3_BLOCK_MAGIC
;
1510 wantmagic
= XFS_DIR2_BLOCK_MAGIC
;
1512 endptr
= (char *)d
+ mp
->m_dir_geo
->blksize
;
1513 if (xfs_has_crc(mp
))
1514 wantmagic
= XFS_DIR3_DATA_MAGIC
;
1516 wantmagic
= XFS_DIR2_DATA_MAGIC
;
1518 db
= xfs_dir2_da_to_db(mp
->m_dir_geo
, da_bno
);
1520 /* check for data block beyond expected end */
1521 if (freetab
->naents
<= db
) {
1522 struct freetab_ent e
;
1524 *freetabp
= freetab
= realloc(freetab
, FREETAB_SIZE(db
+ 1));
1526 do_error(_("realloc failed in %s (%zu bytes)\n"),
1527 __func__
, FREETAB_SIZE(db
+ 1));
1531 for (i
= freetab
->naents
; i
< db
; i
++)
1532 freetab
->ents
[i
] = e
;
1533 freetab
->naents
= db
+ 1;
1536 /* check the data block */
1537 while (ptr
< endptr
) {
1539 /* check for freespace */
1540 dup
= (xfs_dir2_data_unused_t
*)ptr
;
1541 if (XFS_DIR2_DATA_FREE_TAG
== be16_to_cpu(dup
->freetag
)) {
1543 /* check for invalid freespace length */
1544 if (ptr
+ be16_to_cpu(dup
->length
) > endptr
||
1545 be16_to_cpu(dup
->length
) == 0 ||
1546 (be16_to_cpu(dup
->length
) &
1547 (XFS_DIR2_DATA_ALIGN
- 1)))
1550 /* check for invalid tag */
1551 if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup
)) !=
1552 (char *)dup
- (char *)d
)
1555 /* check for block with no data entries */
1556 if ((ptr
== (char *)d
+ mp
->m_dir_geo
->data_entry_offset
) &&
1557 (ptr
+ be16_to_cpu(dup
->length
) >= endptr
)) {
1563 /* continue at the end of the freespace */
1564 ptr
+= be16_to_cpu(dup
->length
);
1569 /* validate data entry size */
1570 dep
= (xfs_dir2_data_entry_t
*)ptr
;
1571 if (ptr
+ libxfs_dir2_data_entsize(mp
, dep
->namelen
) > endptr
)
1573 if (be16_to_cpu(*libxfs_dir2_data_entry_tag_p(mp
, dep
)) !=
1574 (char *)dep
- (char *)d
)
1576 ptr
+= libxfs_dir2_data_entsize(mp
, dep
->namelen
);
1579 /* did we find an empty or corrupt block? */
1580 if (ptr
!= endptr
) {
1583 _("empty data block %u in directory inode %" PRIu64
": "),
1587 ("corrupt block %u in directory inode %" PRIu64
": "),
1591 do_warn(_("junking block\n"));
1592 dir2_kill_block(mp
, ip
, da_bno
, bp
);
1594 do_warn(_("would junk block\n"));
1596 freetab
->ents
[db
].v
= NULLDATAOFF
;
1600 /* update number of data blocks processed */
1601 if (freetab
->nents
< db
+ 1)
1602 freetab
->nents
= db
+ 1;
1604 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_remove
, 0, 0, 0, &tp
);
1608 libxfs_trans_ijoin(tp
, ip
, 0);
1609 libxfs_trans_bjoin(tp
, bp
);
1610 libxfs_trans_bhold(tp
, bp
);
1611 if (be32_to_cpu(d
->magic
) != wantmagic
) {
1613 _("bad directory block magic # %#x for directory inode %" PRIu64
" block %d: "),
1614 be32_to_cpu(d
->magic
), ip
->i_ino
, da_bno
);
1616 do_warn(_("fixing magic # to %#x\n"), wantmagic
);
1617 d
->magic
= cpu_to_be32(wantmagic
);
1620 do_warn(_("would fix magic # to %#x\n"), wantmagic
);
1623 ptr
= (char *)d
+ mp
->m_dir_geo
->data_entry_offset
;
1625 * look at each entry. reference inode pointed to by each
1626 * entry in the incore inode tree.
1627 * if not a directory, set reached flag, increment link count
1628 * if a directory and reached, mark entry as to be deleted.
1629 * if a directory, check to see if recorded parent
1630 * matches current inode #,
1631 * if so, then set reached flag, increment link count
1632 * of current and child dir inodes, push the child
1633 * directory inode onto the directory stack.
1634 * if current inode != parent, then mark entry to be deleted.
1636 while (ptr
< endptr
) {
1637 dup
= (xfs_dir2_data_unused_t
*)ptr
;
1638 if (be16_to_cpu(dup
->freetag
) == XFS_DIR2_DATA_FREE_TAG
) {
1641 _("directory inode %" PRIu64
" block %u has consecutive free entries: "),
1645 do_warn(_("joining together\n"));
1646 len
= be16_to_cpu(dup
->length
);
1647 libxfs_dir2_data_use_free(&da
, bp
, dup
,
1648 ptr
- (char *)d
, len
, &needlog
,
1650 libxfs_dir2_data_make_free(&da
, bp
,
1651 ptr
- (char *)d
, len
, &needlog
,
1654 do_warn(_("would join together\n"));
1656 ptr
+= be16_to_cpu(dup
->length
);
1660 addr
= xfs_dir2_db_off_to_dataptr(mp
->m_dir_geo
, db
,
1662 dep
= (xfs_dir2_data_entry_t
*)ptr
;
1663 ptr
+= libxfs_dir2_data_entsize(mp
, dep
->namelen
);
1664 inum
= be64_to_cpu(dep
->inumber
);
1667 * skip bogus entries (leading '/'). they'll be deleted
1668 * later. must still log it, else we leak references to
1671 if (dep
->name
[0] == '/') {
1674 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1678 memmove(fname
, dep
->name
, dep
->namelen
);
1679 fname
[dep
->namelen
] = '\0';
1680 ASSERT(inum
!= NULLFSINO
);
1682 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, inum
),
1683 XFS_INO_TO_AGINO(mp
, inum
));
1687 _("entry \"%s\" in directory inode %" PRIu64
" points to non-existent inode %" PRIu64
""),
1688 fname
, ip
->i_ino
, inum
)) {
1690 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1694 ino_offset
= XFS_INO_TO_AGINO(mp
, inum
) - irec
->ino_startnum
;
1697 * if it's a free inode, blow out the entry.
1698 * by now, any inode that we think is free
1701 if (is_inode_free(irec
, ino_offset
)) {
1704 _("entry \"%s\" in directory inode %" PRIu64
" points to free inode %" PRIu64
),
1705 fname
, ip
->i_ino
, inum
)) {
1707 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1713 * check if this inode is lost+found dir in the root
1715 if (inum
== mp
->m_sb
.sb_rootino
&& strcmp(fname
, ORPHANAGE
) == 0) {
1717 * if it's not a directory, trash it
1719 if (!inode_isadir(irec
, ino_offset
)) {
1722 _("%s (ino %" PRIu64
") in root (%" PRIu64
") is not a directory"),
1723 ORPHANAGE
, inum
, ip
->i_ino
)) {
1725 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1730 * if this is a dup, it will be picked up below,
1731 * otherwise, mark it as the orphanage for later.
1734 orphanage_ino
= inum
;
1738 * check for duplicate names in directory.
1740 if (!dir_hash_add(mp
, hashtab
, addr
, inum
, dep
->namelen
,
1741 dep
->name
, libxfs_dir2_data_get_ftype(mp
, dep
))) {
1744 _("entry \"%s\" (ino %" PRIu64
") in dir %" PRIu64
" is a duplicate name"),
1745 fname
, inum
, ip
->i_ino
)) {
1747 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1749 if (inum
== orphanage_ino
)
1755 * if just scanning to rebuild a directory due to a ".."
1756 * update, just continue
1762 * skip the '..' entry since it's checked when the
1763 * directory is reached by something else. if it never
1764 * gets reached, it'll be moved to the orphanage and we'll
1765 * take care of it then. If it doesn't exist at all, the
1766 * directory needs to be rebuilt first before being added
1769 if (dep
->namelen
== 2 && dep
->name
[0] == '.' &&
1770 dep
->name
[1] == '.') {
1772 /* ".." should be in the first block */
1775 _("entry \"%s\" (ino %" PRIu64
") in dir %" PRIu64
" is not in the the first block"), fname
,
1777 dir_hash_junkit(hashtab
, addr
);
1779 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1784 check_longform_ftype(mp
, ip
, dep
, irec
,
1785 ino_offset
, hashtab
, addr
, &da
,
1789 ASSERT(no_modify
|| libxfs_verify_dir_ino(mp
, inum
));
1791 * special case the . entry. we know there's only one
1792 * '.' and only '.' points to itself because bogus entries
1793 * got trashed in phase 3 if there were > 1.
1794 * bump up link count for '.' but don't set reached
1795 * until we're actually reached by another directory
1796 * '..' is already accounted for or will be taken care
1797 * of when directory is moved to orphanage.
1799 if (ip
->i_ino
== inum
) {
1801 (dep
->name
[0] == '.' && dep
->namelen
== 1));
1802 add_inode_ref(current_irec
, current_ino_offset
);
1804 dep
!= (void *)d
+ mp
->m_dir_geo
->data_entry_offset
) {
1805 /* "." should be the first entry */
1808 _("entry \"%s\" in dir %" PRIu64
" is not the first entry"),
1809 fname
, inum
, ip
->i_ino
)) {
1810 dir_hash_junkit(hashtab
, addr
);
1812 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1817 check_longform_ftype(mp
, ip
, dep
, irec
,
1818 ino_offset
, hashtab
, addr
, &da
,
1824 * skip entries with bogus inumbers if we're in no modify mode
1826 if (no_modify
&& !libxfs_verify_dir_ino(mp
, inum
))
1829 /* validate ftype field if supported */
1830 check_longform_ftype(mp
, ip
, dep
, irec
, ino_offset
, hashtab
,
1834 * check easy case first, regular inode, just bump
1835 * the link count and continue
1837 if (!inode_isadir(irec
, ino_offset
)) {
1838 add_inode_reached(irec
, ino_offset
);
1841 parent
= get_inode_parent(irec
, ino_offset
);
1842 ASSERT(parent
!= 0);
1845 * bump up the link counts in parent and child
1846 * directory but if the link doesn't agree with
1847 * the .. in the child, blow out the entry.
1848 * if the directory has already been reached,
1849 * blow away the entry also.
1851 if (is_inode_reached(irec
, ino_offset
)) {
1854 _("entry \"%s\" in dir %" PRIu64
" points to an already connected directory inode %" PRIu64
"\n"),
1855 fname
, ip
->i_ino
, inum
);
1856 } else if (parent
== ip
->i_ino
) {
1857 add_inode_reached(irec
, ino_offset
);
1858 add_inode_ref(current_irec
, current_ino_offset
);
1859 } else if (parent
== NULLFSINO
) {
1860 /* ".." was missing, but this entry refers to it,
1861 so, set it as the parent and mark for rebuild */
1863 _("entry \"%s\" in dir ino %" PRIu64
" doesn't have a .. entry, will set it in ino %" PRIu64
".\n"),
1864 fname
, ip
->i_ino
, inum
);
1865 set_inode_parent(irec
, ino_offset
, ip
->i_ino
);
1866 add_inode_reached(irec
, ino_offset
);
1867 add_inode_ref(current_irec
, current_ino_offset
);
1868 add_dotdot_update(XFS_INO_TO_AGNO(mp
, inum
), irec
,
1873 _("entry \"%s\" in dir inode %" PRIu64
" inconsistent with .. value (%" PRIu64
") in ino %" PRIu64
"\n"),
1874 fname
, ip
->i_ino
, parent
, inum
);
1877 if (inum
== orphanage_ino
)
1881 dir_hash_junkit(hashtab
, addr
);
1883 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1886 _("\twill clear entry \"%s\"\n"),
1889 do_warn(_("\twould clear entry \"%s\"\n"),
1894 *num_illegal
+= nbad
;
1896 libxfs_dir2_data_freescan(mp
, d
, &i
);
1898 libxfs_dir2_data_log_header(&da
, bp
);
1899 error
= -libxfs_trans_commit(tp
);
1902 _("directory block fixing failed (%d)\n"), error
);
1904 /* record the largest free space in the freetab for later checking */
1905 bf
= libxfs_dir2_data_bestfree_p(mp
, d
);
1906 freetab
->ents
[db
].v
= be16_to_cpu(bf
[0].length
);
1907 freetab
->ents
[db
].s
= 0;
1910 /* check v5 metadata */
1912 __check_dir3_header(
1913 struct xfs_mount
*mp
,
1922 if (be64_to_cpu(owner
) != ino
) {
1924 _("expected owner inode %" PRIu64
", got %llu, directory block %" PRIu64
"\n"),
1925 ino
, (unsigned long long)be64_to_cpu(owner
), xfs_buf_daddr(bp
));
1928 /* verify block number */
1929 if (be64_to_cpu(blkno
) != xfs_buf_daddr(bp
)) {
1931 _("expected block %" PRIu64
", got %llu, directory inode %" PRIu64
"\n"),
1932 xfs_buf_daddr(bp
), (unsigned long long)be64_to_cpu(blkno
), ino
);
1936 if (platform_uuid_compare(uuid
, &mp
->m_sb
.sb_meta_uuid
) != 0) {
1938 _("wrong FS UUID, directory inode %" PRIu64
" block %" PRIu64
"\n"),
1939 ino
, xfs_buf_daddr(bp
));
1948 struct xfs_mount
*mp
,
1952 struct xfs_da3_blkinfo
*info
= bp
->b_addr
;
1954 return __check_dir3_header(mp
, bp
, ino
, info
->owner
, info
->blkno
,
1960 struct xfs_mount
*mp
,
1964 struct xfs_dir3_blk_hdr
*info
= bp
->b_addr
;
1966 return __check_dir3_header(mp
, bp
, ino
, info
->owner
, info
->blkno
,
1971 * Check contents of leaf-form block.
1974 longform_dir2_check_leaf(
1975 struct xfs_mount
*mp
,
1976 struct xfs_inode
*ip
,
1977 struct dir_hash_tab
*hashtab
,
1978 struct freetab
*freetab
)
1985 xfs_dir2_leaf_t
*leaf
;
1986 xfs_dir2_leaf_tail_t
*ltp
;
1988 struct xfs_dir2_leaf_entry
*ents
;
1989 struct xfs_dir3_icleaf_hdr leafhdr
;
1993 da_bno
= mp
->m_dir_geo
->leafblk
;
1994 error
= dir_read_buf(ip
, da_bno
, &bp
, &xfs_dir3_leaf1_buf_ops
, &fixit
);
1995 if (error
== EFSBADCRC
|| error
== EFSCORRUPTED
|| fixit
) {
1997 _("leaf block %u for directory inode %" PRIu64
" bad CRC\n"),
2002 _("can't read block %u for directory inode %" PRIu64
", error %d\n"),
2003 da_bno
, ip
->i_ino
, error
);
2008 libxfs_dir2_leaf_hdr_from_disk(mp
, &leafhdr
, leaf
);
2009 ents
= leafhdr
.ents
;
2010 ltp
= xfs_dir2_leaf_tail_p(mp
->m_dir_geo
, leaf
);
2011 bestsp
= xfs_dir2_leaf_bests_p(ltp
);
2012 if (!(leafhdr
.magic
== XFS_DIR2_LEAF1_MAGIC
||
2013 leafhdr
.magic
== XFS_DIR3_LEAF1_MAGIC
) ||
2014 leafhdr
.forw
|| leafhdr
.back
||
2015 leafhdr
.count
< leafhdr
.stale
||
2016 leafhdr
.count
> mp
->m_dir_geo
->leaf_max_ents
||
2017 (char *)&ents
[leafhdr
.count
] > (char *)bestsp
) {
2019 _("leaf block %u for directory inode %" PRIu64
" bad header\n"),
2021 libxfs_buf_relse(bp
);
2025 if (leafhdr
.magic
== XFS_DIR3_LEAF1_MAGIC
) {
2026 error
= check_da3_header(mp
, bp
, ip
->i_ino
);
2028 libxfs_buf_relse(bp
);
2033 seeval
= dir_hash_see_all(hashtab
, ents
, leafhdr
.count
, leafhdr
.stale
);
2034 if (dir_hash_check(hashtab
, ip
, seeval
)) {
2035 libxfs_buf_relse(bp
);
2038 badtail
= freetab
->nents
!= be32_to_cpu(ltp
->bestcount
);
2039 for (i
= 0; !badtail
&& i
< be32_to_cpu(ltp
->bestcount
); i
++) {
2040 freetab
->ents
[i
].s
= 1;
2041 badtail
= freetab
->ents
[i
].v
!= be16_to_cpu(bestsp
[i
]);
2045 _("leaf block %u for directory inode %" PRIu64
" bad tail\n"),
2047 libxfs_buf_relse(bp
);
2050 libxfs_buf_relse(bp
);
2055 * Check contents of the node blocks (leaves)
2056 * Looks for matching hash values for the data entries.
2059 longform_dir2_check_node(
2060 struct xfs_mount
*mp
,
2061 struct xfs_inode
*ip
,
2062 struct dir_hash_tab
*hashtab
,
2063 struct freetab
*freetab
)
2068 xfs_dir2_free_t
*free
;
2070 xfs_dir2_leaf_t
*leaf
;
2071 xfs_fileoff_t next_da_bno
;
2074 struct xfs_dir2_leaf_entry
*ents
;
2075 struct xfs_dir3_icleaf_hdr leafhdr
;
2076 struct xfs_dir3_icfree_hdr freehdr
;
2081 for (da_bno
= mp
->m_dir_geo
->leafblk
, next_da_bno
= 0;
2082 next_da_bno
!= NULLFILEOFF
&& da_bno
< mp
->m_dir_geo
->freeblk
;
2083 da_bno
= (xfs_dablk_t
)next_da_bno
) {
2084 next_da_bno
= da_bno
+ mp
->m_dir_geo
->fsbcount
- 1;
2085 if (bmap_next_offset(ip
, &next_da_bno
))
2089 * we need to use the da3 node verifier here as it handles the
2090 * fact that reading the leaf hash tree blocks can return either
2091 * leaf or node blocks and calls the correct verifier. If we get
2092 * a node block, then we'll skip it below based on a magic
2095 error
= dir_read_buf(ip
, da_bno
, &bp
, &xfs_da3_node_buf_ops
,
2099 _("can't read leaf block %u for directory inode %" PRIu64
", error %d\n"),
2100 da_bno
, ip
->i_ino
, error
);
2104 libxfs_dir2_leaf_hdr_from_disk(mp
, &leafhdr
, leaf
);
2105 ents
= leafhdr
.ents
;
2106 if (!(leafhdr
.magic
== XFS_DIR2_LEAFN_MAGIC
||
2107 leafhdr
.magic
== XFS_DIR3_LEAFN_MAGIC
||
2108 leafhdr
.magic
== XFS_DA_NODE_MAGIC
||
2109 leafhdr
.magic
== XFS_DA3_NODE_MAGIC
)) {
2111 _("unknown magic number %#x for block %u in directory inode %" PRIu64
"\n"),
2112 leafhdr
.magic
, da_bno
, ip
->i_ino
);
2113 libxfs_buf_relse(bp
);
2117 /* check v5 metadata */
2118 if (leafhdr
.magic
== XFS_DIR3_LEAFN_MAGIC
||
2119 leafhdr
.magic
== XFS_DA3_NODE_MAGIC
) {
2120 error
= check_da3_header(mp
, bp
, ip
->i_ino
);
2122 libxfs_buf_relse(bp
);
2128 if (leafhdr
.magic
== XFS_DA_NODE_MAGIC
||
2129 leafhdr
.magic
== XFS_DA3_NODE_MAGIC
) {
2130 libxfs_buf_relse(bp
);
2135 * If there's a validator error, we need to ensure that we got
2136 * the right ops on the buffer for when we write it back out.
2138 bp
->b_ops
= &xfs_dir3_leafn_buf_ops
;
2139 if (leafhdr
.count
> mp
->m_dir_geo
->leaf_max_ents
||
2140 leafhdr
.count
< leafhdr
.stale
) {
2142 _("leaf block %u for directory inode %" PRIu64
" bad header\n"),
2144 libxfs_buf_relse(bp
);
2147 seeval
= dir_hash_see_all(hashtab
, ents
,
2148 leafhdr
.count
, leafhdr
.stale
);
2149 libxfs_buf_relse(bp
);
2150 if (seeval
!= DIR_HASH_CK_OK
)
2153 if (dir_hash_check(hashtab
, ip
, seeval
))
2156 for (da_bno
= mp
->m_dir_geo
->freeblk
, next_da_bno
= 0;
2157 next_da_bno
!= NULLFILEOFF
;
2158 da_bno
= (xfs_dablk_t
)next_da_bno
) {
2159 next_da_bno
= da_bno
+ mp
->m_dir_geo
->fsbcount
- 1;
2160 if (bmap_next_offset(ip
, &next_da_bno
))
2163 error
= dir_read_buf(ip
, da_bno
, &bp
, &xfs_dir3_free_buf_ops
,
2167 _("can't read freespace block %u for directory inode %" PRIu64
", error %d\n"),
2168 da_bno
, ip
->i_ino
, error
);
2172 libxfs_dir2_free_hdr_from_disk(mp
, &freehdr
, free
);
2173 bests
= freehdr
.bests
;
2174 fdb
= xfs_dir2_da_to_db(mp
->m_dir_geo
, da_bno
);
2175 if (!(freehdr
.magic
== XFS_DIR2_FREE_MAGIC
||
2176 freehdr
.magic
== XFS_DIR3_FREE_MAGIC
) ||
2178 (fdb
- xfs_dir2_byte_to_db(mp
->m_dir_geo
, XFS_DIR2_FREE_OFFSET
)) *
2179 mp
->m_dir_geo
->free_max_bests
||
2180 freehdr
.nvalid
< freehdr
.nused
) {
2182 _("free block %u for directory inode %" PRIu64
" bad header\n"),
2184 libxfs_buf_relse(bp
);
2188 if (freehdr
.magic
== XFS_DIR3_FREE_MAGIC
) {
2189 error
= check_dir3_header(mp
, bp
, ip
->i_ino
);
2191 libxfs_buf_relse(bp
);
2195 for (i
= used
= 0; i
< freehdr
.nvalid
; i
++) {
2196 if (i
+ freehdr
.firstdb
>= freetab
->nents
||
2197 freetab
->ents
[i
+ freehdr
.firstdb
].v
!=
2198 be16_to_cpu(bests
[i
])) {
2200 _("free block %u entry %i for directory ino %" PRIu64
" bad\n"),
2201 da_bno
, i
, ip
->i_ino
);
2202 libxfs_buf_relse(bp
);
2205 used
+= be16_to_cpu(bests
[i
]) != NULLDATAOFF
;
2206 freetab
->ents
[i
+ freehdr
.firstdb
].s
= 1;
2208 if (used
!= freehdr
.nused
) {
2210 _("free block %u for directory inode %" PRIu64
" bad nused\n"),
2212 libxfs_buf_relse(bp
);
2215 libxfs_buf_relse(bp
);
2217 for (i
= 0; i
< freetab
->nents
; i
++) {
2218 if ((freetab
->ents
[i
].s
== 0) &&
2219 (freetab
->ents
[i
].v
!= NULLDATAOFF
)) {
2221 _("missing freetab entry %u for directory inode %" PRIu64
"\n"),
2230 * If a directory is corrupt, we need to read in as many entries as possible,
2231 * destroy the entry and create a new one with recovered name/inode pairs.
2232 * (ie. get libxfs to do all the grunt work)
2235 longform_dir2_entry_check(
2236 struct xfs_mount
*mp
,
2238 struct xfs_inode
*ip
,
2241 struct ino_tree_node
*irec
,
2243 struct dir_hash_tab
*hashtab
)
2245 struct xfs_buf
*bp
= NULL
;
2251 xfs_fileoff_t next_da_bno
;
2254 struct xfs_da_args args
;
2257 freetab
= malloc(FREETAB_SIZE(ip
->i_disk_size
/ mp
->m_dir_geo
->blksize
));
2259 do_error(_("malloc failed in %s (%" PRId64
" bytes)\n"),
2261 FREETAB_SIZE(ip
->i_disk_size
/ mp
->m_dir_geo
->blksize
));
2264 freetab
->naents
= ip
->i_disk_size
/ mp
->m_dir_geo
->blksize
;
2266 for (i
= 0; i
< freetab
->naents
; i
++) {
2267 freetab
->ents
[i
].v
= NULLDATAOFF
;
2268 freetab
->ents
[i
].s
= 0;
2271 /* is this a block, leaf, or node directory? */
2273 args
.geo
= mp
->m_dir_geo
;
2274 libxfs_dir2_isblock(&args
, &isblock
);
2275 libxfs_dir2_isleaf(&args
, &isleaf
);
2277 /* check directory "data" blocks (ie. name/inode pairs) */
2278 for (da_bno
= 0, next_da_bno
= 0;
2279 next_da_bno
!= NULLFILEOFF
&& da_bno
< mp
->m_dir_geo
->leafblk
;
2280 da_bno
= (xfs_dablk_t
)next_da_bno
) {
2281 const struct xfs_buf_ops
*ops
;
2283 struct xfs_dir2_data_hdr
*d
;
2285 next_da_bno
= da_bno
+ mp
->m_dir_geo
->fsbcount
- 1;
2286 if (bmap_next_offset(ip
, &next_da_bno
)) {
2288 * if this is the first block, there isn't anything we
2289 * can recover so we just trash it.
2299 ops
= &xfs_dir3_block_buf_ops
;
2301 ops
= &xfs_dir3_data_buf_ops
;
2303 error
= dir_read_buf(ip
, da_bno
, &bp
, ops
, &fixit
);
2306 _("can't read data block %u for directory inode %" PRIu64
" error %d\n"),
2307 da_bno
, ino
, error
);
2311 * we try to read all "data" blocks, but if we are in
2312 * block form and we fail, there isn't anything else to
2313 * read, and nothing we can do but trash it.
2322 /* check v5 metadata */
2324 if (be32_to_cpu(d
->magic
) == XFS_DIR3_BLOCK_MAGIC
||
2325 be32_to_cpu(d
->magic
) == XFS_DIR3_DATA_MAGIC
) {
2326 error
= check_dir3_header(mp
, bp
, ino
);
2335 longform_dir2_entry_check_data(mp
, ip
, num_illegal
, need_dot
,
2336 irec
, ino_offset
, bp
, hashtab
,
2337 &freetab
, da_bno
, isblock
);
2341 libxfs_buf_relse(bp
);
2343 fixit
|= (*num_illegal
!= 0) || dir2_is_badino(ino
) || *need_dot
;
2345 if (!dotdot_update
) {
2346 /* check btree and freespace */
2348 struct xfs_dir2_data_hdr
*block
;
2349 xfs_dir2_block_tail_t
*btp
;
2350 xfs_dir2_leaf_entry_t
*blp
;
2353 btp
= xfs_dir2_block_tail_p(mp
->m_dir_geo
, block
);
2354 blp
= xfs_dir2_block_leaf_p(btp
);
2355 seeval
= dir_hash_see_all(hashtab
, blp
,
2356 be32_to_cpu(btp
->count
),
2357 be32_to_cpu(btp
->stale
));
2358 if (dir_hash_check(hashtab
, ip
, seeval
))
2360 } else if (isleaf
) {
2361 fixit
|= longform_dir2_check_leaf(mp
, ip
, hashtab
,
2364 fixit
|= longform_dir2_check_node(mp
, ip
, hashtab
,
2370 libxfs_buf_relse(bp
);
2372 if (!no_modify
&& (fixit
|| dotdot_update
)) {
2373 longform_dir2_rebuild(mp
, ino
, ip
, irec
, ino_offset
, hashtab
);
2377 if (fixit
|| dotdot_update
)
2379 _("would rebuild directory inode %" PRIu64
"\n"), ino
);
2386 * shortform directory v2 processing routines -- entry verification and
2387 * bad entry deletion (pruning).
2389 static struct xfs_dir2_sf_entry
*
2390 shortform_dir2_junk(
2391 struct xfs_mount
*mp
,
2392 struct xfs_dir2_sf_hdr
*sfp
,
2393 struct xfs_dir2_sf_entry
*sfep
,
2400 struct xfs_dir2_sf_entry
*next_sfep
;
2404 if (lino
== orphanage_ino
)
2407 next_elen
= libxfs_dir2_sf_entsize(mp
, sfp
, sfep
->namelen
);
2408 next_sfep
= libxfs_dir2_sf_nextentry(mp
, sfp
, sfep
);
2411 * if we are just checking, simply return the pointer to the next entry
2412 * here so that the checking loop can continue.
2415 do_warn(_("would junk entry\n"));
2420 * now move all the remaining entries down over the junked entry and
2421 * clear the newly unused bytes at the tail of the directory region.
2423 next_len
= *max_size
- ((intptr_t)next_sfep
- (intptr_t)sfp
);
2424 *max_size
-= next_elen
;
2425 *bytes_deleted
+= next_elen
;
2427 memmove(sfep
, next_sfep
, next_len
);
2428 memset((void *)((intptr_t)sfep
+ next_len
), 0, next_elen
);
2433 * WARNING: drop the index i by one so it matches the decremented count
2434 * for accurate comparisons in the loop test
2439 do_warn(_("junking entry\n"));
2446 shortform_dir2_entry_check(
2447 struct xfs_mount
*mp
,
2449 struct xfs_inode
*ip
,
2451 struct ino_tree_node
*current_irec
,
2452 int current_ino_offset
,
2453 struct dir_hash_tab
*hashtab
)
2457 struct xfs_dir2_sf_hdr
*sfp
;
2458 struct xfs_dir2_sf_entry
*sfep
;
2459 struct xfs_dir2_sf_entry
*next_sfep
;
2460 struct xfs_ifork
*ifp
;
2461 struct ino_tree_node
*irec
;
2468 char fname
[MAXNAMELEN
+ 1];
2472 sfp
= (struct xfs_dir2_sf_hdr
*) ifp
->if_u1
.if_data
;
2476 max_size
= ifp
->if_bytes
;
2477 ASSERT(ip
->i_disk_size
<= ifp
->if_bytes
);
2480 * if just rebuilding a directory due to a ".." update, set ".." and return
2482 if (dotdot_update
) {
2483 parent
= get_inode_parent(current_irec
, current_ino_offset
);
2486 _("would set .. in sf dir inode %" PRIu64
" to %" PRIu64
"\n"),
2490 _("setting .. in sf dir inode %" PRIu64
" to %" PRIu64
"\n"),
2492 libxfs_dir2_sf_put_parent_ino(sfp
, parent
);
2499 * no '.' entry in shortform dirs, just bump up ref count by 1
2500 * '..' was already (or will be) accounted for and checked when
2501 * the directory is reached or will be taken care of when the
2502 * directory is moved to orphanage.
2504 add_inode_ref(current_irec
, current_ino_offset
);
2507 * Initialise i8 counter -- the parent inode number counts as well.
2509 i8
= libxfs_dir2_sf_get_parent_ino(sfp
) > XFS_DIR2_MAX_SHORT_INUM
;
2512 * now run through entries, stop at first bad entry, don't need
2513 * to skip over '..' since that's encoded in its own field and
2514 * no need to worry about '.' since it doesn't exist.
2516 sfep
= next_sfep
= xfs_dir2_sf_firstentry(sfp
);
2518 for (i
= 0; i
< sfp
->count
&& max_size
>
2519 (intptr_t)next_sfep
- (intptr_t)sfp
;
2520 sfep
= next_sfep
, i
++) {
2523 lino
= libxfs_dir2_sf_get_ino(mp
, sfp
, sfep
);
2525 namelen
= sfep
->namelen
;
2527 ASSERT(no_modify
|| namelen
> 0);
2529 if (no_modify
&& namelen
== 0) {
2531 * if we're really lucky, this is
2532 * the last entry in which case we
2533 * can use the dir size to set the
2534 * namelen value. otherwise, forget
2535 * it because we're not going to be
2536 * able to find the next entry.
2540 if (i
== sfp
->count
- 1) {
2541 namelen
= ip
->i_disk_size
-
2542 ((intptr_t) &sfep
->name
[0] -
2546 * don't process the rest of the directory,
2547 * break out of processing loop
2551 } else if (no_modify
&& (intptr_t) sfep
- (intptr_t) sfp
+
2552 + libxfs_dir2_sf_entsize(mp
, sfp
, sfep
->namelen
)
2553 > ip
->i_disk_size
) {
2556 if (i
== sfp
->count
- 1) {
2557 namelen
= ip
->i_disk_size
-
2558 ((intptr_t) &sfep
->name
[0] -
2562 * don't process the rest of the directory,
2563 * break out of processing loop
2569 memmove(fname
, sfep
->name
, sfep
->namelen
);
2570 fname
[sfep
->namelen
] = '\0';
2572 ASSERT(no_modify
|| (lino
!= NULLFSINO
&& lino
!= 0));
2573 ASSERT(no_modify
|| libxfs_verify_dir_ino(mp
, lino
));
2576 * Also skip entries with bogus inode numbers if we're
2577 * in no modify mode.
2580 if (no_modify
&& !libxfs_verify_dir_ino(mp
, lino
)) {
2581 next_sfep
= libxfs_dir2_sf_nextentry(mp
, sfp
, sfep
);
2585 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, lino
),
2586 XFS_INO_TO_AGINO(mp
, lino
));
2590 _("entry \"%s\" in shortform directory %" PRIu64
" references non-existent inode %" PRIu64
"\n"),
2592 next_sfep
= shortform_dir2_junk(mp
, sfp
, sfep
, lino
,
2593 &max_size
, &i
, &bytes_deleted
,
2598 ino_offset
= XFS_INO_TO_AGINO(mp
, lino
) - irec
->ino_startnum
;
2601 * if it's a free inode, blow out the entry.
2602 * by now, any inode that we think is free
2605 if (is_inode_free(irec
, ino_offset
)) {
2607 _("entry \"%s\" in shortform directory inode %" PRIu64
" points to free inode %" PRIu64
"\n"),
2609 next_sfep
= shortform_dir2_junk(mp
, sfp
, sfep
, lino
,
2610 &max_size
, &i
, &bytes_deleted
,
2615 * check if this inode is lost+found dir in the root
2617 if (ino
== mp
->m_sb
.sb_rootino
&& strcmp(fname
, ORPHANAGE
) == 0) {
2619 * if it's not a directory, trash it
2621 if (!inode_isadir(irec
, ino_offset
)) {
2623 _("%s (ino %" PRIu64
") in root (%" PRIu64
") is not a directory"),
2624 ORPHANAGE
, lino
, ino
);
2625 next_sfep
= shortform_dir2_junk(mp
, sfp
, sfep
,
2626 lino
, &max_size
, &i
,
2627 &bytes_deleted
, ino_dirty
);
2631 * if this is a dup, it will be picked up below,
2632 * otherwise, mark it as the orphanage for later.
2635 orphanage_ino
= lino
;
2638 * check for duplicate names in directory.
2640 if (!dir_hash_add(mp
, hashtab
, (xfs_dir2_dataptr_t
)
2641 (sfep
- xfs_dir2_sf_firstentry(sfp
)),
2642 lino
, sfep
->namelen
, sfep
->name
,
2643 libxfs_dir2_sf_get_ftype(mp
, sfep
))) {
2645 _("entry \"%s\" (ino %" PRIu64
") in dir %" PRIu64
" is a duplicate name"),
2647 next_sfep
= shortform_dir2_junk(mp
, sfp
, sfep
, lino
,
2648 &max_size
, &i
, &bytes_deleted
,
2653 if (!inode_isadir(irec
, ino_offset
)) {
2655 * check easy case first, regular inode, just bump
2658 add_inode_reached(irec
, ino_offset
);
2660 parent
= get_inode_parent(irec
, ino_offset
);
2663 * bump up the link counts in parent and child
2664 * directory but if the link doesn't agree with
2665 * the .. in the child, blow out the entry
2667 if (is_inode_reached(irec
, ino_offset
)) {
2669 _("entry \"%s\" in directory inode %" PRIu64
2670 " references already connected inode %" PRIu64
".\n"),
2672 next_sfep
= shortform_dir2_junk(mp
, sfp
, sfep
,
2673 lino
, &max_size
, &i
,
2674 &bytes_deleted
, ino_dirty
);
2676 } else if (parent
== ino
) {
2677 add_inode_reached(irec
, ino_offset
);
2678 add_inode_ref(current_irec
, current_ino_offset
);
2679 } else if (parent
== NULLFSINO
) {
2680 /* ".." was missing, but this entry refers to it,
2681 so, set it as the parent and mark for rebuild */
2683 _("entry \"%s\" in dir ino %" PRIu64
" doesn't have a .. entry, will set it in ino %" PRIu64
".\n"),
2685 set_inode_parent(irec
, ino_offset
, ino
);
2686 add_inode_reached(irec
, ino_offset
);
2687 add_inode_ref(current_irec
, current_ino_offset
);
2688 add_dotdot_update(XFS_INO_TO_AGNO(mp
, lino
),
2692 _("entry \"%s\" in directory inode %" PRIu64
2693 " not consistent with .. value (%" PRIu64
2694 ") in inode %" PRIu64
",\n"),
2695 fname
, ino
, parent
, lino
);
2696 next_sfep
= shortform_dir2_junk(mp
, sfp
, sfep
,
2697 lino
, &max_size
, &i
,
2698 &bytes_deleted
, ino_dirty
);
2703 /* validate ftype field if supported */
2704 if (xfs_has_ftype(mp
)) {
2708 dir_ftype
= libxfs_dir2_sf_get_ftype(mp
, sfep
);
2709 ino_ftype
= get_inode_ftype(irec
, ino_offset
);
2711 if (dir_ftype
!= ino_ftype
) {
2714 _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64
"/%" PRIu64
"\n"),
2715 dir_ftype
, ino_ftype
,
2719 _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64
"/%" PRIu64
"\n"),
2720 dir_ftype
, ino_ftype
,
2722 libxfs_dir2_sf_put_ftype(mp
, sfep
,
2724 dir_hash_update_ftype(hashtab
,
2725 (xfs_dir2_dataptr_t
)(sfep
- xfs_dir2_sf_firstentry(sfp
)),
2732 if (lino
> XFS_DIR2_MAX_SHORT_INUM
)
2736 * go onto next entry - we have to take entries with bad namelen
2737 * into account in no modify mode since we calculate size based
2740 ASSERT(no_modify
|| bad_sfnamelen
== 0);
2741 next_sfep
= (struct xfs_dir2_sf_entry
*)((intptr_t)sfep
+
2743 ? libxfs_dir2_sf_entsize(mp
, sfp
, namelen
)
2744 : libxfs_dir2_sf_entsize(mp
, sfp
, sfep
->namelen
)));
2747 if (sfp
->i8count
!= i8
) {
2749 do_warn(_("would fix i8count in inode %" PRIu64
"\n"),
2753 struct xfs_dir2_sf_entry
*tmp_sfep
;
2755 tmp_sfep
= next_sfep
;
2756 process_sf_dir2_fixi8(mp
, sfp
, &tmp_sfep
);
2758 (intptr_t)next_sfep
-
2760 next_sfep
= tmp_sfep
;
2764 do_warn(_("fixing i8count in inode %" PRIu64
"\n"),
2770 * sync up sizes if required
2772 if (*ino_dirty
&& bytes_deleted
> 0) {
2774 libxfs_idata_realloc(ip
, -bytes_deleted
, XFS_DATA_FORK
);
2775 ip
->i_disk_size
-= bytes_deleted
;
2778 if (ip
->i_disk_size
!= ip
->i_df
.if_bytes
) {
2779 ASSERT(ip
->i_df
.if_bytes
== (xfs_fsize_t
)
2780 ((intptr_t) next_sfep
- (intptr_t) sfp
));
2781 ip
->i_disk_size
= (xfs_fsize_t
)
2782 ((intptr_t) next_sfep
- (intptr_t) sfp
);
2784 _("setting size to %" PRId64
" bytes to reflect junked entries\n"),
2791 * processes all reachable inodes in directories
2795 struct xfs_mount
*mp
,
2796 xfs_agnumber_t agno
,
2797 struct ino_tree_node
*irec
,
2801 struct xfs_inode
*ip
;
2802 struct xfs_trans
*tp
;
2803 struct dir_hash_tab
*hashtab
;
2805 int dirty
, num_illegal
, error
, nres
;
2807 ino
= XFS_AGINO_TO_INO(mp
, agno
, irec
->ino_startnum
+ ino_offset
);
2810 * open up directory inode, check all entries,
2811 * then call prune_dir_entries to remove all
2812 * remaining illegal directory entries.
2815 ASSERT(!is_inode_refchecked(irec
, ino_offset
) || dotdot_update
);
2817 error
= -libxfs_iget(mp
, NULL
, ino
, 0, &ip
);
2821 _("couldn't map inode %" PRIu64
", err = %d\n"),
2825 _("couldn't map inode %" PRIu64
", err = %d\n"),
2828 * see below for what we're doing if this
2829 * is root. Why do we need to do this here?
2830 * to ensure that the root doesn't show up
2831 * as being disconnected in the no_modify case.
2833 if (mp
->m_sb
.sb_rootino
== ino
) {
2834 add_inode_reached(irec
, 0);
2835 add_inode_ref(irec
, 0);
2839 add_inode_refchecked(irec
, 0);
2843 need_dot
= dirty
= num_illegal
= 0;
2845 if (mp
->m_sb
.sb_rootino
== ino
) {
2847 * mark root inode reached and bump up
2848 * link count for root inode to account
2849 * for '..' entry since the root inode is
2850 * never reached by a parent. we know
2851 * that root's '..' is always good --
2852 * guaranteed by phase 3 and/or below.
2854 add_inode_reached(irec
, ino_offset
);
2857 add_inode_refchecked(irec
, ino_offset
);
2859 hashtab
= dir_hash_init(ip
->i_disk_size
);
2862 * look for bogus entries
2864 switch (ip
->i_df
.if_format
) {
2865 case XFS_DINODE_FMT_EXTENTS
:
2866 case XFS_DINODE_FMT_BTREE
:
2868 * also check for missing '.' in longform dirs.
2869 * missing .. entries are added if required when
2870 * the directory is connected to lost+found. but
2871 * we need to create '.' entries here.
2873 longform_dir2_entry_check(mp
, ino
, ip
,
2874 &num_illegal
, &need_dot
,
2879 case XFS_DINODE_FMT_LOCAL
:
2881 * using the remove reservation is overkill
2882 * since at most we'll only need to log the
2883 * inode but it's easier than wedging a
2884 * new define in ourselves.
2886 nres
= no_modify
? 0 : XFS_REMOVE_SPACE_RES(mp
);
2887 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_remove
,
2892 libxfs_trans_ijoin(tp
, ip
, 0);
2894 shortform_dir2_entry_check(mp
, ino
, ip
, &dirty
,
2898 ASSERT(dirty
== 0 || (dirty
&& !no_modify
));
2900 libxfs_trans_log_inode(tp
, ip
,
2901 XFS_ILOG_CORE
| XFS_ILOG_DDATA
);
2902 error
= -libxfs_trans_commit(tp
);
2905 _("error %d fixing shortform directory %llu\n"),
2907 (unsigned long long)ip
->i_ino
);
2909 libxfs_trans_cancel(tp
);
2916 dir_hash_done(hashtab
);
2919 * if we have to create a .. for /, do it now *before*
2920 * we delete the bogus entries, otherwise the directory
2921 * could transform into a shortform dir which would
2922 * probably cause the simulation to choke. Even
2923 * if the illegal entries get shifted around, it's ok
2924 * because the entries are structurally intact and in
2925 * hash-value order so the simulation won't get confused
2926 * if it has to move them around.
2928 if (!no_modify
&& need_root_dotdot
&& ino
== mp
->m_sb
.sb_rootino
) {
2929 ASSERT(ip
->i_df
.if_format
!= XFS_DINODE_FMT_LOCAL
);
2931 do_warn(_("recreating root directory .. entry\n"));
2933 nres
= XFS_MKDIR_SPACE_RES(mp
, 2);
2934 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_mkdir
,
2939 libxfs_trans_ijoin(tp
, ip
, 0);
2941 error
= -libxfs_dir_createname(tp
, ip
, &xfs_name_dotdot
,
2945 _("can't make \"..\" entry in root inode %" PRIu64
", createname error %d\n"), ino
, error
);
2947 libxfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
2948 error
= -libxfs_trans_commit(tp
);
2951 _("root inode \"..\" entry recreation failed (%d)\n"), error
);
2953 need_root_dotdot
= 0;
2954 } else if (need_root_dotdot
&& ino
== mp
->m_sb
.sb_rootino
) {
2955 do_warn(_("would recreate root directory .. entry\n"));
2959 * if we need to create the '.' entry, do so only if
2960 * the directory is a longform dir. if it's been
2961 * turned into a shortform dir, then the inode is ok
2962 * since shortform dirs have no '.' entry and the inode
2963 * has already been committed by prune_lf_dir_entry().
2967 * bump up our link count but don't
2968 * bump up the inode link count. chances
2969 * are good that even though we lost '.'
2970 * the inode link counts reflect '.' so
2971 * leave the inode link count alone and if
2972 * it turns out to be wrong, we'll catch
2975 add_inode_ref(irec
, ino_offset
);
2979 _("would create missing \".\" entry in dir ino %" PRIu64
"\n"),
2981 } else if (ip
->i_df
.if_format
!= XFS_DINODE_FMT_LOCAL
) {
2983 * need to create . entry in longform dir.
2986 _("creating missing \".\" entry in dir ino %" PRIu64
"\n"), ino
);
2988 nres
= XFS_MKDIR_SPACE_RES(mp
, 1);
2989 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_mkdir
,
2994 libxfs_trans_ijoin(tp
, ip
, 0);
2996 error
= -libxfs_dir_createname(tp
, ip
, &xfs_name_dot
,
3000 _("can't make \".\" entry in dir ino %" PRIu64
", createname error %d\n"),
3003 libxfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
3004 error
= -libxfs_trans_commit(tp
);
3007 _("root inode \".\" entry recreation failed (%d)\n"), error
);
3014 * mark realtime bitmap and summary inodes as reached.
3015 * quota inodes (user, group, project) are marked here as well
/*
 * Flag the filesystem's standalone metadata inodes as reached.
 *
 * For the realtime bitmap inode (sb_rbmino) and the realtime summary
 * inode (sb_rsumino), and for each of the user, group and project quota
 * inodes whose superblock field is non-zero and not NULLFSINO, the
 * incore inode record is looked up with find_inode_rec() and the
 * inode's position inside that record is handed to add_inode_reached().
 *
 * mp: mount structure; only mp->m_sb is read here.
 *
 * NOTE(review): irec is dereferenced right after find_inode_rec()
 * without a NULL check; this assumes a record always exists for these
 * inodes -- confirm.
 */
3018 mark_standalone_inodes(xfs_mount_t
*mp
)
3020 ino_tree_node_t
*irec
;
/* realtime bitmap inode */
3023 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, mp
->m_sb
.sb_rbmino
),
3024 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rbmino
));
/*
 * offset = AG-relative inode number minus a value not visible here;
 * presumably irec->ino_startnum as in the quota cases -- TODO confirm
 */
3026 offset
= XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rbmino
) -
3029 add_inode_reached(irec
, offset
);
/* realtime summary inode, handled the same way */
3031 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, mp
->m_sb
.sb_rsumino
),
3032 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rsumino
));
3034 offset
= XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rsumino
) -
3037 add_inode_reached(irec
, offset
);
/* user quota inode: only when the field is set and not NULLFSINO */
3040 if (mp
->m_sb
.sb_uquotino
3041 && mp
->m_sb
.sb_uquotino
!= NULLFSINO
) {
3042 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
,
3043 mp
->m_sb
.sb_uquotino
),
3044 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_uquotino
));
/* index of the inode within its chunk record */
3045 offset
= XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_uquotino
)
3046 - irec
->ino_startnum
;
3047 add_inode_reached(irec
, offset
);
/* group quota inode: same test and same marking */
3049 if (mp
->m_sb
.sb_gquotino
3050 && mp
->m_sb
.sb_gquotino
!= NULLFSINO
) {
3051 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
,
3052 mp
->m_sb
.sb_gquotino
),
3053 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_gquotino
));
3054 offset
= XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_gquotino
)
3055 - irec
->ino_startnum
;
3056 add_inode_reached(irec
, offset
);
/* project quota inode: same test and same marking */
3058 if (mp
->m_sb
.sb_pquotino
3059 && mp
->m_sb
.sb_pquotino
!= NULLFSINO
) {
3060 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
,
3061 mp
->m_sb
.sb_pquotino
),
3062 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_pquotino
));
3063 offset
= XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_pquotino
)
3064 - irec
->ino_startnum
;
3065 add_inode_reached(irec
, offset
);
/*
 * Scan one inode chunk record for inodes that were never reached.
 *
 * Every one of the XFS_INODES_PER_CHUNK slots in irec is examined.
 * A slot is tested with is_inode_free() and is_inode_reached(); the
 * statements controlled by those tests are not visible here, presumably
 * they skip the slot -- TODO confirm.  For the remaining slots a
 * "disconnected" warning is printed and the inode is either moved to
 * ORPHANAGE with mv_orphanage() or only reported as "would move"; the
 * condition choosing between the two is not visible here, presumably
 * the no-modify setting -- TODO confirm.  add_inode_reached() is then
 * called for the slot.
 *
 * agno: AG number, combined with i + irec->ino_startnum to build the
 *       full inode number that is reported and moved.
 * irec: the inode chunk record being scanned.
 * mp is used below but its declaration is not visible here.
 */
3071 check_for_orphaned_inodes(
3073 xfs_agnumber_t agno
,
3074 ino_tree_node_t
*irec
)
3079 for (i
= 0; i
< XFS_INODES_PER_CHUNK
; i
++) {
3080 ASSERT(is_inode_confirmed(irec
, i
));
/* free or already reached slots need no orphan handling (presumably) */
3081 if (is_inode_free(irec
, i
))
3084 if (is_inode_reached(irec
, i
))
/* an unreached non-directory is expected to have no counted references */
3087 ASSERT(inode_isadir(irec
, i
) ||
3088 num_inode_references(irec
, i
) == 0);
/* rebuild the full inode number from AG number and in-chunk index */
3090 ino
= XFS_AGINO_TO_INO(mp
, agno
, i
+ irec
->ino_startnum
);
3091 if (inode_isadir(irec
, i
))
3092 do_warn(_("disconnected dir inode %" PRIu64
", "), ino
);
3094 do_warn(_("disconnected inode %" PRIu64
", "), ino
);
/* the orphanage inode number is cached in the file-scope orphanage_ino */
3097 orphanage_ino
= mk_orphanage(mp
);
3098 do_warn(_("moving to %s\n"), ORPHANAGE
);
3099 mv_orphanage(mp
, ino
, inode_isadir(irec
, i
));
3101 do_warn(_("would move to %s\n"), ORPHANAGE
);
3104 * for read-only case, even though the inode isn't
3105 * really reachable, set the flag (and bump our link
3106 * count) anyway to fool phase 7
3108 add_inode_reached(irec
, i
);
/*
 * Workqueue item: process every directory inode in one chunk record.
 *
 * arg is interpreted as a struct ino_tree_node pointer.  Each of the
 * XFS_INODES_PER_CHUNK slots is tested with inode_isadir() and, when it
 * is a directory, passed to process_dir_inode() together with the
 * workqueue context (wq->wq_ctx) and the AG number.
 *
 * NOTE(review): the function name and the declarations of arg and i
 * are not visible here; presumably this is the do_dir_inode callback
 * that is queued with an irec argument elsewhere in this file --
 * confirm.
 */
3114 struct workqueue
*wq
,
3115 xfs_agnumber_t agno
,
3118 struct ino_tree_node
*irec
= arg
;
3121 for (i
= 0; i
< XFS_INODES_PER_CHUNK
; i
++) {
/* only directory slots are processed; other slots are left alone */
3122 if (inode_isadir(irec
, i
))
3123 process_dir_inode(wq
->wq_ctx
, agno
, irec
, i
);
/*
 * Workqueue item: traverse the directories of one allocation group.
 *
 * arg is interpreted as a prefetch_args_t pointer and wq->wq_ctx as the
 * struct xfs_mount.  The steps visible here are:
 *   - wait_for_inode_prefetch(pf_args), then log the AG number;
 *   - create a local bounded workqueue (lwq) with
 *     workqueue_create_bound(&lwq, mp, ag_stride, 1000);
 *   - walk the AG's inode chunk records from findfirst_inode_rec(agno)
 *     through next_ino_rec(), queueing do_dir_inode on lwq with
 *     (agno, irec);
 *   - destroy_work_queue(&lwq) and cleanup_inode_prefetch(pf_args).
 *
 * The statement controlled by the "irec->ino_isa_dir == 0" test is not
 * visible here; presumably chunks with no directories are skipped --
 * TODO confirm.  The condition under which sem_post() is issued on
 * pf_args->ra_count, and the call that prints the "processing inode
 * chunk" message, are likewise not visible here.
 *
 * NOTE(review): the function name is not visible here; presumably this
 * is the traverse_function callback handed to do_inode_prefetch()
 * elsewhere in this file -- confirm.
 */
3129 struct workqueue
*wq
,
3130 xfs_agnumber_t agno
,
3133 struct ino_tree_node
*irec
;
3134 prefetch_args_t
*pf_args
= arg
;
3135 struct workqueue lwq
;
3136 struct xfs_mount
*mp
= wq
->wq_ctx
;
/* the prefetch wait comes before any inode record of this AG is walked */
3138 wait_for_inode_prefetch(pf_args
);
3141 do_log(_(" - agno = %d\n"), agno
);
3144 * The more AGs we have in flight at once, the fewer processing threads
3145 * per AG. This means we don't overwhelm the machine with hundreds of
3146 * threads when we start acting on lots of AGs at once. We just want
3147 * enough that we can keep multiple CPUs busy across multiple AGs.
3149 workqueue_create_bound(&lwq
, mp
, ag_stride
, 1000);
3151 for (irec
= findfirst_inode_rec(agno
); irec
; irec
= next_ino_rec(irec
)) {
3152 if (irec
->ino_isa_dir
== 0)
3156 sem_post(&pf_args
->ra_count
);
3160 sem_getvalue(&pf_args
->ra_count
, &i
);
3162 "processing inode chunk %p in AG %d (sem count = %d)",
3168 queue_work(&lwq
, do_dir_inode
, agno
, irec
);
3170 destroy_work_queue(&lwq
);
3171 cleanup_inode_prefetch(pf_args
);
/*
 * Reprocess every directory queued on dotdot_update_list.
 *
 * Loops until the list is empty: the entry at the tail of the list
 * (dotdot_update_list.prev) is taken, unlinked with list_del(), and the
 * directory it describes (agno, irec, ino_offset) is run through
 * process_dir_inode() again.  add_dotdot_update() inserts entries with
 * list_add(), so taking the tail presumably handles the oldest entry
 * first -- confirm against the list implementation.
 *
 * mp is used below but its declaration is not visible here.
 *
 * NOTE(review): entries are malloc()ed by add_dotdot_update(); no
 * free() is visible here -- confirm each entry is released after it is
 * unlinked.
 */
3175 update_missing_dotdot_entries(
3178 dotdot_update_t
*dir
;
3181 * these entries parents were updated, rebuild them again
3182 * set dotdot_update flag so processing routines do not count links
3185 while (!list_empty(&dotdot_update_list
)) {
/* take the entry at the tail of the list */
3186 dir
= list_entry(dotdot_update_list
.prev
, struct dotdot_update
,
/* unlink before reprocessing so the loop condition makes progress */
3188 list_del(&dir
->list
);
3189 process_dir_inode(mp
, dir
->agno
, dir
->irec
, dir
->ino_offset
);
/*
 * Start the directory traversal by handing traverse_function to
 * do_inode_prefetch() together with the mount and ag_stride.
 *
 * mp: mount structure, forwarded unchanged.
 *
 * The meaning of the trailing false/true arguments is defined by
 * do_inode_prefetch(), which is not visible here.
 */
3196 struct xfs_mount
*mp
)
3198 do_inode_prefetch(mp
, ag_stride
, traverse_function
, false, true);
3202 phase6(xfs_mount_t
*mp
)
3204 ino_tree_node_t
*irec
;
3207 memset(&zerocr
, 0, sizeof(struct cred
));
3208 memset(&zerofsx
, 0, sizeof(struct fsxattr
));
3211 do_log(_("Phase 6 - check inode connectivity...\n"));
3213 incore_ext_teardown(mp
);
3215 add_ino_ex_data(mp
);
3218 * verify existence of root directory - if we have to
3219 * make one, it's ok for the incore data structs not to
3220 * know about it since everything about it (and the other
3221 * inodes in its chunk if a new chunk was created) are ok
3223 if (need_root_inode
) {
3225 do_warn(_("reinitializing root directory\n"));
3227 need_root_inode
= 0;
3228 need_root_dotdot
= 0;
3230 do_warn(_("would reinitialize root directory\n"));
3236 do_warn(_("reinitializing realtime bitmap inode\n"));
3240 do_warn(_("would reinitialize realtime bitmap inode\n"));
3246 do_warn(_("reinitializing realtime summary inode\n"));
3250 do_warn(_("would reinitialize realtime summary inode\n"));
3256 _(" - resetting contents of realtime bitmap and summary inodes\n"));
3257 if (fill_rbmino(mp
)) {
3259 _("Warning: realtime bitmap may be inconsistent\n"));
3262 if (fill_rsumino(mp
)) {
3264 _("Warning: realtime bitmap may be inconsistent\n"));
3268 mark_standalone_inodes(mp
);
3270 do_log(_(" - traversing filesystem ...\n"));
3272 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, mp
->m_sb
.sb_rootino
),
3273 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rootino
));
3276 * we always have a root inode, even if it's free...
3277 * if the root is free, forget it, lost+found is already gone
3279 if (is_inode_free(irec
, 0) || !inode_isadir(irec
, 0)) {
3280 need_root_inode
= 1;
3284 * then process all inodes by walking incore inode tree
3289 * any directories that had updated ".." entries, rebuild them now
3291 update_missing_dotdot_entries(mp
);
3293 do_log(_(" - traversal finished ...\n"));
3294 do_log(_(" - moving disconnected inodes to %s ...\n"),
3298 * move all disconnected inodes to the orphanage
3300 for (i
= 0; i
< glob_agcount
; i
++) {
3301 irec
= findfirst_inode_rec(i
);
3302 while (irec
!= NULL
) {
3303 check_for_orphaned_inodes(mp
, i
, irec
);
3304 irec
= next_ino_rec(irec
);