1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
17 #include "err_protos.h"
22 static struct cred zerocr
;
23 static struct fsxattr zerofsx
;
24 static xfs_ino_t orphanage_ino
;
26 static struct xfs_name xfs_name_dot
= {(unsigned char *)".",
31 * Data structures used to keep track of directories where the ".."
32 * entries are updated. These must be rebuilt after the initial pass
34 typedef struct dotdot_update
{
35 struct list_head list
;
36 ino_tree_node_t
*irec
;
41 static LIST_HEAD(dotdot_update_list
);
42 static int dotdot_update
;
47 ino_tree_node_t
*irec
,
50 dotdot_update_t
*dir
= malloc(sizeof(dotdot_update_t
));
53 do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"),
54 sizeof(dotdot_update_t
));
56 INIT_LIST_HEAD(&dir
->list
);
59 dir
->ino_offset
= ino_offset
;
61 list_add(&dir
->list
, &dotdot_update_list
);
65 * Data structures and routines to keep track of directory entries
66 * and whether their leaf entry has been seen. Also used for name
67 * duplicate checking and rebuilding step if required.
70 struct dir_hash_ent
*nextbyhash
; /* next in name bucket */
71 struct dir_hash_ent
*nextbyorder
; /* next in order added */
72 xfs_dahash_t hashval
; /* hash value of name */
73 uint32_t address
; /* offset of data entry */
74 xfs_ino_t inum
; /* inode num of entry */
75 short junkit
; /* name starts with / */
76 short seen
; /* have seen leaf entry */
78 unsigned char namebuf
[];
82 int size
; /* size of hash tables */
83 struct dir_hash_ent
*first
; /* ptr to first added entry */
84 struct dir_hash_ent
*last
; /* ptr to last added entry */
85 struct dir_hash_ent
**byhash
; /* ptr to name hash buckets */
87 struct radix_tree_root byaddr
;
/*
 * DIR_HASH_TAB_SIZE(n): number of bytes needed for one struct dir_hash_tab
 * plus an array of n struct dir_hash_ent pointers. The table allocator in
 * this file allocates this many bytes and points ->byhash at the memory
 * directly after the struct, so the bucket array shares the allocation.
 */
90 #define DIR_HASH_TAB_SIZE(n) \
91 (sizeof(struct dir_hash_tab) + (sizeof(struct dir_hash_ent *) * (n)))
/* DIR_HASH_FUNC(t,a): bucket index for hash value a in table t, i.e. a modulo t->size. */
92 #define DIR_HASH_FUNC(t,a) ((a) % (t)->size)
95 * Track the contents of the freespace table in a directory.
97 typedef struct freetab
{
98 int naents
; /* expected number of data blocks */
99 int nents
; /* number of data blocks processed */
101 xfs_dir2_data_off_t v
;
/*
 * FREETAB_SIZE(n): number of bytes needed for a freetab_t holding n entries,
 * computed as the offset of the ents array within freetab_t plus room for
 * n struct freetab_ent elements. Used when (re)sizing the table.
 */
105 #define FREETAB_SIZE(n) \
106 (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
/*
 * Result codes for the directory hash table checks. The per-value notes
 * repeat the message text stored for each code in the seevalstr[] table
 * in this file; DIR_HASH_CK_TOTAL is the number of codes and is used as
 * the dimension of that table.
 */
108 #define DIR_HASH_CK_OK 0 /* "ok" */
109 #define DIR_HASH_CK_DUPLEAF 1 /* "duplicate leaf" */
110 #define DIR_HASH_CK_BADHASH 2 /* "hash value mismatch" */
111 #define DIR_HASH_CK_NODATA 3 /* "no data entry" */
112 #define DIR_HASH_CK_NOLEAF 4 /* "no leaf entry" */
113 #define DIR_HASH_CK_BADSTALE 5 /* "bad stale count" */
114 #define DIR_HASH_CK_TOTAL 6 /* count of the codes above */
117 * Need to handle CRC and validation errors specially here. If there is a
118 * validator error, re-read without the verifier so that we get a buffer we can
119 * check and repair. Re-attach the ops to the buffer after the read so that when
120 * it is rewritten the CRC is recalculated.
122 * If the buffer was not read, we return an error. If the buffer was read but
123 * had a CRC or corruption error, we reread it without the verifier and if it is
124 * read successfully we increment *crc_error and return 0. Otherwise we
125 * return the read error.
129 struct xfs_inode
*ip
,
131 struct xfs_buf
**bpp
,
132 const struct xfs_buf_ops
*ops
,
138 error
= -libxfs_da_read_buf(NULL
, ip
, bno
, 0, bpp
, XFS_DATA_FORK
, ops
);
140 if (error
!= EFSBADCRC
&& error
!= EFSCORRUPTED
)
143 error2
= -libxfs_da_read_buf(NULL
, ip
, bno
, 0, bpp
, XFS_DATA_FORK
,
154 * Returns 0 if the name already exists (ie. a duplicate)
158 struct xfs_mount
*mp
,
159 struct dir_hash_tab
*hashtab
,
166 xfs_dahash_t hash
= 0;
168 struct dir_hash_ent
*p
;
171 struct xfs_name xname
;
178 junk
= name
[0] == '/';
182 hash
= libxfs_dir2_hashname(mp
, &xname
);
183 byhash
= DIR_HASH_FUNC(hashtab
, hash
);
186 * search hash bucket for existing name.
188 for (p
= hashtab
->byhash
[byhash
]; p
; p
= p
->nextbyhash
) {
189 if (p
->hashval
== hash
&& p
->name
.len
== namelen
) {
190 if (memcmp(p
->name
.name
, name
, namelen
) == 0) {
200 * Allocate enough space for the hash entry and the name in a single
201 * allocation so we can store our own copy of the name for later use.
203 p
= calloc(1, sizeof(*p
) + namelen
+ 1);
205 do_error(_("malloc failed in dir_hash_add (%zu bytes)\n"),
208 error
= radix_tree_insert(&hashtab
->byaddr
, addr
, p
);
209 if (error
== EEXIST
) {
210 do_warn(_("duplicate addrs %u in directory!\n"), addr
);
214 radix_tree_tag_set(&hashtab
->byaddr
, addr
, HT_UNSEEN
);
217 hashtab
->last
->nextbyorder
= p
;
220 p
->nextbyorder
= NULL
;
223 if (!(p
->junkit
= junk
)) {
225 p
->nextbyhash
= hashtab
->byhash
[byhash
];
226 hashtab
->byhash
[byhash
] = p
;
232 /* Set up the name in the region trailing the hash entry. */
233 memcpy(p
->namebuf
, name
, namelen
);
234 p
->name
.name
= p
->namebuf
;
235 p
->name
.len
= namelen
;
236 p
->name
.type
= ftype
;
240 /* Mark an existing directory hashtable entry as junk. */
243 struct dir_hash_tab
*hashtab
,
244 xfs_dir2_dataptr_t addr
)
246 struct dir_hash_ent
*p
;
248 p
= radix_tree_lookup(&hashtab
->byaddr
, addr
);
257 struct dir_hash_tab
*hashtab
,
258 struct xfs_inode
*ip
,
261 static char *seevalstr
[DIR_HASH_CK_TOTAL
];
265 seevalstr
[DIR_HASH_CK_OK
] = _("ok");
266 seevalstr
[DIR_HASH_CK_DUPLEAF
] = _("duplicate leaf");
267 seevalstr
[DIR_HASH_CK_BADHASH
] = _("hash value mismatch");
268 seevalstr
[DIR_HASH_CK_NODATA
] = _("no data entry");
269 seevalstr
[DIR_HASH_CK_NOLEAF
] = _("no leaf entry");
270 seevalstr
[DIR_HASH_CK_BADSTALE
] = _("bad stale count");
274 if (seeval
== DIR_HASH_CK_OK
&&
275 radix_tree_tagged(&hashtab
->byaddr
, HT_UNSEEN
))
276 seeval
= DIR_HASH_CK_NOLEAF
;
277 if (seeval
== DIR_HASH_CK_OK
)
279 do_warn(_("bad hash table for directory inode %" PRIu64
" (%s): "),
280 ip
->i_ino
, seevalstr
[seeval
]);
282 do_warn(_("rebuilding\n"));
284 do_warn(_("would rebuild\n"));
290 struct dir_hash_tab
*hashtab
)
293 struct dir_hash_ent
*n
;
294 struct dir_hash_ent
*p
;
296 for (i
= 0; i
< hashtab
->size
; i
++) {
297 for (p
= hashtab
->byhash
[i
]; p
; p
= n
) {
299 radix_tree_delete(&hashtab
->byaddr
, p
->address
);
307 * Create a directory hash index structure based on the size of the directory we
308 * are about to try to repair. The size passed in is the size of the data
309 * segment of the directory in bytes, so we don't really know exactly how many
310 * entries are in it. Hence assume an entry size of around 64 bytes - that's a
311 * name length of 40+ bytes so should cover most situations with really large
314 static struct dir_hash_tab
*
318 struct dir_hash_tab
*hashtab
= NULL
;
326 * Try to allocate as large a hash table as possible. Failure to
327 * allocate isn't fatal, it will just result in slower performance as we
328 * reduce the size of the table.
330 while (hsize
>= 16) {
331 hashtab
= calloc(DIR_HASH_TAB_SIZE(hsize
), 1);
337 do_error(_("calloc failed in dir_hash_init\n"));
338 hashtab
->size
= hsize
;
339 hashtab
->byhash
= (struct dir_hash_ent
**)((char *)hashtab
+
340 sizeof(struct dir_hash_tab
));
341 INIT_RADIX_TREE(&hashtab
->byaddr
, 0);
347 struct dir_hash_tab
*hashtab
,
349 xfs_dir2_dataptr_t addr
)
351 struct dir_hash_ent
*p
;
353 p
= radix_tree_lookup(&hashtab
->byaddr
, addr
);
355 return DIR_HASH_CK_NODATA
;
356 if (!radix_tree_tag_get(&hashtab
->byaddr
, addr
, HT_UNSEEN
))
357 return DIR_HASH_CK_DUPLEAF
;
358 if (p
->junkit
== 0 && p
->hashval
!= hash
)
359 return DIR_HASH_CK_BADHASH
;
360 radix_tree_tag_clear(&hashtab
->byaddr
, addr
, HT_UNSEEN
);
361 return DIR_HASH_CK_OK
;
365 dir_hash_update_ftype(
366 struct dir_hash_tab
*hashtab
,
367 xfs_dir2_dataptr_t addr
,
370 struct dir_hash_ent
*p
;
372 p
= radix_tree_lookup(&hashtab
->byaddr
, addr
);
375 p
->name
.type
= ftype
;
379 * checks to make sure leafs match a data entry, and that the stale
384 struct dir_hash_tab
*hashtab
,
385 xfs_dir2_leaf_entry_t
*ents
,
393 for (i
= j
= 0; i
< count
; i
++) {
394 if (be32_to_cpu(ents
[i
].address
) == XFS_DIR2_NULL_DATAPTR
) {
398 rval
= dir_hash_see(hashtab
, be32_to_cpu(ents
[i
].hashval
),
399 be32_to_cpu(ents
[i
].address
));
400 if (rval
!= DIR_HASH_CK_OK
)
403 return j
== stale
? DIR_HASH_CK_OK
: DIR_HASH_CK_BADSTALE
;
407 * Given a block number in a fork, return the next valid block number (not a
408 * hole). If this is the last block number then NULLFILEOFF is returned.
412 struct xfs_inode
*ip
,
417 struct xfs_bmbt_irec got
;
418 struct xfs_iext_cursor icur
;
420 switch (ip
->i_df
.if_format
) {
421 case XFS_DINODE_FMT_LOCAL
:
424 case XFS_DINODE_FMT_BTREE
:
425 case XFS_DINODE_FMT_EXTENTS
:
431 /* Read extent map. */
432 error
= -libxfs_iread_extents(NULL
, ip
, XFS_DATA_FORK
);
437 if (!libxfs_iext_lookup_extent(ip
, &ip
->i_df
, bno
, &icur
, &got
))
440 *bnop
= got
.br_startoff
< bno
? bno
: got
.br_startoff
;
449 do_error(_("ran out of disk space!\n"));
451 do_error(_("xfs_trans_reserve returned %d\n"), err
);
455 reset_inode_fields(struct xfs_inode
*ip
)
461 ip
->i_cowextsize
= 0;
466 ip
->i_crtime
.tv_sec
= 0;
467 ip
->i_crtime
.tv_nsec
= 0;
471 mk_rbmino(xfs_mount_t
*mp
)
480 xfs_bmbt_irec_t map
[XFS_BMAP_MAX_NMAP
];
487 i
= -libxfs_trans_alloc_rollable(mp
, 10, &tp
);
491 error
= -libxfs_iget(mp
, tp
, mp
->m_sb
.sb_rbmino
, 0, &ip
);
494 _("couldn't iget realtime bitmap inode -- error - %d\n"),
498 reset_inode_fields(ip
);
500 VFS_I(ip
)->i_mode
= S_IFREG
;
501 ip
->i_df
.if_format
= XFS_DINODE_FMT_EXTENTS
;
503 ip
->i_afp
->if_format
= XFS_DINODE_FMT_EXTENTS
;
505 set_nlink(VFS_I(ip
), 1); /* account for sb ptr */
507 times
= XFS_ICHGTIME_CHG
| XFS_ICHGTIME_MOD
;
508 if (xfs_has_v3inodes(mp
)) {
509 VFS_I(ip
)->i_version
= 1;
511 times
|= XFS_ICHGTIME_CREATE
;
513 libxfs_trans_ichgtime(tp
, ip
, times
);
518 ip
->i_df
.if_bytes
= 0;
519 ip
->i_df
.if_u1
.if_root
= NULL
;
521 ip
->i_disk_size
= mp
->m_sb
.sb_rbmblocks
* mp
->m_sb
.sb_blocksize
;
526 libxfs_trans_ijoin(tp
, ip
, 0);
527 libxfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
528 error
= -libxfs_trans_commit(tp
);
530 do_error(_("%s: commit failed, error %d\n"), __func__
, error
);
533 * then allocate blocks for file and fill with zeroes (stolen
536 blocks
= mp
->m_sb
.sb_rbmblocks
+
537 XFS_BM_MAXLEVELS(mp
, XFS_DATA_FORK
) - 1;
538 error
= -libxfs_trans_alloc_rollable(mp
, blocks
, &tp
);
542 libxfs_trans_ijoin(tp
, ip
, 0);
544 while (bno
< mp
->m_sb
.sb_rbmblocks
) {
545 nmap
= XFS_BMAP_MAX_NMAP
;
546 error
= -libxfs_bmapi_write(tp
, ip
, bno
,
547 (xfs_extlen_t
)(mp
->m_sb
.sb_rbmblocks
- bno
),
548 0, mp
->m_sb
.sb_rbmblocks
, map
, &nmap
);
551 _("couldn't allocate realtime bitmap, error = %d\n"),
554 for (i
= 0, ep
= map
; i
< nmap
; i
++, ep
++) {
555 libxfs_device_zero(mp
->m_ddev_targp
,
556 XFS_FSB_TO_DADDR(mp
, ep
->br_startblock
),
557 XFS_FSB_TO_BB(mp
, ep
->br_blockcount
));
558 bno
+= ep
->br_blockcount
;
561 error
= -libxfs_trans_commit(tp
);
564 _("allocation of the realtime bitmap failed, error = %d\n"),
571 fill_rbmino(xfs_mount_t
*mp
)
585 error
= -libxfs_trans_alloc_rollable(mp
, 10, &tp
);
589 error
= -libxfs_iget(mp
, tp
, mp
->m_sb
.sb_rbmino
, 0, &ip
);
592 _("couldn't iget realtime bitmap inode -- error - %d\n"),
596 while (bno
< mp
->m_sb
.sb_rbmblocks
) {
598 * fill the file one block at a time
601 error
= -libxfs_bmapi_write(tp
, ip
, bno
, 1, 0, 1, &map
, &nmap
);
602 if (error
|| nmap
!= 1) {
604 _("couldn't map realtime bitmap block %" PRIu64
", error = %d\n"),
608 ASSERT(map
.br_startblock
!= HOLESTARTBLOCK
);
610 error
= -libxfs_trans_read_buf(
612 XFS_FSB_TO_DADDR(mp
, map
.br_startblock
),
613 XFS_FSB_TO_BB(mp
, 1), 1, &bp
, NULL
);
617 _("can't access block %" PRIu64
" (fsbno %" PRIu64
") of realtime bitmap inode %" PRIu64
"\n"),
618 bno
, map
.br_startblock
, mp
->m_sb
.sb_rbmino
);
622 memmove(bp
->b_addr
, bmp
, mp
->m_sb
.sb_blocksize
);
624 libxfs_trans_log_buf(tp
, bp
, 0, mp
->m_sb
.sb_blocksize
- 1);
626 bmp
= (xfs_rtword_t
*)((intptr_t) bmp
+ mp
->m_sb
.sb_blocksize
);
630 libxfs_trans_ijoin(tp
, ip
, 0);
631 error
= -libxfs_trans_commit(tp
);
633 do_error(_("%s: commit failed, error %d\n"), __func__
, error
);
639 fill_rsumino(xfs_mount_t
*mp
)
648 xfs_fileoff_t end_bno
;
653 end_bno
= mp
->m_rsumsize
>> mp
->m_sb
.sb_blocklog
;
655 error
= -libxfs_trans_alloc_rollable(mp
, 10, &tp
);
659 error
= -libxfs_iget(mp
, tp
, mp
->m_sb
.sb_rsumino
, 0, &ip
);
662 _("couldn't iget realtime summary inode -- error - %d\n"),
666 while (bno
< end_bno
) {
668 * fill the file one block at a time
671 error
= -libxfs_bmapi_write(tp
, ip
, bno
, 1, 0, 1, &map
, &nmap
);
672 if (error
|| nmap
!= 1) {
674 _("couldn't map realtime summary inode block %" PRIu64
", error = %d\n"),
678 ASSERT(map
.br_startblock
!= HOLESTARTBLOCK
);
680 error
= -libxfs_trans_read_buf(
682 XFS_FSB_TO_DADDR(mp
, map
.br_startblock
),
683 XFS_FSB_TO_BB(mp
, 1), 1, &bp
, NULL
);
687 _("can't access block %" PRIu64
" (fsbno %" PRIu64
") of realtime summary inode %" PRIu64
"\n"),
688 bno
, map
.br_startblock
, mp
->m_sb
.sb_rsumino
);
693 memmove(bp
->b_addr
, smp
, mp
->m_sb
.sb_blocksize
);
695 libxfs_trans_log_buf(tp
, bp
, 0, mp
->m_sb
.sb_blocksize
- 1);
697 smp
= (xfs_suminfo_t
*)((intptr_t)smp
+ mp
->m_sb
.sb_blocksize
);
701 libxfs_trans_ijoin(tp
, ip
, 0);
702 error
= -libxfs_trans_commit(tp
);
704 do_error(_("%s: commit failed, error %d\n"), __func__
, error
);
710 mk_rsumino(xfs_mount_t
*mp
)
720 xfs_bmbt_irec_t map
[XFS_BMAP_MAX_NMAP
];
727 i
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_ichange
, 10, 0, 0, &tp
);
731 error
= -libxfs_iget(mp
, tp
, mp
->m_sb
.sb_rsumino
, 0, &ip
);
734 _("couldn't iget realtime summary inode -- error - %d\n"),
738 reset_inode_fields(ip
);
740 VFS_I(ip
)->i_mode
= S_IFREG
;
741 ip
->i_df
.if_format
= XFS_DINODE_FMT_EXTENTS
;
743 ip
->i_afp
->if_format
= XFS_DINODE_FMT_EXTENTS
;
745 set_nlink(VFS_I(ip
), 1); /* account for sb ptr */
747 times
= XFS_ICHGTIME_CHG
| XFS_ICHGTIME_MOD
;
748 if (xfs_has_v3inodes(mp
)) {
749 VFS_I(ip
)->i_version
= 1;
751 times
|= XFS_ICHGTIME_CREATE
;
753 libxfs_trans_ichgtime(tp
, ip
, times
);
758 ip
->i_df
.if_bytes
= 0;
759 ip
->i_df
.if_u1
.if_root
= NULL
;
761 ip
->i_disk_size
= mp
->m_rsumsize
;
766 libxfs_trans_ijoin(tp
, ip
, 0);
767 libxfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
768 error
= -libxfs_trans_commit(tp
);
770 do_error(_("%s: commit failed, error %d\n"), __func__
, error
);
773 * then allocate blocks for file and fill with zeroes (stolen
776 nsumblocks
= mp
->m_rsumsize
>> mp
->m_sb
.sb_blocklog
;
777 blocks
= nsumblocks
+ XFS_BM_MAXLEVELS(mp
, XFS_DATA_FORK
) - 1;
778 error
= -libxfs_trans_alloc_rollable(mp
, blocks
, &tp
);
782 libxfs_trans_ijoin(tp
, ip
, 0);
784 while (bno
< nsumblocks
) {
785 nmap
= XFS_BMAP_MAX_NMAP
;
786 error
= -libxfs_bmapi_write(tp
, ip
, bno
,
787 (xfs_extlen_t
)(nsumblocks
- bno
),
788 0, nsumblocks
, map
, &nmap
);
791 _("couldn't allocate realtime summary inode, error = %d\n"),
794 for (i
= 0, ep
= map
; i
< nmap
; i
++, ep
++) {
795 libxfs_device_zero(mp
->m_ddev_targp
,
796 XFS_FSB_TO_DADDR(mp
, ep
->br_startblock
),
797 XFS_FSB_TO_BB(mp
, ep
->br_blockcount
));
798 bno
+= ep
->br_blockcount
;
801 error
= -libxfs_trans_commit(tp
);
804 _("allocation of the realtime summary ino failed, error = %d\n"),
811 * makes a new root directory.
814 mk_root_dir(xfs_mount_t
*mp
)
820 const mode_t mode
= 0755;
821 ino_tree_node_t
*irec
;
825 i
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_ichange
, 10, 0, 0, &tp
);
829 error
= -libxfs_iget(mp
, tp
, mp
->m_sb
.sb_rootino
, 0, &ip
);
831 do_error(_("could not iget root inode -- error - %d\n"), error
);
835 * take care of the core -- initialization from xfs_ialloc()
837 reset_inode_fields(ip
);
839 VFS_I(ip
)->i_mode
= mode
|S_IFDIR
;
840 ip
->i_df
.if_format
= XFS_DINODE_FMT_EXTENTS
;
842 ip
->i_afp
->if_format
= XFS_DINODE_FMT_EXTENTS
;
844 set_nlink(VFS_I(ip
), 2); /* account for . and .. */
846 times
= XFS_ICHGTIME_CHG
| XFS_ICHGTIME_MOD
;
847 if (xfs_has_v3inodes(mp
)) {
848 VFS_I(ip
)->i_version
= 1;
850 times
|= XFS_ICHGTIME_CREATE
;
852 libxfs_trans_ichgtime(tp
, ip
, times
);
853 libxfs_trans_ijoin(tp
, ip
, 0);
854 libxfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
859 ip
->i_df
.if_bytes
= 0;
860 ip
->i_df
.if_u1
.if_root
= NULL
;
863 * initialize the directory
865 libxfs_dir_init(tp
, ip
, ip
);
867 error
= -libxfs_trans_commit(tp
);
869 do_error(_("%s: commit failed, error %d\n"), __func__
, error
);
873 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, mp
->m_sb
.sb_rootino
),
874 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rootino
));
875 set_inode_isadir(irec
, XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rootino
) -
880 * orphanage name == lost+found
883 mk_orphanage(xfs_mount_t
*mp
)
889 ino_tree_node_t
*irec
;
893 const int mode
= 0755;
895 struct xfs_name xname
;
898 * check for an existing lost+found first, if it exists, return
899 * its inode. Otherwise, we can create it. Bad lost+found inodes
900 * would have been cleared in phase3 and phase4.
903 i
= -libxfs_iget(mp
, NULL
, mp
->m_sb
.sb_rootino
, 0, &pip
);
905 do_error(_("%d - couldn't iget root inode to obtain %s\n"),
908 xname
.name
= (unsigned char *)ORPHANAGE
;
909 xname
.len
= strlen(ORPHANAGE
);
910 xname
.type
= XFS_DIR3_FT_DIR
;
912 if (libxfs_dir_lookup(NULL
, pip
, &xname
, &ino
, NULL
) == 0)
916 * could not be found, create it
918 nres
= XFS_MKDIR_SPACE_RES(mp
, xname
.len
);
919 i
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_mkdir
, nres
, 0, 0, &tp
);
924 * use iget/ijoin instead of trans_iget because the ialloc
925 * wrapper can commit the transaction and start a new one
927 /* i = -libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip);
929 do_error(_("%d - couldn't iget root inode to make %s\n"),
932 error
= -libxfs_dir_ialloc(&tp
, pip
, mode
|S_IFDIR
,
933 1, 0, &zerocr
, &zerofsx
, &ip
);
935 do_error(_("%s inode allocation failed %d\n"),
938 inc_nlink(VFS_I(ip
)); /* account for . */
941 irec
= find_inode_rec(mp
,
942 XFS_INO_TO_AGNO(mp
, ino
),
943 XFS_INO_TO_AGINO(mp
, ino
));
947 * This inode is allocated from a newly created inode
948 * chunk and therefore did not exist when inode chunks
949 * were processed in phase3. Add this group of inodes to
950 * the entry avl tree as if they were discovered in phase3.
952 irec
= set_inode_free_alloc(mp
, XFS_INO_TO_AGNO(mp
, ino
),
953 XFS_INO_TO_AGINO(mp
, ino
));
956 for (i
= 0; i
< XFS_INODES_PER_CHUNK
; i
++)
957 set_inode_free(irec
, i
);
960 ino_offset
= get_inode_offset(mp
, ino
, irec
);
963 * Mark the inode allocated to lost+found as used in the AVL tree
964 * so it is not skipped in phase 7
966 set_inode_used(irec
, ino_offset
);
967 add_inode_ref(irec
, ino_offset
);
968 add_inode_reached(irec
, ino_offset
);
971 * now that we know the transaction will stay around,
972 * add the root inode to it
974 libxfs_trans_ijoin(tp
, pip
, 0);
977 * create the actual entry
979 error
= -libxfs_dir_createname(tp
, pip
, &xname
, ip
->i_ino
, nres
);
982 _("can't make %s, createname error %d\n"),
986 * bump up the link count in the root directory to account
987 * for .. in the new directory, and update the irec copy of the
988 * on-disk nlink so we don't fail the link count check later.
990 inc_nlink(VFS_I(pip
));
991 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, mp
->m_sb
.sb_rootino
),
992 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rootino
));
993 add_inode_ref(irec
, 0);
994 set_inode_disk_nlinks(irec
, 0, get_inode_disk_nlinks(irec
, 0) + 1);
996 libxfs_trans_log_inode(tp
, pip
, XFS_ILOG_CORE
);
997 libxfs_dir_init(tp
, ip
, pip
);
998 libxfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
999 error
= -libxfs_trans_commit(tp
);
1001 do_error(_("%s directory creation failed -- bmapf error %d\n"),
1011 * move a file to the orphanage.
1016 xfs_ino_t ino
, /* inode # to be moved */
1017 int isa_dir
) /* 1 if inode is a directory */
1019 xfs_inode_t
*orphanage_ip
;
1020 xfs_ino_t entry_ino_num
;
1024 unsigned char fname
[MAXPATHLEN
+ 1];
1027 ino_tree_node_t
*irec
;
1029 struct xfs_name xname
;
1032 xname
.len
= snprintf((char *)fname
, sizeof(fname
), "%llu",
1033 (unsigned long long)ino
);
1035 err
= -libxfs_iget(mp
, NULL
, orphanage_ino
, 0, &orphanage_ip
);
1037 do_error(_("%d - couldn't iget orphanage inode\n"), err
);
1039 * Make sure the filename is unique in the lost+found
1042 while (libxfs_dir_lookup(NULL
, orphanage_ip
, &xname
, &entry_ino_num
,
1044 xname
.len
= snprintf((char *)fname
, sizeof(fname
), "%llu.%d",
1045 (unsigned long long)ino
, ++incr
);
1047 /* Orphans may not have a proper parent, so use custom ops here */
1048 err
= -libxfs_iget(mp
, NULL
, ino
, 0, &ino_p
);
1050 do_error(_("%d - couldn't iget disconnected inode\n"), err
);
1052 xname
.type
= libxfs_mode_to_ftype(VFS_I(ino_p
)->i_mode
);
1055 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, orphanage_ino
),
1056 XFS_INO_TO_AGINO(mp
, orphanage_ino
));
1058 ino_offset
= XFS_INO_TO_AGINO(mp
, orphanage_ino
) -
1060 nres
= XFS_DIRENTER_SPACE_RES(mp
, fnamelen
) +
1061 XFS_DIRENTER_SPACE_RES(mp
, 2);
1062 err
= -libxfs_dir_lookup(NULL
, ino_p
, &xfs_name_dotdot
,
1063 &entry_ino_num
, NULL
);
1065 ASSERT(err
== ENOENT
);
1067 err
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_rename
,
1071 _("space reservation failed (%d), filesystem may be out of space\n"),
1074 libxfs_trans_ijoin(tp
, orphanage_ip
, 0);
1075 libxfs_trans_ijoin(tp
, ino_p
, 0);
1077 err
= -libxfs_dir_createname(tp
, orphanage_ip
, &xname
,
1081 _("name create failed in %s (%d), filesystem may be out of space\n"),
1085 add_inode_ref(irec
, ino_offset
);
1087 inc_nlink(VFS_I(orphanage_ip
));
1088 libxfs_trans_log_inode(tp
, orphanage_ip
, XFS_ILOG_CORE
);
1090 err
= -libxfs_dir_createname(tp
, ino_p
, &xfs_name_dotdot
,
1091 orphanage_ino
, nres
);
1094 _("creation of .. entry failed (%d), filesystem may be out of space\n"),
1097 inc_nlink(VFS_I(ino_p
));
1098 libxfs_trans_log_inode(tp
, ino_p
, XFS_ILOG_CORE
);
1099 err
= -libxfs_trans_commit(tp
);
1102 _("creation of .. entry failed (%d)\n"), err
);
1104 err
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_rename
,
1108 _("space reservation failed (%d), filesystem may be out of space\n"),
1111 libxfs_trans_ijoin(tp
, orphanage_ip
, 0);
1112 libxfs_trans_ijoin(tp
, ino_p
, 0);
1115 err
= -libxfs_dir_createname(tp
, orphanage_ip
, &xname
,
1119 _("name create failed in %s (%d), filesystem may be out of space\n"),
1123 add_inode_ref(irec
, ino_offset
);
1125 inc_nlink(VFS_I(orphanage_ip
));
1126 libxfs_trans_log_inode(tp
, orphanage_ip
, XFS_ILOG_CORE
);
1129 * don't replace .. value if it already points
1130 * to us. that'll pop a libxfs/kernel ASSERT.
1132 if (entry_ino_num
!= orphanage_ino
) {
1133 err
= -libxfs_dir_replace(tp
, ino_p
,
1134 &xfs_name_dotdot
, orphanage_ino
,
1138 _("name replace op failed (%d), filesystem may be out of space\n"),
1142 err
= -libxfs_trans_commit(tp
);
1145 _("orphanage name replace op failed (%d)\n"), err
);
1150 * use the remove log reservation as that's
1151 * more accurate. we're only creating the
1152 * links, we're not doing the inode allocation
1153 * also accounted for in the create
1155 nres
= XFS_DIRENTER_SPACE_RES(mp
, xname
.len
);
1156 err
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_remove
,
1160 _("space reservation failed (%d), filesystem may be out of space\n"),
1163 libxfs_trans_ijoin(tp
, orphanage_ip
, 0);
1164 libxfs_trans_ijoin(tp
, ino_p
, 0);
1166 err
= -libxfs_dir_createname(tp
, orphanage_ip
, &xname
, ino
,
1170 _("name create failed in %s (%d), filesystem may be out of space\n"),
1174 set_nlink(VFS_I(ino_p
), 1);
1175 libxfs_trans_log_inode(tp
, ino_p
, XFS_ILOG_CORE
);
1176 err
= -libxfs_trans_commit(tp
);
1179 _("orphanage name create failed (%d)\n"), err
);
1181 libxfs_irele(ino_p
);
1182 libxfs_irele(orphanage_ip
);
1192 do_warn(msg
, iname
, ino1
, ino2
);
1195 do_warn(_(", marking entry to be junked\n"));
1199 do_warn(_(", would junk entry\n"));
1203 /* Find and invalidate all the directory's buffers. */
1206 struct xfs_trans
*tp
,
1207 struct xfs_inode
*ip
,
1210 struct xfs_iext_cursor icur
;
1211 struct xfs_bmbt_irec rec
;
1212 struct xfs_ifork
*ifp
;
1213 struct xfs_da_geometry
*geo
;
1218 if (ip
->i_df
.if_format
!= XFS_DINODE_FMT_EXTENTS
&&
1219 ip
->i_df
.if_format
!= XFS_DINODE_FMT_BTREE
)
1222 geo
= tp
->t_mountp
->m_dir_geo
;
1223 ifp
= XFS_IFORK_PTR(ip
, XFS_DATA_FORK
);
1224 for_each_xfs_iext(ifp
, &icur
, &rec
) {
1225 for (dabno
= roundup(rec
.br_startoff
, geo
->fsbcount
);
1226 dabno
< rec
.br_startoff
+ rec
.br_blockcount
;
1227 dabno
+= geo
->fsbcount
) {
1229 error
= -libxfs_da_get_buf(tp
, ip
, dabno
, &bp
,
1235 libxfs_trans_binval(tp
, bp
);
1236 libxfs_trans_brelse(tp
, bp
);
1244 * Unexpected failure during the rebuild will leave the entries in
1245 * lost+found on the next run
1249 longform_dir2_rebuild(
1250 struct xfs_mount
*mp
,
1252 struct xfs_inode
*ip
,
1253 struct ino_tree_node
*irec
,
1255 struct dir_hash_tab
*hashtab
)
1259 struct xfs_trans
*tp
;
1260 xfs_fileoff_t lastblock
;
1261 struct xfs_inode pip
;
1262 struct dir_hash_ent
*p
;
1266 * trash directory completely and rebuild from scratch using the
1267 * name/inode pairs in the hash table
1270 do_warn(_("rebuilding directory inode %" PRIu64
"\n"), ino
);
1273 * first attempt to locate the parent inode, if it can't be
1274 * found, set it to the root inode and it'll be moved to the
1275 * orphanage later (the inode number here needs to be valid
1276 * for the libxfs_dir_init() call).
1278 pip
.i_ino
= get_inode_parent(irec
, ino_offset
);
1279 if (pip
.i_ino
== NULLFSINO
||
1280 libxfs_dir_ino_validate(mp
, pip
.i_ino
))
1281 pip
.i_ino
= mp
->m_sb
.sb_rootino
;
1283 nres
= XFS_REMOVE_SPACE_RES(mp
);
1284 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_remove
, nres
, 0, 0, &tp
);
1287 libxfs_trans_ijoin(tp
, ip
, 0);
1289 error
= dir_binval(tp
, ip
, XFS_DATA_FORK
);
1291 do_error(_("error %d invalidating directory %llu blocks\n"),
1292 error
, (unsigned long long)ip
->i_ino
);
1294 if ((error
= -libxfs_bmap_last_offset(ip
, &lastblock
, XFS_DATA_FORK
)))
1295 do_error(_("xfs_bmap_last_offset failed -- error - %d\n"),
1298 /* free all data, leaf, node and freespace blocks */
1300 error
= -libxfs_bunmapi(tp
, ip
, 0, lastblock
, XFS_BMAPI_METADATA
,
1303 do_warn(_("xfs_bunmapi failed -- error - %d\n"), error
);
1304 goto out_bmap_cancel
;
1306 error
= -libxfs_defer_finish(&tp
);
1308 do_warn(("defer_finish failed -- error - %d\n"), error
);
1309 goto out_bmap_cancel
;
1312 * Close out trans and start the next one in the chain.
1314 error
= -libxfs_trans_roll_inode(&tp
, ip
);
1316 goto out_bmap_cancel
;
1319 error
= -libxfs_dir_init(tp
, ip
, &pip
);
1321 do_warn(_("xfs_dir_init failed -- error - %d\n"), error
);
1322 goto out_bmap_cancel
;
1325 error
= -libxfs_trans_commit(tp
);
1328 _("dir init failed (%d)\n"), error
);
1330 if (ino
== mp
->m_sb
.sb_rootino
)
1331 need_root_dotdot
= 0;
1333 /* go through the hash list and re-add the inodes */
1335 for (p
= hashtab
->first
; p
; p
= p
->nextbyorder
) {
1337 if (p
->name
.name
[0] == '/' || (p
->name
.name
[0] == '.' &&
1338 (p
->name
.len
== 1 || (p
->name
.len
== 2 &&
1339 p
->name
.name
[1] == '.'))))
1342 nres
= XFS_CREATE_SPACE_RES(mp
, p
->name
.len
);
1343 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_create
,
1348 libxfs_trans_ijoin(tp
, ip
, 0);
1350 error
= -libxfs_dir_createname(tp
, ip
, &p
->name
, p
->inum
,
1354 _("name create failed in ino %" PRIu64
" (%d), filesystem may be out of space\n"),
1356 goto out_bmap_cancel
;
1359 error
= -libxfs_trans_commit(tp
);
1362 _("name create failed (%d) during rebuild\n"), error
);
1368 libxfs_trans_cancel(tp
);
1374 * Kill a block in a version 2 inode.
1375 * Makes its own transaction.
1389 nres
= XFS_REMOVE_SPACE_RES(mp
);
1390 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_remove
, nres
, 0, 0, &tp
);
1393 libxfs_trans_ijoin(tp
, ip
, 0);
1394 libxfs_trans_bjoin(tp
, bp
);
1395 libxfs_trans_bhold(tp
, bp
);
1396 memset(&args
, 0, sizeof(args
));
1399 args
.whichfork
= XFS_DATA_FORK
;
1400 args
.geo
= mp
->m_dir_geo
;
1401 if (da_bno
>= mp
->m_dir_geo
->leafblk
&& da_bno
< mp
->m_dir_geo
->freeblk
)
1402 error
= -libxfs_da_shrink_inode(&args
, da_bno
, bp
);
1404 error
= -libxfs_dir2_shrink_inode(&args
,
1405 xfs_dir2_da_to_db(mp
->m_dir_geo
, da_bno
), bp
);
1407 do_error(_("shrink_inode failed inode %" PRIu64
" block %u\n"),
1409 error
= -libxfs_trans_commit(tp
);
1412 _("directory shrink failed (%d)\n"), error
);
1416 * process a data block, also checks for .. entry
1417 * and corrects it to match what we think .. should be
1420 longform_dir2_entry_check_data(
1421 struct xfs_mount
*mp
,
1422 struct xfs_inode
*ip
,
1425 struct ino_tree_node
*current_irec
,
1426 int current_ino_offset
,
1428 struct dir_hash_tab
*hashtab
,
1429 freetab_t
**freetabp
,
1433 xfs_dir2_dataptr_t addr
;
1434 xfs_dir2_leaf_entry_t
*blp
;
1435 xfs_dir2_block_tail_t
*btp
;
1436 struct xfs_dir2_data_hdr
*d
;
1438 xfs_dir2_data_entry_t
*dep
;
1439 xfs_dir2_data_unused_t
*dup
;
1440 struct xfs_dir2_data_free
*bf
;
1443 char fname
[MAXNAMELEN
+ 1];
1448 ino_tree_node_t
*irec
;
1459 struct xfs_da_args da
= {
1461 .geo
= mp
->m_dir_geo
,
1466 ptr
= (char *)d
+ mp
->m_dir_geo
->data_entry_offset
;
1468 needscan
= needlog
= 0;
1470 freetab
= *freetabp
;
1472 btp
= xfs_dir2_block_tail_p(mp
->m_dir_geo
, d
);
1473 blp
= xfs_dir2_block_leaf_p(btp
);
1474 endptr
= (char *)blp
;
1475 if (endptr
> (char *)btp
)
1476 endptr
= (char *)btp
;
1477 if (xfs_has_crc(mp
))
1478 wantmagic
= XFS_DIR3_BLOCK_MAGIC
;
1480 wantmagic
= XFS_DIR2_BLOCK_MAGIC
;
1482 endptr
= (char *)d
+ mp
->m_dir_geo
->blksize
;
1483 if (xfs_has_crc(mp
))
1484 wantmagic
= XFS_DIR3_DATA_MAGIC
;
1486 wantmagic
= XFS_DIR2_DATA_MAGIC
;
1488 db
= xfs_dir2_da_to_db(mp
->m_dir_geo
, da_bno
);
1490 /* check for data block beyond expected end */
1491 if (freetab
->naents
<= db
) {
1492 struct freetab_ent e
;
1494 *freetabp
= freetab
= realloc(freetab
, FREETAB_SIZE(db
+ 1));
1496 do_error(_("realloc failed in %s (%zu bytes)\n"),
1497 __func__
, FREETAB_SIZE(db
+ 1));
1501 for (i
= freetab
->naents
; i
< db
; i
++)
1502 freetab
->ents
[i
] = e
;
1503 freetab
->naents
= db
+ 1;
1506 /* check the data block */
1507 while (ptr
< endptr
) {
1509 /* check for freespace */
1510 dup
= (xfs_dir2_data_unused_t
*)ptr
;
1511 if (XFS_DIR2_DATA_FREE_TAG
== be16_to_cpu(dup
->freetag
)) {
1513 /* check for invalid freespace length */
1514 if (ptr
+ be16_to_cpu(dup
->length
) > endptr
||
1515 be16_to_cpu(dup
->length
) == 0 ||
1516 (be16_to_cpu(dup
->length
) &
1517 (XFS_DIR2_DATA_ALIGN
- 1)))
1520 /* check for invalid tag */
1521 if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup
)) !=
1522 (char *)dup
- (char *)d
)
1525 /* check for block with no data entries */
1526 if ((ptr
== (char *)d
+ mp
->m_dir_geo
->data_entry_offset
) &&
1527 (ptr
+ be16_to_cpu(dup
->length
) >= endptr
)) {
1533 /* continue at the end of the freespace */
1534 ptr
+= be16_to_cpu(dup
->length
);
1539 /* validate data entry size */
1540 dep
= (xfs_dir2_data_entry_t
*)ptr
;
1541 if (ptr
+ libxfs_dir2_data_entsize(mp
, dep
->namelen
) > endptr
)
1543 if (be16_to_cpu(*libxfs_dir2_data_entry_tag_p(mp
, dep
)) !=
1544 (char *)dep
- (char *)d
)
1546 ptr
+= libxfs_dir2_data_entsize(mp
, dep
->namelen
);
1549 /* did we find an empty or corrupt block? */
1550 if (ptr
!= endptr
) {
1553 _("empty data block %u in directory inode %" PRIu64
": "),
1557 ("corrupt block %u in directory inode %" PRIu64
": "),
1561 do_warn(_("junking block\n"));
1562 dir2_kill_block(mp
, ip
, da_bno
, bp
);
1564 do_warn(_("would junk block\n"));
1566 freetab
->ents
[db
].v
= NULLDATAOFF
;
1570 /* update number of data blocks processed */
1571 if (freetab
->nents
< db
+ 1)
1572 freetab
->nents
= db
+ 1;
1574 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_remove
, 0, 0, 0, &tp
);
1578 libxfs_trans_ijoin(tp
, ip
, 0);
1579 libxfs_trans_bjoin(tp
, bp
);
1580 libxfs_trans_bhold(tp
, bp
);
1581 if (be32_to_cpu(d
->magic
) != wantmagic
) {
1583 _("bad directory block magic # %#x for directory inode %" PRIu64
" block %d: "),
1584 be32_to_cpu(d
->magic
), ip
->i_ino
, da_bno
);
1586 do_warn(_("fixing magic # to %#x\n"), wantmagic
);
1587 d
->magic
= cpu_to_be32(wantmagic
);
1590 do_warn(_("would fix magic # to %#x\n"), wantmagic
);
1593 ptr
= (char *)d
+ mp
->m_dir_geo
->data_entry_offset
;
1595 * look at each entry. reference inode pointed to by each
1596 * entry in the incore inode tree.
1597 * if not a directory, set reached flag, increment link count
1598 * if a directory and reached, mark entry as to be deleted.
1599 * if a directory, check to see if recorded parent
1600 * matches current inode #,
1601 * if so, then set reached flag, increment link count
1602 * of current and child dir inodes, push the child
1603 * directory inode onto the directory stack.
1604 * if current inode != parent, then mark entry to be deleted.
1606 while (ptr
< endptr
) {
1607 dup
= (xfs_dir2_data_unused_t
*)ptr
;
1608 if (be16_to_cpu(dup
->freetag
) == XFS_DIR2_DATA_FREE_TAG
) {
1611 _("directory inode %" PRIu64
" block %u has consecutive free entries: "),
1615 do_warn(_("joining together\n"));
1616 len
= be16_to_cpu(dup
->length
);
1617 libxfs_dir2_data_use_free(&da
, bp
, dup
,
1618 ptr
- (char *)d
, len
, &needlog
,
1620 libxfs_dir2_data_make_free(&da
, bp
,
1621 ptr
- (char *)d
, len
, &needlog
,
1624 do_warn(_("would join together\n"));
1626 ptr
+= be16_to_cpu(dup
->length
);
1630 addr
= xfs_dir2_db_off_to_dataptr(mp
->m_dir_geo
, db
,
1632 dep
= (xfs_dir2_data_entry_t
*)ptr
;
1633 ptr
+= libxfs_dir2_data_entsize(mp
, dep
->namelen
);
1634 inum
= be64_to_cpu(dep
->inumber
);
1637 * skip bogus entries (leading '/'). they'll be deleted
1638 * later. must still log it, else we leak references to
1641 if (dep
->name
[0] == '/') {
1644 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1648 memmove(fname
, dep
->name
, dep
->namelen
);
1649 fname
[dep
->namelen
] = '\0';
1650 ASSERT(inum
!= NULLFSINO
);
1652 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, inum
),
1653 XFS_INO_TO_AGINO(mp
, inum
));
1657 _("entry \"%s\" in directory inode %" PRIu64
" points to non-existent inode %" PRIu64
""),
1658 fname
, ip
->i_ino
, inum
)) {
1660 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1664 ino_offset
= XFS_INO_TO_AGINO(mp
, inum
) - irec
->ino_startnum
;
1667 * if it's a free inode, blow out the entry.
1668 * by now, any inode that we think is free
1671 if (is_inode_free(irec
, ino_offset
)) {
1674 _("entry \"%s\" in directory inode %" PRIu64
" points to free inode %" PRIu64
),
1675 fname
, ip
->i_ino
, inum
)) {
1677 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1683 * check if this inode is lost+found dir in the root
1685 if (inum
== mp
->m_sb
.sb_rootino
&& strcmp(fname
, ORPHANAGE
) == 0) {
1687 * if it's not a directory, trash it
1689 if (!inode_isadir(irec
, ino_offset
)) {
1692 _("%s (ino %" PRIu64
") in root (%" PRIu64
") is not a directory"),
1693 ORPHANAGE
, inum
, ip
->i_ino
)) {
1695 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1700 * if this is a dup, it will be picked up below,
1701 * otherwise, mark it as the orphanage for later.
1704 orphanage_ino
= inum
;
1708 * check for duplicate names in directory.
1710 if (!dir_hash_add(mp
, hashtab
, addr
, inum
, dep
->namelen
,
1711 dep
->name
, libxfs_dir2_data_get_ftype(mp
, dep
))) {
1714 _("entry \"%s\" (ino %" PRIu64
") in dir %" PRIu64
" is a duplicate name"),
1715 fname
, inum
, ip
->i_ino
)) {
1717 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1719 if (inum
== orphanage_ino
)
1725 * if just scanning to rebuild a directory due to a ".."
1726 * update, just continue
1732 * skip the '..' entry since it's checked when the
1733 * directory is reached by something else. if it never
1734 * gets reached, it'll be moved to the orphanage and we'll
1735 * take care of it then. If it doesn't exist at all, the
1736 * directory needs to be rebuilt first before being added
1739 if (dep
->namelen
== 2 && dep
->name
[0] == '.' &&
1740 dep
->name
[1] == '.') {
1742 /* ".." should be in the first block */
1745 _("entry \"%s\" (ino %" PRIu64
") in dir %" PRIu64
" is not in the the first block"), fname
,
1747 dir_hash_junkit(hashtab
, addr
);
1749 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1754 ASSERT(no_modify
|| libxfs_verify_dir_ino(mp
, inum
));
1756 * special case the . entry. we know there's only one
1757 * '.' and only '.' points to itself because bogus entries
1758 * got trashed in phase 3 if there were > 1.
1759 * bump up link count for '.' but don't set reached
1760 * until we're actually reached by another directory
1761 * '..' is already accounted for or will be taken care
1762 * of when directory is moved to orphanage.
1764 if (ip
->i_ino
== inum
) {
1766 (dep
->name
[0] == '.' && dep
->namelen
== 1));
1767 add_inode_ref(current_irec
, current_ino_offset
);
1769 dep
!= (void *)d
+ mp
->m_dir_geo
->data_entry_offset
) {
1770 /* "." should be the first entry */
1773 _("entry \"%s\" in dir %" PRIu64
" is not the first entry"),
1774 fname
, inum
, ip
->i_ino
)) {
1775 dir_hash_junkit(hashtab
, addr
);
1777 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1784 * skip entries with bogus inumbers if we're in no modify mode
1786 if (no_modify
&& !libxfs_verify_dir_ino(mp
, inum
))
1789 /* validate ftype field if supported */
1790 if (xfs_has_ftype(mp
)) {
1794 dir_ftype
= libxfs_dir2_data_get_ftype(mp
, dep
);
1795 ino_ftype
= get_inode_ftype(irec
, ino_offset
);
1797 if (dir_ftype
!= ino_ftype
) {
1800 _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64
"/%" PRIu64
"\n"),
1801 dir_ftype
, ino_ftype
,
1805 _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64
"/%" PRIu64
"\n"),
1806 dir_ftype
, ino_ftype
,
1808 libxfs_dir2_data_put_ftype(mp
, dep
, ino_ftype
);
1809 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1810 dir_hash_update_ftype(hashtab
, addr
,
1817 * check easy case first, regular inode, just bump
1818 * the link count and continue
1820 if (!inode_isadir(irec
, ino_offset
)) {
1821 add_inode_reached(irec
, ino_offset
);
1824 parent
= get_inode_parent(irec
, ino_offset
);
1825 ASSERT(parent
!= 0);
1828 * bump up the link counts in parent and child
1829 * directory but if the link doesn't agree with
1830 * the .. in the child, blow out the entry.
1831 * if the directory has already been reached,
1832 * blow away the entry also.
1834 if (is_inode_reached(irec
, ino_offset
)) {
1837 _("entry \"%s\" in dir %" PRIu64
" points to an already connected directory inode %" PRIu64
"\n"),
1838 fname
, ip
->i_ino
, inum
);
1839 } else if (parent
== ip
->i_ino
) {
1840 add_inode_reached(irec
, ino_offset
);
1841 add_inode_ref(current_irec
, current_ino_offset
);
1842 } else if (parent
== NULLFSINO
) {
1843 /* ".." was missing, but this entry refers to it,
1844 so, set it as the parent and mark for rebuild */
1846 _("entry \"%s\" in dir ino %" PRIu64
" doesn't have a .. entry, will set it in ino %" PRIu64
".\n"),
1847 fname
, ip
->i_ino
, inum
);
1848 set_inode_parent(irec
, ino_offset
, ip
->i_ino
);
1849 add_inode_reached(irec
, ino_offset
);
1850 add_inode_ref(current_irec
, current_ino_offset
);
1851 add_dotdot_update(XFS_INO_TO_AGNO(mp
, inum
), irec
,
1856 _("entry \"%s\" in dir inode %" PRIu64
" inconsistent with .. value (%" PRIu64
") in ino %" PRIu64
"\n"),
1857 fname
, ip
->i_ino
, parent
, inum
);
1860 if (inum
== orphanage_ino
)
1864 dir_hash_junkit(hashtab
, addr
);
1866 libxfs_dir2_data_log_entry(&da
, bp
, dep
);
1869 _("\twill clear entry \"%s\"\n"),
1872 do_warn(_("\twould clear entry \"%s\"\n"),
1877 *num_illegal
+= nbad
;
1879 libxfs_dir2_data_freescan(mp
, d
, &i
);
1881 libxfs_dir2_data_log_header(&da
, bp
);
1882 error
= -libxfs_trans_commit(tp
);
1885 _("directory block fixing failed (%d)\n"), error
);
1887 /* record the largest free space in the freetab for later checking */
1888 bf
= libxfs_dir2_data_bestfree_p(mp
, d
);
1889 freetab
->ents
[db
].v
= be16_to_cpu(bf
[0].length
);
1890 freetab
->ents
[db
].s
= 0;
1893 /* check v5 metadata */
1895 __check_dir3_header(
1896 struct xfs_mount
*mp
,
1905 if (be64_to_cpu(owner
) != ino
) {
1907 _("expected owner inode %" PRIu64
", got %llu, directory block %" PRIu64
"\n"),
1908 ino
, (unsigned long long)be64_to_cpu(owner
), xfs_buf_daddr(bp
));
1911 /* verify block number */
1912 if (be64_to_cpu(blkno
) != xfs_buf_daddr(bp
)) {
1914 _("expected block %" PRIu64
", got %llu, directory inode %" PRIu64
"\n"),
1915 xfs_buf_daddr(bp
), (unsigned long long)be64_to_cpu(blkno
), ino
);
1919 if (platform_uuid_compare(uuid
, &mp
->m_sb
.sb_meta_uuid
) != 0) {
1921 _("wrong FS UUID, directory inode %" PRIu64
" block %" PRIu64
"\n"),
1922 ino
, xfs_buf_daddr(bp
));
1931 struct xfs_mount
*mp
,
1935 struct xfs_da3_blkinfo
*info
= bp
->b_addr
;
1937 return __check_dir3_header(mp
, bp
, ino
, info
->owner
, info
->blkno
,
1943 struct xfs_mount
*mp
,
1947 struct xfs_dir3_blk_hdr
*info
= bp
->b_addr
;
1949 return __check_dir3_header(mp
, bp
, ino
, info
->owner
, info
->blkno
,
1954 * Check contents of leaf-form block.
1957 longform_dir2_check_leaf(
1958 struct xfs_mount
*mp
,
1959 struct xfs_inode
*ip
,
1960 struct dir_hash_tab
*hashtab
,
1961 struct freetab
*freetab
)
1968 xfs_dir2_leaf_t
*leaf
;
1969 xfs_dir2_leaf_tail_t
*ltp
;
1971 struct xfs_dir2_leaf_entry
*ents
;
1972 struct xfs_dir3_icleaf_hdr leafhdr
;
1976 da_bno
= mp
->m_dir_geo
->leafblk
;
1977 error
= dir_read_buf(ip
, da_bno
, &bp
, &xfs_dir3_leaf1_buf_ops
, &fixit
);
1978 if (error
== EFSBADCRC
|| error
== EFSCORRUPTED
|| fixit
) {
1980 _("leaf block %u for directory inode %" PRIu64
" bad CRC\n"),
1985 _("can't read block %u for directory inode %" PRIu64
", error %d\n"),
1986 da_bno
, ip
->i_ino
, error
);
1991 libxfs_dir2_leaf_hdr_from_disk(mp
, &leafhdr
, leaf
);
1992 ents
= leafhdr
.ents
;
1993 ltp
= xfs_dir2_leaf_tail_p(mp
->m_dir_geo
, leaf
);
1994 bestsp
= xfs_dir2_leaf_bests_p(ltp
);
1995 if (!(leafhdr
.magic
== XFS_DIR2_LEAF1_MAGIC
||
1996 leafhdr
.magic
== XFS_DIR3_LEAF1_MAGIC
) ||
1997 leafhdr
.forw
|| leafhdr
.back
||
1998 leafhdr
.count
< leafhdr
.stale
||
1999 leafhdr
.count
> mp
->m_dir_geo
->leaf_max_ents
||
2000 (char *)&ents
[leafhdr
.count
] > (char *)bestsp
) {
2002 _("leaf block %u for directory inode %" PRIu64
" bad header\n"),
2004 libxfs_buf_relse(bp
);
2008 if (leafhdr
.magic
== XFS_DIR3_LEAF1_MAGIC
) {
2009 error
= check_da3_header(mp
, bp
, ip
->i_ino
);
2011 libxfs_buf_relse(bp
);
2016 seeval
= dir_hash_see_all(hashtab
, ents
, leafhdr
.count
, leafhdr
.stale
);
2017 if (dir_hash_check(hashtab
, ip
, seeval
)) {
2018 libxfs_buf_relse(bp
);
2021 badtail
= freetab
->nents
!= be32_to_cpu(ltp
->bestcount
);
2022 for (i
= 0; !badtail
&& i
< be32_to_cpu(ltp
->bestcount
); i
++) {
2023 freetab
->ents
[i
].s
= 1;
2024 badtail
= freetab
->ents
[i
].v
!= be16_to_cpu(bestsp
[i
]);
2028 _("leaf block %u for directory inode %" PRIu64
" bad tail\n"),
2030 libxfs_buf_relse(bp
);
2033 libxfs_buf_relse(bp
);
2038 * Check contents of the node blocks (leaves)
2039 * Looks for matching hash values for the data entries.
2042 longform_dir2_check_node(
2043 struct xfs_mount
*mp
,
2044 struct xfs_inode
*ip
,
2045 struct dir_hash_tab
*hashtab
,
2046 struct freetab
*freetab
)
2051 xfs_dir2_free_t
*free
;
2053 xfs_dir2_leaf_t
*leaf
;
2054 xfs_fileoff_t next_da_bno
;
2057 struct xfs_dir2_leaf_entry
*ents
;
2058 struct xfs_dir3_icleaf_hdr leafhdr
;
2059 struct xfs_dir3_icfree_hdr freehdr
;
2064 for (da_bno
= mp
->m_dir_geo
->leafblk
, next_da_bno
= 0;
2065 next_da_bno
!= NULLFILEOFF
&& da_bno
< mp
->m_dir_geo
->freeblk
;
2066 da_bno
= (xfs_dablk_t
)next_da_bno
) {
2067 next_da_bno
= da_bno
+ mp
->m_dir_geo
->fsbcount
- 1;
2068 if (bmap_next_offset(ip
, &next_da_bno
))
2072 * we need to use the da3 node verifier here as it handles the
2073 * fact that reading the leaf hash tree blocks can return either
2074 * leaf or node blocks and calls the correct verifier. If we get
2075 * a node block, then we'll skip it below based on a magic
2078 error
= dir_read_buf(ip
, da_bno
, &bp
, &xfs_da3_node_buf_ops
,
2082 _("can't read leaf block %u for directory inode %" PRIu64
", error %d\n"),
2083 da_bno
, ip
->i_ino
, error
);
2087 libxfs_dir2_leaf_hdr_from_disk(mp
, &leafhdr
, leaf
);
2088 ents
= leafhdr
.ents
;
2089 if (!(leafhdr
.magic
== XFS_DIR2_LEAFN_MAGIC
||
2090 leafhdr
.magic
== XFS_DIR3_LEAFN_MAGIC
||
2091 leafhdr
.magic
== XFS_DA_NODE_MAGIC
||
2092 leafhdr
.magic
== XFS_DA3_NODE_MAGIC
)) {
2094 _("unknown magic number %#x for block %u in directory inode %" PRIu64
"\n"),
2095 leafhdr
.magic
, da_bno
, ip
->i_ino
);
2096 libxfs_buf_relse(bp
);
2100 /* check v5 metadata */
2101 if (leafhdr
.magic
== XFS_DIR3_LEAFN_MAGIC
||
2102 leafhdr
.magic
== XFS_DA3_NODE_MAGIC
) {
2103 error
= check_da3_header(mp
, bp
, ip
->i_ino
);
2105 libxfs_buf_relse(bp
);
2111 if (leafhdr
.magic
== XFS_DA_NODE_MAGIC
||
2112 leafhdr
.magic
== XFS_DA3_NODE_MAGIC
) {
2113 libxfs_buf_relse(bp
);
2118 * If there's a validator error, we need to ensure that we got
2119 * the right ops on the buffer for when we write it back out.
2121 bp
->b_ops
= &xfs_dir3_leafn_buf_ops
;
2122 if (leafhdr
.count
> mp
->m_dir_geo
->leaf_max_ents
||
2123 leafhdr
.count
< leafhdr
.stale
) {
2125 _("leaf block %u for directory inode %" PRIu64
" bad header\n"),
2127 libxfs_buf_relse(bp
);
2130 seeval
= dir_hash_see_all(hashtab
, ents
,
2131 leafhdr
.count
, leafhdr
.stale
);
2132 libxfs_buf_relse(bp
);
2133 if (seeval
!= DIR_HASH_CK_OK
)
2136 if (dir_hash_check(hashtab
, ip
, seeval
))
2139 for (da_bno
= mp
->m_dir_geo
->freeblk
, next_da_bno
= 0;
2140 next_da_bno
!= NULLFILEOFF
;
2141 da_bno
= (xfs_dablk_t
)next_da_bno
) {
2142 next_da_bno
= da_bno
+ mp
->m_dir_geo
->fsbcount
- 1;
2143 if (bmap_next_offset(ip
, &next_da_bno
))
2146 error
= dir_read_buf(ip
, da_bno
, &bp
, &xfs_dir3_free_buf_ops
,
2150 _("can't read freespace block %u for directory inode %" PRIu64
", error %d\n"),
2151 da_bno
, ip
->i_ino
, error
);
2155 libxfs_dir2_free_hdr_from_disk(mp
, &freehdr
, free
);
2156 bests
= freehdr
.bests
;
2157 fdb
= xfs_dir2_da_to_db(mp
->m_dir_geo
, da_bno
);
2158 if (!(freehdr
.magic
== XFS_DIR2_FREE_MAGIC
||
2159 freehdr
.magic
== XFS_DIR3_FREE_MAGIC
) ||
2161 (fdb
- xfs_dir2_byte_to_db(mp
->m_dir_geo
, XFS_DIR2_FREE_OFFSET
)) *
2162 mp
->m_dir_geo
->free_max_bests
||
2163 freehdr
.nvalid
< freehdr
.nused
) {
2165 _("free block %u for directory inode %" PRIu64
" bad header\n"),
2167 libxfs_buf_relse(bp
);
2171 if (freehdr
.magic
== XFS_DIR3_FREE_MAGIC
) {
2172 error
= check_dir3_header(mp
, bp
, ip
->i_ino
);
2174 libxfs_buf_relse(bp
);
2178 for (i
= used
= 0; i
< freehdr
.nvalid
; i
++) {
2179 if (i
+ freehdr
.firstdb
>= freetab
->nents
||
2180 freetab
->ents
[i
+ freehdr
.firstdb
].v
!=
2181 be16_to_cpu(bests
[i
])) {
2183 _("free block %u entry %i for directory ino %" PRIu64
" bad\n"),
2184 da_bno
, i
, ip
->i_ino
);
2185 libxfs_buf_relse(bp
);
2188 used
+= be16_to_cpu(bests
[i
]) != NULLDATAOFF
;
2189 freetab
->ents
[i
+ freehdr
.firstdb
].s
= 1;
2191 if (used
!= freehdr
.nused
) {
2193 _("free block %u for directory inode %" PRIu64
" bad nused\n"),
2195 libxfs_buf_relse(bp
);
2198 libxfs_buf_relse(bp
);
2200 for (i
= 0; i
< freetab
->nents
; i
++) {
2201 if ((freetab
->ents
[i
].s
== 0) &&
2202 (freetab
->ents
[i
].v
!= NULLDATAOFF
)) {
2204 _("missing freetab entry %u for directory inode %" PRIu64
"\n"),
2213 * If a directory is corrupt, we need to read in as many entries as possible,
2214 * destroy the directory and create a new one with recovered name/inode pairs.
2215 * (i.e. get libxfs to do all the grunt work)
2218 longform_dir2_entry_check(
2219 struct xfs_mount
*mp
,
2221 struct xfs_inode
*ip
,
2224 struct ino_tree_node
*irec
,
2226 struct dir_hash_tab
*hashtab
)
2228 struct xfs_buf
*bp
= NULL
;
2234 xfs_fileoff_t next_da_bno
;
2237 struct xfs_da_args args
;
2240 freetab
= malloc(FREETAB_SIZE(ip
->i_disk_size
/ mp
->m_dir_geo
->blksize
));
2242 do_error(_("malloc failed in %s (%" PRId64
" bytes)\n"),
2244 FREETAB_SIZE(ip
->i_disk_size
/ mp
->m_dir_geo
->blksize
));
2247 freetab
->naents
= ip
->i_disk_size
/ mp
->m_dir_geo
->blksize
;
2249 for (i
= 0; i
< freetab
->naents
; i
++) {
2250 freetab
->ents
[i
].v
= NULLDATAOFF
;
2251 freetab
->ents
[i
].s
= 0;
2254 /* is this a block, leaf, or node directory? */
2256 args
.geo
= mp
->m_dir_geo
;
2257 libxfs_dir2_isblock(&args
, &isblock
);
2258 libxfs_dir2_isleaf(&args
, &isleaf
);
2260 /* check directory "data" blocks (ie. name/inode pairs) */
2261 for (da_bno
= 0, next_da_bno
= 0;
2262 next_da_bno
!= NULLFILEOFF
&& da_bno
< mp
->m_dir_geo
->leafblk
;
2263 da_bno
= (xfs_dablk_t
)next_da_bno
) {
2264 const struct xfs_buf_ops
*ops
;
2266 struct xfs_dir2_data_hdr
*d
;
2268 next_da_bno
= da_bno
+ mp
->m_dir_geo
->fsbcount
- 1;
2269 if (bmap_next_offset(ip
, &next_da_bno
)) {
2271 * if this is the first block, there isn't anything we
2272 * can recover so we just trash it.
2282 ops
= &xfs_dir3_block_buf_ops
;
2284 ops
= &xfs_dir3_data_buf_ops
;
2286 error
= dir_read_buf(ip
, da_bno
, &bp
, ops
, &fixit
);
2289 _("can't read data block %u for directory inode %" PRIu64
" error %d\n"),
2290 da_bno
, ino
, error
);
2294 * we try to read all "data" blocks, but if we are in
2295 * block form and we fail, there isn't anything else to
2296 * read, and nothing we can do but trash it.
2305 /* check v5 metadata */
2307 if (be32_to_cpu(d
->magic
) == XFS_DIR3_BLOCK_MAGIC
||
2308 be32_to_cpu(d
->magic
) == XFS_DIR3_DATA_MAGIC
) {
2309 error
= check_dir3_header(mp
, bp
, ino
);
2318 longform_dir2_entry_check_data(mp
, ip
, num_illegal
, need_dot
,
2319 irec
, ino_offset
, bp
, hashtab
,
2320 &freetab
, da_bno
, isblock
);
2324 libxfs_buf_relse(bp
);
2326 fixit
|= (*num_illegal
!= 0) || dir2_is_badino(ino
) || *need_dot
;
2328 if (!dotdot_update
) {
2329 /* check btree and freespace */
2331 struct xfs_dir2_data_hdr
*block
;
2332 xfs_dir2_block_tail_t
*btp
;
2333 xfs_dir2_leaf_entry_t
*blp
;
2336 btp
= xfs_dir2_block_tail_p(mp
->m_dir_geo
, block
);
2337 blp
= xfs_dir2_block_leaf_p(btp
);
2338 seeval
= dir_hash_see_all(hashtab
, blp
,
2339 be32_to_cpu(btp
->count
),
2340 be32_to_cpu(btp
->stale
));
2341 if (dir_hash_check(hashtab
, ip
, seeval
))
2343 } else if (isleaf
) {
2344 fixit
|= longform_dir2_check_leaf(mp
, ip
, hashtab
,
2347 fixit
|= longform_dir2_check_node(mp
, ip
, hashtab
,
2353 libxfs_buf_relse(bp
);
2355 if (!no_modify
&& (fixit
|| dotdot_update
)) {
2356 longform_dir2_rebuild(mp
, ino
, ip
, irec
, ino_offset
, hashtab
);
2360 if (fixit
|| dotdot_update
)
2362 _("would rebuild directory inode %" PRIu64
"\n"), ino
);
2369 * shortform directory v2 processing routines -- entry verification and
2370 * bad entry deletion (pruning).
2372 static struct xfs_dir2_sf_entry
*
2373 shortform_dir2_junk(
2374 struct xfs_mount
*mp
,
2375 struct xfs_dir2_sf_hdr
*sfp
,
2376 struct xfs_dir2_sf_entry
*sfep
,
2383 struct xfs_dir2_sf_entry
*next_sfep
;
2387 if (lino
== orphanage_ino
)
2390 next_elen
= libxfs_dir2_sf_entsize(mp
, sfp
, sfep
->namelen
);
2391 next_sfep
= libxfs_dir2_sf_nextentry(mp
, sfp
, sfep
);
2394 * if we are just checking, simply return the pointer to the next entry
2395 * here so that the checking loop can continue.
2398 do_warn(_("would junk entry\n"));
2403 * now move all the remaining entries down over the junked entry and
2404 * clear the newly unused bytes at the tail of the directory region.
2406 next_len
= *max_size
- ((intptr_t)next_sfep
- (intptr_t)sfp
);
2407 *max_size
-= next_elen
;
2408 *bytes_deleted
+= next_elen
;
2410 memmove(sfep
, next_sfep
, next_len
);
2411 memset((void *)((intptr_t)sfep
+ next_len
), 0, next_elen
);
2416 * WARNING: drop the index i by one so it matches the decremented count
2417 * for accurate comparisons in the loop test
2422 do_warn(_("junking entry\n"));
2429 shortform_dir2_entry_check(
2430 struct xfs_mount
*mp
,
2432 struct xfs_inode
*ip
,
2434 struct ino_tree_node
*current_irec
,
2435 int current_ino_offset
,
2436 struct dir_hash_tab
*hashtab
)
2440 struct xfs_dir2_sf_hdr
*sfp
;
2441 struct xfs_dir2_sf_entry
*sfep
;
2442 struct xfs_dir2_sf_entry
*next_sfep
;
2443 struct xfs_ifork
*ifp
;
2444 struct ino_tree_node
*irec
;
2451 char fname
[MAXNAMELEN
+ 1];
2455 sfp
= (struct xfs_dir2_sf_hdr
*) ifp
->if_u1
.if_data
;
2459 max_size
= ifp
->if_bytes
;
2460 ASSERT(ip
->i_disk_size
<= ifp
->if_bytes
);
2463 * if we are just rebuilding a directory due to a ".." update, set ".." and return
2465 if (dotdot_update
) {
2466 parent
= get_inode_parent(current_irec
, current_ino_offset
);
2469 _("would set .. in sf dir inode %" PRIu64
" to %" PRIu64
"\n"),
2473 _("setting .. in sf dir inode %" PRIu64
" to %" PRIu64
"\n"),
2475 libxfs_dir2_sf_put_parent_ino(sfp
, parent
);
2482 * no '.' entry in shortform dirs, just bump up ref count by 1
2483 * '..' was already (or will be) accounted for and checked when
2484 * the directory is reached or will be taken care of when the
2485 * directory is moved to orphanage.
2487 add_inode_ref(current_irec
, current_ino_offset
);
2490 * Initialise i8 counter -- the parent inode number counts as well.
2492 i8
= libxfs_dir2_sf_get_parent_ino(sfp
) > XFS_DIR2_MAX_SHORT_INUM
;
2495 * now run through entries, stop at first bad entry, don't need
2496 * to skip over '..' since that's encoded in its own field and
2497 * no need to worry about '.' since it doesn't exist.
2499 sfep
= next_sfep
= xfs_dir2_sf_firstentry(sfp
);
2501 for (i
= 0; i
< sfp
->count
&& max_size
>
2502 (intptr_t)next_sfep
- (intptr_t)sfp
;
2503 sfep
= next_sfep
, i
++) {
2506 lino
= libxfs_dir2_sf_get_ino(mp
, sfp
, sfep
);
2508 namelen
= sfep
->namelen
;
2510 ASSERT(no_modify
|| namelen
> 0);
2512 if (no_modify
&& namelen
== 0) {
2514 * if we're really lucky, this is
2515 * the last entry in which case we
2516 * can use the dir size to set the
2517 * namelen value. otherwise, forget
2518 * it because we're not going to be
2519 * able to find the next entry.
2523 if (i
== sfp
->count
- 1) {
2524 namelen
= ip
->i_disk_size
-
2525 ((intptr_t) &sfep
->name
[0] -
2529 * don't process the rest of the directory,
2530 * break out of processing loop
2534 } else if (no_modify
&& (intptr_t) sfep
- (intptr_t) sfp
+
2535 + libxfs_dir2_sf_entsize(mp
, sfp
, sfep
->namelen
)
2536 > ip
->i_disk_size
) {
2539 if (i
== sfp
->count
- 1) {
2540 namelen
= ip
->i_disk_size
-
2541 ((intptr_t) &sfep
->name
[0] -
2545 * don't process the rest of the directory,
2546 * break out of processing loop
2552 memmove(fname
, sfep
->name
, sfep
->namelen
);
2553 fname
[sfep
->namelen
] = '\0';
2555 ASSERT(no_modify
|| (lino
!= NULLFSINO
&& lino
!= 0));
2556 ASSERT(no_modify
|| libxfs_verify_dir_ino(mp
, lino
));
2559 * Also skip entries with bogus inode numbers if we're
2560 * in no modify mode.
2563 if (no_modify
&& !libxfs_verify_dir_ino(mp
, lino
)) {
2564 next_sfep
= libxfs_dir2_sf_nextentry(mp
, sfp
, sfep
);
2568 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, lino
),
2569 XFS_INO_TO_AGINO(mp
, lino
));
2573 _("entry \"%s\" in shortform directory %" PRIu64
" references non-existent inode %" PRIu64
"\n"),
2575 next_sfep
= shortform_dir2_junk(mp
, sfp
, sfep
, lino
,
2576 &max_size
, &i
, &bytes_deleted
,
2581 ino_offset
= XFS_INO_TO_AGINO(mp
, lino
) - irec
->ino_startnum
;
2584 * if it's a free inode, blow out the entry.
2585 * by now, any inode that we think is free
2588 if (is_inode_free(irec
, ino_offset
)) {
2590 _("entry \"%s\" in shortform directory inode %" PRIu64
" points to free inode %" PRIu64
"\n"),
2592 next_sfep
= shortform_dir2_junk(mp
, sfp
, sfep
, lino
,
2593 &max_size
, &i
, &bytes_deleted
,
2598 * check if this inode is lost+found dir in the root
2600 if (ino
== mp
->m_sb
.sb_rootino
&& strcmp(fname
, ORPHANAGE
) == 0) {
2602 * if it's not a directory, trash it
2604 if (!inode_isadir(irec
, ino_offset
)) {
2606 _("%s (ino %" PRIu64
") in root (%" PRIu64
") is not a directory"),
2607 ORPHANAGE
, lino
, ino
);
2608 next_sfep
= shortform_dir2_junk(mp
, sfp
, sfep
,
2609 lino
, &max_size
, &i
,
2610 &bytes_deleted
, ino_dirty
);
2614 * if this is a dup, it will be picked up below,
2615 * otherwise, mark it as the orphanage for later.
2618 orphanage_ino
= lino
;
2621 * check for duplicate names in directory.
2623 if (!dir_hash_add(mp
, hashtab
, (xfs_dir2_dataptr_t
)
2624 (sfep
- xfs_dir2_sf_firstentry(sfp
)),
2625 lino
, sfep
->namelen
, sfep
->name
,
2626 libxfs_dir2_sf_get_ftype(mp
, sfep
))) {
2628 _("entry \"%s\" (ino %" PRIu64
") in dir %" PRIu64
" is a duplicate name"),
2630 next_sfep
= shortform_dir2_junk(mp
, sfp
, sfep
, lino
,
2631 &max_size
, &i
, &bytes_deleted
,
2636 if (!inode_isadir(irec
, ino_offset
)) {
2638 * check easy case first, regular inode, just bump
2641 add_inode_reached(irec
, ino_offset
);
2643 parent
= get_inode_parent(irec
, ino_offset
);
2646 * bump up the link counts in parent and child
2647 * directory but if the link doesn't agree with
2648 * the .. in the child, blow out the entry
2650 if (is_inode_reached(irec
, ino_offset
)) {
2652 _("entry \"%s\" in directory inode %" PRIu64
2653 " references already connected inode %" PRIu64
".\n"),
2655 next_sfep
= shortform_dir2_junk(mp
, sfp
, sfep
,
2656 lino
, &max_size
, &i
,
2657 &bytes_deleted
, ino_dirty
);
2659 } else if (parent
== ino
) {
2660 add_inode_reached(irec
, ino_offset
);
2661 add_inode_ref(current_irec
, current_ino_offset
);
2662 } else if (parent
== NULLFSINO
) {
2663 /* ".." was missing, but this entry refers to it,
2664 so, set it as the parent and mark for rebuild */
2666 _("entry \"%s\" in dir ino %" PRIu64
" doesn't have a .. entry, will set it in ino %" PRIu64
".\n"),
2668 set_inode_parent(irec
, ino_offset
, ino
);
2669 add_inode_reached(irec
, ino_offset
);
2670 add_inode_ref(current_irec
, current_ino_offset
);
2671 add_dotdot_update(XFS_INO_TO_AGNO(mp
, lino
),
2675 _("entry \"%s\" in directory inode %" PRIu64
2676 " not consistent with .. value (%" PRIu64
2677 ") in inode %" PRIu64
",\n"),
2678 fname
, ino
, parent
, lino
);
2679 next_sfep
= shortform_dir2_junk(mp
, sfp
, sfep
,
2680 lino
, &max_size
, &i
,
2681 &bytes_deleted
, ino_dirty
);
2686 /* validate ftype field if supported */
2687 if (xfs_has_ftype(mp
)) {
2691 dir_ftype
= libxfs_dir2_sf_get_ftype(mp
, sfep
);
2692 ino_ftype
= get_inode_ftype(irec
, ino_offset
);
2694 if (dir_ftype
!= ino_ftype
) {
2697 _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64
"/%" PRIu64
"\n"),
2698 dir_ftype
, ino_ftype
,
2702 _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64
"/%" PRIu64
"\n"),
2703 dir_ftype
, ino_ftype
,
2705 libxfs_dir2_sf_put_ftype(mp
, sfep
,
2707 dir_hash_update_ftype(hashtab
,
2708 (xfs_dir2_dataptr_t
)(sfep
- xfs_dir2_sf_firstentry(sfp
)),
2715 if (lino
> XFS_DIR2_MAX_SHORT_INUM
)
2719 * go onto next entry - we have to take entries with bad namelen
2720 * into account in no modify mode since we calculate size based
2723 ASSERT(no_modify
|| bad_sfnamelen
== 0);
2724 next_sfep
= (struct xfs_dir2_sf_entry
*)((intptr_t)sfep
+
2726 ? libxfs_dir2_sf_entsize(mp
, sfp
, namelen
)
2727 : libxfs_dir2_sf_entsize(mp
, sfp
, sfep
->namelen
)));
2730 if (sfp
->i8count
!= i8
) {
2732 do_warn(_("would fix i8count in inode %" PRIu64
"\n"),
2736 struct xfs_dir2_sf_entry
*tmp_sfep
;
2738 tmp_sfep
= next_sfep
;
2739 process_sf_dir2_fixi8(mp
, sfp
, &tmp_sfep
);
2741 (intptr_t)next_sfep
-
2743 next_sfep
= tmp_sfep
;
2747 do_warn(_("fixing i8count in inode %" PRIu64
"\n"),
2753 * sync up sizes if required
2755 if (*ino_dirty
&& bytes_deleted
> 0) {
2757 libxfs_idata_realloc(ip
, -bytes_deleted
, XFS_DATA_FORK
);
2758 ip
->i_disk_size
-= bytes_deleted
;
2761 if (ip
->i_disk_size
!= ip
->i_df
.if_bytes
) {
2762 ASSERT(ip
->i_df
.if_bytes
== (xfs_fsize_t
)
2763 ((intptr_t) next_sfep
- (intptr_t) sfp
));
2764 ip
->i_disk_size
= (xfs_fsize_t
)
2765 ((intptr_t) next_sfep
- (intptr_t) sfp
);
2767 _("setting size to %" PRId64
" bytes to reflect junked entries\n"),
2774 * processes all reachable inodes in directories
2778 struct xfs_mount
*mp
,
2779 xfs_agnumber_t agno
,
2780 struct ino_tree_node
*irec
,
2784 struct xfs_inode
*ip
;
2785 struct xfs_trans
*tp
;
2786 struct dir_hash_tab
*hashtab
;
2788 int dirty
, num_illegal
, error
, nres
;
2790 ino
= XFS_AGINO_TO_INO(mp
, agno
, irec
->ino_startnum
+ ino_offset
);
2793 * open up directory inode, check all entries,
2794 * then call prune_dir_entries to remove all
2795 * remaining illegal directory entries.
2798 ASSERT(!is_inode_refchecked(irec
, ino_offset
) || dotdot_update
);
2800 error
= -libxfs_iget(mp
, NULL
, ino
, 0, &ip
);
2804 _("couldn't map inode %" PRIu64
", err = %d\n"),
2808 _("couldn't map inode %" PRIu64
", err = %d\n"),
2811 * see below for what we're doing if this
2812 * is root. Why do we need to do this here?
2813 * to ensure that the root doesn't show up
2814 * as being disconnected in the no_modify case.
2816 if (mp
->m_sb
.sb_rootino
== ino
) {
2817 add_inode_reached(irec
, 0);
2818 add_inode_ref(irec
, 0);
2822 add_inode_refchecked(irec
, 0);
2826 need_dot
= dirty
= num_illegal
= 0;
2828 if (mp
->m_sb
.sb_rootino
== ino
) {
2830 * mark root inode reached and bump up
2831 * link count for root inode to account
2832 * for '..' entry since the root inode is
2833 * never reached by a parent. we know
2834 * that root's '..' is always good --
2835 * guaranteed by phase 3 and/or below.
2837 add_inode_reached(irec
, ino_offset
);
2840 add_inode_refchecked(irec
, ino_offset
);
2842 hashtab
= dir_hash_init(ip
->i_disk_size
);
2845 * look for bogus entries
2847 switch (ip
->i_df
.if_format
) {
2848 case XFS_DINODE_FMT_EXTENTS
:
2849 case XFS_DINODE_FMT_BTREE
:
2851 * also check for missing '.' in longform dirs.
2852 * missing .. entries are added if required when
2853 * the directory is connected to lost+found. but
2854 * we need to create '.' entries here.
2856 longform_dir2_entry_check(mp
, ino
, ip
,
2857 &num_illegal
, &need_dot
,
2862 case XFS_DINODE_FMT_LOCAL
:
2864 * using the remove reservation is overkill
2865 * since at most we'll only need to log the
2866 * inode but it's easier than wedging a
2867 * new define in ourselves.
2869 nres
= no_modify
? 0 : XFS_REMOVE_SPACE_RES(mp
);
2870 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_remove
,
2875 libxfs_trans_ijoin(tp
, ip
, 0);
2877 shortform_dir2_entry_check(mp
, ino
, ip
, &dirty
,
2881 ASSERT(dirty
== 0 || (dirty
&& !no_modify
));
2883 libxfs_trans_log_inode(tp
, ip
,
2884 XFS_ILOG_CORE
| XFS_ILOG_DDATA
);
2885 error
= -libxfs_trans_commit(tp
);
2888 _("error %d fixing shortform directory %llu\n"),
2890 (unsigned long long)ip
->i_ino
);
2892 libxfs_trans_cancel(tp
);
2899 dir_hash_done(hashtab
);
2902 * if we have to create a .. for /, do it now *before*
2903 * we delete the bogus entries, otherwise the directory
2904 * could transform into a shortform dir which would
2905 * probably cause the simulation to choke. Even
2906 * if the illegal entries get shifted around, it's ok
2907 * because the entries are structurally intact and in
2908 * hash-value order so the simulation won't get confused
2909 * if it has to move them around.
2911 if (!no_modify
&& need_root_dotdot
&& ino
== mp
->m_sb
.sb_rootino
) {
2912 ASSERT(ip
->i_df
.if_format
!= XFS_DINODE_FMT_LOCAL
);
2914 do_warn(_("recreating root directory .. entry\n"));
2916 nres
= XFS_MKDIR_SPACE_RES(mp
, 2);
2917 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_mkdir
,
2922 libxfs_trans_ijoin(tp
, ip
, 0);
2924 error
= -libxfs_dir_createname(tp
, ip
, &xfs_name_dotdot
,
2928 _("can't make \"..\" entry in root inode %" PRIu64
", createname error %d\n"), ino
, error
);
2930 libxfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
2931 error
= -libxfs_trans_commit(tp
);
2934 _("root inode \"..\" entry recreation failed (%d)\n"), error
);
2936 need_root_dotdot
= 0;
2937 } else if (need_root_dotdot
&& ino
== mp
->m_sb
.sb_rootino
) {
2938 do_warn(_("would recreate root directory .. entry\n"));
2942 * if we need to create the '.' entry, do so only if
2943 * the directory is a longform dir. if it's been
2944 * turned into a shortform dir, then the inode is ok
2945 * since shortform dirs have no '.' entry and the inode
2946 * has already been committed by prune_lf_dir_entry().
2950 * bump up our link count but don't
2951 * bump up the inode link count. chances
2952 * are good that even though we lost '.'
2953 * the inode link counts reflect '.' so
2954 * leave the inode link count alone and if
2955 * it turns out to be wrong, we'll catch
2958 add_inode_ref(irec
, ino_offset
);
2962 _("would create missing \".\" entry in dir ino %" PRIu64
"\n"),
2964 } else if (ip
->i_df
.if_format
!= XFS_DINODE_FMT_LOCAL
) {
2966 * need to create . entry in longform dir.
2969 _("creating missing \".\" entry in dir ino %" PRIu64
"\n"), ino
);
2971 nres
= XFS_MKDIR_SPACE_RES(mp
, 1);
2972 error
= -libxfs_trans_alloc(mp
, &M_RES(mp
)->tr_mkdir
,
2977 libxfs_trans_ijoin(tp
, ip
, 0);
2979 error
= -libxfs_dir_createname(tp
, ip
, &xfs_name_dot
,
2983 _("can't make \".\" entry in dir ino %" PRIu64
", createname error %d\n"),
2986 libxfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
2987 error
= -libxfs_trans_commit(tp
);
2990 _("root inode \".\" entry recreation failed (%d)\n"), error
);
2997 * mark realtime bitmap and summary inodes as reached.
2998 * the user, group and project quota inodes are marked here as well
/*
 * mark_standalone_inodes(mp): for each inode number stored directly in the
 * superblock (mp->m_sb), look up its in-core inode record with
 * find_inode_rec(), compute the slot offset inside that record and call
 * add_inode_reached() on it.
 *
 * Inodes handled, in order: sb_rbmino, sb_rsumino, then sb_uquotino,
 * sb_gquotino and sb_pquotino.  Each quota inode is handled only when its
 * field is non-zero and not NULLFSINO.
 *
 * NOTE(review): this listing omits several source lines (3002, 3004, 3010,
 * 3018, 3022 among others).  The right-hand operand of the first two
 * offset subtractions is therefore not visible; it is presumably
 * irec->ino_startnum as in the three quota cases.  Any outer guard around
 * the quota section is not visible either.  Confirm against the full file.
 * NOTE(review): no NULL check on the find_inode_rec() result is visible
 * before irec is used.
 */
3001 mark_standalone_inodes(xfs_mount_t
*mp
)
3003 ino_tree_node_t
*irec
;
/* realtime bitmap inode (sb_rbmino) */
3006 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, mp
->m_sb
.sb_rbmino
),
3007 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rbmino
));
3009 offset
= XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rbmino
) -
3012 add_inode_reached(irec
, offset
);
/* realtime summary inode (sb_rsumino) */
3014 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, mp
->m_sb
.sb_rsumino
),
3015 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rsumino
));
3017 offset
= XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rsumino
) -
3020 add_inode_reached(irec
, offset
);
/* user quota inode: only when the field is set and not NULLFSINO */
3023 if (mp
->m_sb
.sb_uquotino
3024 && mp
->m_sb
.sb_uquotino
!= NULLFSINO
) {
3025 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
,
3026 mp
->m_sb
.sb_uquotino
),
3027 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_uquotino
));
/* slot offset = AG-relative inode number minus first inode of the record */
3028 offset
= XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_uquotino
)
3029 - irec
->ino_startnum
;
3030 add_inode_reached(irec
, offset
);
/* group quota inode: same pattern as the user quota inode */
3032 if (mp
->m_sb
.sb_gquotino
3033 && mp
->m_sb
.sb_gquotino
!= NULLFSINO
) {
3034 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
,
3035 mp
->m_sb
.sb_gquotino
),
3036 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_gquotino
));
3037 offset
= XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_gquotino
)
3038 - irec
->ino_startnum
;
3039 add_inode_reached(irec
, offset
);
/* project quota inode: same pattern as the user quota inode */
3041 if (mp
->m_sb
.sb_pquotino
3042 && mp
->m_sb
.sb_pquotino
!= NULLFSINO
) {
3043 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
,
3044 mp
->m_sb
.sb_pquotino
),
3045 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_pquotino
));
3046 offset
= XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_pquotino
)
3047 - irec
->ino_startnum
;
3048 add_inode_reached(irec
, offset
);
/*
 * check_for_orphaned_inodes(mp, agno, irec): walk all XFS_INODES_PER_CHUNK
 * slots of one in-core inode record belonging to allocation group agno and
 * deal with inodes that are in use but were never marked reached.
 *
 * For such an inode a "disconnected" warning is printed (with a separate
 * wording for directories).  The two follow-up messages show that the
 * inode is either moved into ORPHANAGE via mk_orphanage()/mv_orphanage(),
 * or only reported as "would move".
 *
 * NOTE(review): the listing omits the statements controlled by the
 * is_inode_free() and is_inode_reached() tests (source lines 3065, 3068),
 * presumably "continue".  It also omits the conditions selecting between
 * the move and would-move paths (3079, 3083) and the first parameter line
 * (3055).  Confirm against the full file.
 */
3054 check_for_orphaned_inodes(
3056 xfs_agnumber_t agno
,
3057 ino_tree_node_t
*irec
)
3062 for (i
= 0; i
< XFS_INODES_PER_CHUNK
; i
++) {
/* every slot is expected to have been confirmed before this point */
3063 ASSERT(is_inode_confirmed(irec
, i
));
3064 if (is_inode_free(irec
, i
))
3067 if (is_inode_reached(irec
, i
))
/* a non-directory handled here must have zero recorded references */
3070 ASSERT(inode_isadir(irec
, i
) ||
3071 num_inode_references(irec
, i
) == 0);
/* build the full inode number from agno and (slot + first inode of record) */
3073 ino
= XFS_AGINO_TO_INO(mp
, agno
, i
+ irec
->ino_startnum
);
3074 if (inode_isadir(irec
, i
))
3075 do_warn(_("disconnected dir inode %" PRIu64
", "), ino
);
3077 do_warn(_("disconnected inode %" PRIu64
", "), ino
);
/* the result of mk_orphanage() is cached in the file-scope orphanage_ino */
3080 orphanage_ino
= mk_orphanage(mp
);
3081 do_warn(_("moving to %s\n"), ORPHANAGE
);
3082 mv_orphanage(mp
, ino
, inode_isadir(irec
, i
));
3084 do_warn(_("would move to %s\n"), ORPHANAGE
);
3087 * for read-only case, even though the inode isn't
3088 * really reachable, set the flag (and bump our link
3089 * count) anyway to fool phase 7
3091 add_inode_reached(irec
, i
);
/*
 * Workqueue callback operating on one in-core inode record.
 *
 * arg is the record (struct ino_tree_node *).  For each of its
 * XFS_INODES_PER_CHUNK slots that inode_isadir() reports as a directory,
 * process_dir_inode() is called with wq->wq_ctx as its first argument,
 * plus agno, the record and the slot index.
 *
 * NOTE(review): the line carrying the function name (source line 3096) is
 * missing from this listing; the queue_work() call elsewhere in the file
 * passes do_dir_inode with a matching argument list, so that is presumably
 * this function.  The third parameter line (arg) is missing too.
 */
3097 struct workqueue
*wq
,
3098 xfs_agnumber_t agno
,
3101 struct ino_tree_node
*irec
= arg
;
3104 for (i
= 0; i
< XFS_INODES_PER_CHUNK
; i
++) {
3105 if (inode_isadir(irec
, i
))
3106 process_dir_inode(wq
->wq_ctx
, agno
, irec
, i
);
/*
 * Per-AG workqueue callback that drives directory processing for
 * allocation group agno.
 *
 * arg is a prefetch_args_t pointer and wq->wq_ctx is the struct xfs_mount.
 * Sequence visible here:
 *   1. wait_for_inode_prefetch(pf_args)
 *   2. log the AG number
 *   3. create a bounded local workqueue lwq
 *   4. walk the inode records of the AG with findfirst_inode_rec() and
 *      next_ino_rec(), queueing do_dir_inode for each record
 *   5. destroy_work_queue(&lwq), then cleanup_inode_prefetch(pf_args)
 *
 * NOTE(review): the function-name line (source line 3111) is missing from
 * this listing; the name is presumably traverse_function, which is passed
 * to do_inode_prefetch() elsewhere in the file.  Also missing are the
 * statement controlled by the ino_isa_dir test (presumably "continue"),
 * the condition guarding sem_post(), and the call that consumes the
 * "processing inode chunk" format string.
 */
3112 struct workqueue
*wq
,
3113 xfs_agnumber_t agno
,
3116 struct ino_tree_node
*irec
;
3117 prefetch_args_t
*pf_args
= arg
;
3118 struct workqueue lwq
;
3119 struct xfs_mount
*mp
= wq
->wq_ctx
;
3121 wait_for_inode_prefetch(pf_args
);
3124 do_log(_(" - agno = %d\n"), agno
);
3127 * The more AGs we have in flight at once, the fewer processing threads
3128 * per AG. This means we don't overwhelm the machine with hundreds of
3129 * threads when we start acting on lots of AGs at once. We just want
3130 * enough that we can keep multiple CPUs busy across multiple AGs.
/* NOTE(review): ag_stride and 1000 are passed as worker count and queue bound, judging by the comment above; confirm against workqueue_create_bound() */
3132 workqueue_create_bound(&lwq
, mp
, ag_stride
, 1000);
3134 for (irec
= findfirst_inode_rec(agno
); irec
; irec
= next_ino_rec(irec
)) {
/* ino_isa_dir == 0: the record holds no directories */
3135 if (irec
->ino_isa_dir
== 0)
3139 sem_post(&pf_args
->ra_count
);
/* read back the semaphore count into i for the message below */
3143 sem_getvalue(&pf_args
->ra_count
, &i
);
3145 "processing inode chunk %p in AG %d (sem count = %d)",
3151 queue_work(&lwq
, do_dir_inode
, agno
, irec
);
3153 destroy_work_queue(&lwq
);
3154 cleanup_inode_prefetch(pf_args
);
/*
 * update_missing_dotdot_entries(): drain the file-scope dotdot_update_list.
 *
 * While the list is non-empty, the entry at dotdot_update_list.prev is
 * taken, unlinked with list_del(), and process_dir_inode() is re-run on
 * the recorded (agno, irec, ino_offset).
 *
 * NOTE(review): add_dotdot_update() inserts with list_add(), so taking
 * .prev presumably processes entries oldest-first; confirm against the
 * list implementation in use.  The parameter line, the line that sets the
 * dotdot_update flag, the final list_entry() argument and any release of
 * the entry are missing from this listing.
 */
3158 update_missing_dotdot_entries(
3161 dotdot_update_t
*dir
;
3164 * these entries parents were updated, rebuild them again
3165 * set dotdot_update flag so processing routines do not count links
3168 while (!list_empty(&dotdot_update_list
)) {
3169 dir
= list_entry(dotdot_update_list
.prev
, struct dotdot_update
,
/* unlink before reprocessing so the loop condition makes progress */
3171 list_del(&dir
->list
);
3172 process_dir_inode(mp
, dir
->agno
, dir
->irec
, dir
->ino_offset
);
/*
 * Thin wrapper: starts the directory traversal by handing
 * traverse_function to do_inode_prefetch() together with mp and ag_stride.
 *
 * NOTE(review): the function-name line (source line 3178) is missing from
 * this listing, and the meaning of the trailing false/true arguments is
 * defined by do_inode_prefetch(), which is not visible here.
 */
3179 struct xfs_mount
*mp
)
3181 do_inode_prefetch(mp
, ag_stride
, traverse_function
, false, true);
3185 phase6(xfs_mount_t
*mp
)
3187 ino_tree_node_t
*irec
;
3190 memset(&zerocr
, 0, sizeof(struct cred
));
3191 memset(&zerofsx
, 0, sizeof(struct fsxattr
));
3194 do_log(_("Phase 6 - check inode connectivity...\n"));
3196 incore_ext_teardown(mp
);
3198 add_ino_ex_data(mp
);
3201 * verify existence of root directory - if we have to
3202 * make one, it's ok for the incore data structs not to
3203 * know about it since everything about it (and the other
3204 * inodes in its chunk if a new chunk was created) are ok
3206 if (need_root_inode
) {
3208 do_warn(_("reinitializing root directory\n"));
3210 need_root_inode
= 0;
3211 need_root_dotdot
= 0;
3213 do_warn(_("would reinitialize root directory\n"));
3219 do_warn(_("reinitializing realtime bitmap inode\n"));
3223 do_warn(_("would reinitialize realtime bitmap inode\n"));
3229 do_warn(_("reinitializing realtime summary inode\n"));
3233 do_warn(_("would reinitialize realtime summary inode\n"));
3239 _(" - resetting contents of realtime bitmap and summary inodes\n"));
3240 if (fill_rbmino(mp
)) {
3242 _("Warning: realtime bitmap may be inconsistent\n"));
3245 if (fill_rsumino(mp
)) {
3247 _("Warning: realtime bitmap may be inconsistent\n"));
3251 mark_standalone_inodes(mp
);
3253 do_log(_(" - traversing filesystem ...\n"));
3255 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, mp
->m_sb
.sb_rootino
),
3256 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rootino
));
3259 * we always have a root inode, even if it's free...
3260 * if the root is free, forget it, lost+found is already gone
3262 if (is_inode_free(irec
, 0) || !inode_isadir(irec
, 0)) {
3263 need_root_inode
= 1;
3267 * then process all inodes by walking incore inode tree
3272 * any directories that had updated ".." entries, rebuild them now
3274 update_missing_dotdot_entries(mp
);
3276 do_log(_(" - traversal finished ...\n"));
3277 do_log(_(" - moving disconnected inodes to %s ...\n"),
3281 * move all disconnected inodes to the orphanage
3283 for (i
= 0; i
< glob_agcount
; i
++) {
3284 irec
= findfirst_inode_rec(i
);
3285 while (irec
!= NULL
) {
3286 check_for_orphaned_inodes(mp
, i
, irec
);
3287 irec
= next_ino_rec(irec
);