2 * rehash.c --- rebuild hash tree directories
4 * Copyright (C) 2002 Theodore Ts'o
7 * This file may be redistributed under the terms of the GNU Public
11 * This algorithm is designed for simplicity of implementation and to
12 * pack the directory as much as possible. It however requires twice
13 * as much memory as the size of the directory. The maximum size
14 * directory supported using a 4k blocksize is roughly a gigabyte, and
15 * so there may very well be problems with machines that don't have
16 * virtual memory, and obscenely large directories.
18 * An alternate algorithm which is much more disk intensive could be
19 * written, and probably will need to be written in the future. The
20 * design goals of such an algorithm are: (a) use (roughly) constant
21 * amounts of memory, no matter how large the directory, (b) the
22 * directory must be safe at all times, even if e2fsck is interrupted
23 * in the middle, (c) we must use minimal amounts of extra disk
24 * blocks. This pretty much requires an incremental approach, where
25 * we are reading from one part of the directory, and inserting into
26 * the front half. So the algorithm will have to keep track of a
27 * moving block boundary between the new tree and the old tree, and
28 * files will need to be moved from the old directory and inserted
29 * into the new tree. If the new directory requires space which isn't
30 * yet available, blocks from the beginning part of the old directory
31 * may need to be moved to the end of the directory to make room for
34 * --------------------------------------------------------
35 * | new tree | | old tree |
36 * --------------------------------------------------------
40 * This is going to be a pain in the tuckus to implement, and will
41 * require a lot more disk accesses. So I'm going to skip it for now;
42 * it's only really going to be an issue for really, really big
43 * filesystems (when we reach the level of tens of millions of files
44 * in a single directory). It will probably be easier to simply
45 * require that e2fsck use VM first.
/*
 * Scan state handed to fill_dir_block() through its priv_data pointer.
 * harray is a growable array of hash entries: max_array is its capacity
 * and num_array the number of slots in use.
 *
 * NOTE(review): the members after num_array (minor_hash, dir, hashes) are
 * accessed elsewhere in this file through struct hash_entry and
 * struct out_dir pointers, so they presumably belong to those structures;
 * their headers are not visible in this listing -- confirm.
 */
54 struct fill_dir_struct
{
56 struct ext2_inode
*inode
;
59 struct hash_entry
*harray
;
60 int max_array
, num_array
;
68 ext2_dirhash_t minor_hash
;
70 struct ext2_dir_entry
*dir
;
77 ext2_dirhash_t
*hashes
;
/*
 * Block-iterator callback (passed to ext2fs_block_iterate2() by
 * e2fsck_rehash_dir()).  Loads one directory block into the in-memory
 * copy at fd->buf + blockcnt * blocksize and records every live entry
 * of that block in fd->harray together with its hash values.
 *
 * Errors are reported through fd->err; a malformed entry or a block
 * beyond i_size sets it to EXT2_ET_DIR_CORRUPTED.
 */
80 static int fill_dir_block(ext2_filsys fs
,
83 blk_t ref_block
EXT2FS_ATTR((unused
)),
84 int ref_offset
EXT2FS_ATTR((unused
)),
87 struct fill_dir_struct
*fd
= (struct fill_dir_struct
*) priv_data
;
88 struct hash_entry
*new_array
, *ent
;
89 struct ext2_dir_entry
*dirent
;
91 unsigned int offset
, dir_offset
;
92 int rec_len
, hash_alg
;
97 offset
= blockcnt
* fs
->blocksize
;
/* A block that would extend past i_size is treated as corruption. */
98 if (offset
+ fs
->blocksize
> fd
->inode
->i_size
) {
99 fd
->err
= EXT2_ET_DIR_CORRUPTED
;
102 dir
= (fd
->buf
+offset
);
/* A hole is represented as one empty entry spanning the whole block. */
103 if (HOLE_BLKADDR(*block_nr
)) {
104 memset(dir
, 0, fs
->blocksize
);
105 dirent
= (struct ext2_dir_entry
*) dir
;
106 dirent
->rec_len
= fs
->blocksize
;
108 fd
->err
= ext2fs_read_dir_block(fs
, *block_nr
, dir
);
112 hash_alg
= fs
->super
->s_def_hash_version
;
113 if ((hash_alg
<= EXT2_HASH_TEA
) &&
114 (fs
->super
->s_flags
& EXT2_FLAGS_UNSIGNED_HASH
))
116 /* While the directory block is "hot", index it. */
118 while (dir_offset
< fs
->blocksize
) {
119 dirent
= (struct ext2_dir_entry
*) (dir
+ dir_offset
);
/* A stored rec_len of 0 means 65536 when the block size is >= 65536. */
120 rec_len
= (dirent
->rec_len
|| fs
->blocksize
< 65536) ?
121 dirent
->rec_len
: 65536;
/* Reject entries that overrun the block, are misaligned, or too short. */
122 if (((dir_offset
+ rec_len
) > fs
->blocksize
) ||
124 ((rec_len
% 4) != 0) ||
125 (((dirent
->name_len
& 0xFF)+8) > rec_len
)) {
126 fd
->err
= EXT2_ET_DIR_CORRUPTED
;
129 dir_offset
+= rec_len
;
130 if (dirent
->inode
== 0)
132 if (!fd
->compress
&& ((dirent
->name_len
&0xFF) == 1) &&
133 (dirent
->name
[0] == '.'))
/* When not compressing, ".." only supplies the parent inode number. */
135 if (!fd
->compress
&& ((dirent
->name_len
&0xFF) == 2) &&
136 (dirent
->name
[0] == '.') && (dirent
->name
[1] == '.')) {
137 fd
->parent
= dirent
->inode
;
/* Grow the entry array in steps of 500 once it is full. */
140 if (fd
->num_array
>= fd
->max_array
) {
141 new_array
= realloc(fd
->harray
,
142 sizeof(struct hash_entry
) * (fd
->max_array
+500));
147 fd
->harray
= new_array
;
148 fd
->max_array
+= 500;
150 ent
= fd
->harray
+ fd
->num_array
++;
152 fd
->dir_size
+= EXT2_DIR_REC_LEN(dirent
->name_len
& 0xFF);
153 ent
->ino
= dirent
->inode
;
155 ent
->hash
= ent
->minor_hash
= 0;
157 fd
->err
= ext2fs_dirhash(hash_alg
, dirent
->name
,
158 dirent
->name_len
& 0xFF,
159 fs
->super
->s_hash_seed
,
160 &ent
->hash
, &ent
->minor_hash
);
169 /* Used for sorting the hash entry */
170 static EXT2_QSORT_TYPE
ino_cmp(const void *a
, const void *b
)
172 const struct hash_entry
*he_a
= (const struct hash_entry
*) a
;
173 const struct hash_entry
*he_b
= (const struct hash_entry
*) b
;
175 return (he_a
->ino
- he_b
->ino
);
178 /* Used for sorting the hash entry */
179 static EXT2_QSORT_TYPE
name_cmp(const void *a
, const void *b
)
181 const struct hash_entry
*he_a
= (const struct hash_entry
*) a
;
182 const struct hash_entry
*he_b
= (const struct hash_entry
*) b
;
186 min_len
= he_a
->dir
->name_len
;
187 if (min_len
> he_b
->dir
->name_len
)
188 min_len
= he_b
->dir
->name_len
;
190 ret
= strncmp(he_a
->dir
->name
, he_b
->dir
->name
, min_len
);
192 if (he_a
->dir
->name_len
> he_b
->dir
->name_len
)
194 else if (he_a
->dir
->name_len
< he_b
->dir
->name_len
)
197 ret
= he_b
->dir
->inode
- he_a
->dir
->inode
;
202 /* Used for sorting the hash entry */
203 static EXT2_QSORT_TYPE
hash_cmp(const void *a
, const void *b
)
205 const struct hash_entry
*he_a
= (const struct hash_entry
*) a
;
206 const struct hash_entry
*he_b
= (const struct hash_entry
*) b
;
209 if (he_a
->hash
> he_b
->hash
)
211 else if (he_a
->hash
< he_b
->hash
)
214 if (he_a
->minor_hash
> he_b
->minor_hash
)
216 else if (he_a
->minor_hash
< he_b
->minor_hash
)
219 ret
= name_cmp(a
, b
);
/*
 * Size the output directory buffers for `blocks` blocks: outdir->buf
 * gets blocks * fs->blocksize bytes and outdir->hashes one
 * ext2_dirhash_t per block.  Existing buffers are grown with realloc()
 * through a temporary so the old pointer is not lost; otherwise fresh
 * buffers are obtained with malloc().  The new capacity is stored in
 * outdir->max.
 *
 * NOTE(review): the two malloc() results are stored without a visible
 * NULL check -- confirm against the complete file and add one if absent.
 */
224 static errcode_t
alloc_size_dir(ext2_filsys fs
, struct out_dir
*outdir
,
230 new_mem
= realloc(outdir
->buf
, blocks
* fs
->blocksize
);
233 outdir
->buf
= new_mem
;
234 new_mem
= realloc(outdir
->hashes
,
235 blocks
* sizeof(ext2_dirhash_t
));
238 outdir
->hashes
= new_mem
;
240 outdir
->buf
= malloc(blocks
* fs
->blocksize
);
241 outdir
->hashes
= malloc(blocks
* sizeof(ext2_dirhash_t
));
244 outdir
->max
= blocks
;
/*
 * Release the storage owned by an out_dir (the visible code frees the
 * per-block hash array outdir->hashes).
 */
248 static void free_out_dir(struct out_dir
*outdir
)
253 free(outdir
->hashes
);
/*
 * Hand out the next unused block of the output directory.  When
 * outdir->num has reached outdir->max the buffers are first enlarged by
 * 50 blocks via alloc_size_dir().  *ret is set to the start of block
 * number outdir->num inside outdir->buf (num is then incremented) and
 * the block is zero-filled.
 */
258 static errcode_t
get_next_block(ext2_filsys fs
, struct out_dir
*outdir
,
263 if (outdir
->num
>= outdir
->max
) {
264 retval
= alloc_size_dir(fs
, outdir
, outdir
->max
+ 50);
268 *ret
= outdir
->buf
+ (outdir
->num
++ * fs
->blocksize
);
269 memset(*ret
, 0, fs
->blocksize
);
274 * This function is used to make a unique filename. We do this by
275 * appending ~0, and then incrementing the number. However, we cannot
276 * expand the length of the filename beyond the padding available in
277 * the directory entry.
/*
 * Rewrite the name in str (length in the low byte of *len, the high
 * byte is preserved separately) into a different name in place.
 *
 * NOTE(review): isdigit() is called on plain char values; for a negative
 * char this is undefined behaviour per <ctype.h> -- consider casting to
 * unsigned char.
 */
279 static void mutate_name(char *str
, __u16
*len
)
282 __u16 l
= *len
& 0xFF, h
= *len
& 0xff00;
285 * First check to see if it looks like the name has been mutated
288 for (i
= l
-1; i
> 0; i
--) {
289 if (!isdigit(str
[i
]))
292 if ((i
== l
-1) || (str
[i
] != '~')) {
302 for (i
= l
-1; i
>= 0; i
--) {
303 if (isdigit(str
[i
])) {
315 else if (str
[0] == 'Z') {
/*
 * Walk the sorted entry array and compare each entry with its
 * predecessor.  Two neighbours with the same name length and the same
 * name are a duplicate:
 *   - same inode as well: offered as PR_2_DUPLICATE_DIRENT, and on
 *     acceptance the inode's count is adjusted by -1;
 *   - different inode: a new name is produced with mutate_name() until
 *     it collides with no other entry, offered as PR_2_NON_UNIQUE_FILE,
 *     and on acceptance the entry is renamed and re-hashed.
 *
 * NOTE(review): the collision loop compares the entry's ORIGINAL
 * name_len with harray[j]'s length, not the mutated new_len, although
 * mutate_name() receives &new_len and may change it -- confirm this is
 * intended.
 */
333 static int duplicate_search_and_fix(e2fsck_t ctx
, ext2_filsys fs
,
335 struct fill_dir_struct
*fd
)
337 struct problem_context pctx
;
338 struct hash_entry
*ent
, *prev
;
345 clear_problem_context(&pctx
);
348 hash_alg
= fs
->super
->s_def_hash_version
;
349 if ((hash_alg
<= EXT2_HASH_TEA
) &&
350 (fs
->super
->s_flags
& EXT2_FLAGS_UNSIGNED_HASH
))
353 for (i
=1; i
< fd
->num_array
; i
++) {
354 ent
= fd
->harray
+ i
;
356 if (!ent
->dir
->inode
||
357 ((ent
->dir
->name_len
& 0xFF) !=
358 (prev
->dir
->name_len
& 0xFF)) ||
359 (strncmp(ent
->dir
->name
, prev
->dir
->name
,
360 ent
->dir
->name_len
& 0xFF)))
362 pctx
.dirent
= ent
->dir
;
363 if ((ent
->dir
->inode
== prev
->dir
->inode
) &&
364 fix_problem(ctx
, PR_2_DUPLICATE_DIRENT
, &pctx
)) {
365 e2fsck_adjust_inode_count(ctx
, ent
->dir
->inode
, -1);
370 memcpy(new_name
, ent
->dir
->name
, ent
->dir
->name_len
& 0xFF);
371 new_len
= ent
->dir
->name_len
;
372 mutate_name(new_name
, &new_len
);
373 for (j
=0; j
< fd
->num_array
; j
++) {
375 ((ent
->dir
->name_len
& 0xFF) !=
376 (fd
->harray
[j
].dir
->name_len
& 0xFF)) ||
377 (strncmp(new_name
, fd
->harray
[j
].dir
->name
,
380 mutate_name(new_name
, &new_len
);
384 new_name
[new_len
& 0xFF] = 0;
386 if (fix_problem(ctx
, PR_2_NON_UNIQUE_FILE
, &pctx
)) {
387 memcpy(ent
->dir
->name
, new_name
, new_len
& 0xFF);
388 ent
->dir
->name_len
= new_len
;
389 ext2fs_dirhash(hash_alg
, ent
->dir
->name
,
390 ent
->dir
->name_len
& 0xFF,
391 fs
->super
->s_hash_seed
,
392 &ent
->hash
, &ent
->minor_hash
);
/*
 * Pack the entries of fd->harray, in array order, into freshly
 * allocated output blocks.  The output is first sized to
 * dir_size / blocksize + 2 blocks; output starts at block 0 when
 * compressing and at block 1 otherwise.  Entries with inode 0 are
 * skipped.  When an entry does not fit in the space left, the slack is
 * added to the previous entry's rec_len and a new block is started.
 * For each new block outdir->hashes[] records the hash of its first
 * entry, with the low bit set when that hash equals the previous
 * entry's hash.
 */
400 static errcode_t
copy_dir_entries(ext2_filsys fs
,
401 struct fill_dir_struct
*fd
,
402 struct out_dir
*outdir
)
406 struct hash_entry
*ent
;
407 struct ext2_dir_entry
*dirent
;
408 int i
, rec_len
, left
;
409 ext2_dirhash_t prev_hash
;
413 retval
= alloc_size_dir(fs
, outdir
,
414 (fd
->dir_size
/ fs
->blocksize
) + 2);
417 outdir
->num
= fd
->compress
? 0 : 1;
419 outdir
->hashes
[0] = 0;
421 if ((retval
= get_next_block(fs
, outdir
, &block_start
)))
423 dirent
= (struct ext2_dir_entry
*) block_start
;
424 left
= fs
->blocksize
;
425 for (i
=0; i
< fd
->num_array
; i
++) {
426 ent
= fd
->harray
+ i
;
427 if (ent
->dir
->inode
== 0)
429 rec_len
= EXT2_DIR_REC_LEN(ent
->dir
->name_len
& 0xFF);
430 if (rec_len
> left
) {
432 dirent
->rec_len
+= left
;
433 if ((retval
= get_next_block(fs
, outdir
,
438 left
= fs
->blocksize
- offset
;
439 dirent
= (struct ext2_dir_entry
*) (block_start
+ offset
);
441 if (ent
->hash
== prev_hash
)
442 outdir
->hashes
[outdir
->num
-1] = ent
->hash
| 1;
444 outdir
->hashes
[outdir
->num
-1] = ent
->hash
;
446 dirent
->inode
= ent
->dir
->inode
;
447 dirent
->name_len
= ent
->dir
->name_len
;
448 dirent
->rec_len
= rec_len
;
449 memcpy(dirent
->name
, ent
->dir
->name
, dirent
->name_len
& 0xFF);
453 dirent
->rec_len
+= left
;
457 prev_hash
= ent
->hash
;
460 dirent
->rec_len
+= left
;
/*
 * Initialise buf as the root block of a hashed directory: the block is
 * zeroed, a one-character entry is placed at offset 0 and a
 * two-character entry at offset 12 whose rec_len covers the rest of
 * the block, the dx root info follows at offset 24 (hash version taken
 * from the superblock, info_length 8, no indirect levels) and the
 * count/limit header at offset 32.  When the filesystem has the
 * FILETYPE feature, EXT2_FT_DIR is stored in the high byte of name_len.
 */
466 static struct ext2_dx_root_info
*set_root_node(ext2_filsys fs
, char *buf
,
467 ext2_ino_t ino
, ext2_ino_t parent
)
469 struct ext2_dir_entry
*dir
;
470 struct ext2_dx_root_info
*root
;
471 struct ext2_dx_countlimit
*limits
;
474 if (fs
->super
->s_feature_incompat
& EXT2_FEATURE_INCOMPAT_FILETYPE
)
475 filetype
= EXT2_FT_DIR
<< 8;
477 memset(buf
, 0, fs
->blocksize
);
478 dir
= (struct ext2_dir_entry
*) buf
;
481 dir
->name_len
= 1 | filetype
;
483 dir
= (struct ext2_dir_entry
*) (buf
+ 12);
487 dir
->name_len
= 2 | filetype
;
488 dir
->rec_len
= fs
->blocksize
- 12;
490 root
= (struct ext2_dx_root_info
*) (buf
+24);
491 root
->reserved_zero
= 0;
492 root
->hash_version
= fs
->super
->s_def_hash_version
;
493 root
->info_length
= 8;
494 root
->indirect_levels
= 0;
495 root
->unused_flags
= 0;
497 limits
= (struct ext2_dx_countlimit
*) (buf
+32);
498 limits
->limit
= (fs
->blocksize
- 32) / sizeof(struct ext2_dx_entry
);
/*
 * Initialise buf as an interior index block: the block is zeroed, it
 * starts with a directory entry whose rec_len spans the whole block,
 * and the count/limit header sits at offset 8 with limit set to the
 * number of dx entries that fit in the remaining blocksize - 8 bytes.
 * Returns the address of that header viewed as the first dx entry.
 */
505 static struct ext2_dx_entry
*set_int_node(ext2_filsys fs
, char *buf
)
507 struct ext2_dir_entry
*dir
;
508 struct ext2_dx_countlimit
*limits
;
510 memset(buf
, 0, fs
->blocksize
);
511 dir
= (struct ext2_dir_entry
*) buf
;
513 dir
->rec_len
= fs
->blocksize
;
515 limits
= (struct ext2_dx_countlimit
*) (buf
+8);
516 limits
->limit
= (fs
->blocksize
- 8) / sizeof(struct ext2_dx_entry
);
519 return (struct ext2_dx_entry
*) limits
;
523 * This function takes the leaf nodes which have been written in
524 * outdir, and populates the root node and any necessary interior nodes.
/*
 * Build the index over the leaf blocks already present in outdir.
 * Block 0 is initialised with set_root_node().  Two layouts are
 * visible: the root referencing leaf blocks 1..nblks-1 directly, or
 * (indirect_levels = 1) the root referencing interior blocks obtained
 * from get_next_block() and initialised with set_int_node(), which in
 * turn reference the leaves.  Entry hashes come from outdir->hashes[].
 *
 * The root and limit positions are kept as byte offsets into
 * outdir->buf and turned back into pointers after get_next_block()
 * calls (presumably because the buffer can be reallocated -- see
 * alloc_size_dir()).  Block numbers, hashes, counts and limits are
 * stored in little-endian form.
 */
526 static errcode_t
calculate_tree(ext2_filsys fs
,
527 struct out_dir
*outdir
,
531 struct ext2_dx_root_info
*root_info
;
532 struct ext2_dx_entry
*root
, *dx_ent
= 0;
533 struct ext2_dx_countlimit
*root_limit
, *limit
;
536 int i
, c1
, c2
, nblks
;
537 int limit_offset
, root_offset
;
539 root_info
= set_root_node(fs
, outdir
->buf
, ino
, parent
);
540 root_offset
= limit_offset
= ((char *) root_info
- outdir
->buf
) +
541 root_info
->info_length
;
542 root_limit
= (struct ext2_dx_countlimit
*) (outdir
->buf
+ limit_offset
);
543 c1
= root_limit
->limit
;
546 /* Write out the pointer blocks */
548 /* Just write out the root block, and we're done */
549 root
= (struct ext2_dx_entry
*) (outdir
->buf
+ root_offset
);
550 for (i
=1; i
< nblks
; i
++) {
551 root
->block
= ext2fs_cpu_to_le32(i
);
554 ext2fs_cpu_to_le32(outdir
->hashes
[i
]);
561 root_info
->indirect_levels
= 1;
562 for (i
=1; i
< nblks
; i
++) {
567 limit
->limit
= limit
->count
=
568 ext2fs_cpu_to_le16(limit
->limit
);
569 root
= (struct ext2_dx_entry
*)
570 (outdir
->buf
+ root_offset
);
571 root
->block
= ext2fs_cpu_to_le32(outdir
->num
);
574 ext2fs_cpu_to_le32(outdir
->hashes
[i
]);
575 if ((retval
= get_next_block(fs
, outdir
,
578 dx_ent
= set_int_node(fs
, block_start
);
579 limit
= (struct ext2_dx_countlimit
*) dx_ent
;
581 root_offset
+= sizeof(struct ext2_dx_entry
);
584 dx_ent
->block
= ext2fs_cpu_to_le32(i
);
585 if (c2
!= limit
->limit
)
587 ext2fs_cpu_to_le32(outdir
->hashes
[i
]);
591 limit
->count
= ext2fs_cpu_to_le16(limit
->limit
- c2
);
592 limit
->limit
= ext2fs_cpu_to_le16(limit
->limit
);
594 root_limit
= (struct ext2_dx_countlimit
*) (outdir
->buf
+ limit_offset
);
595 root_limit
->count
= ext2fs_cpu_to_le16(root_limit
->limit
- c1
);
596 root_limit
->limit
= ext2fs_cpu_to_le16(root_limit
->limit
);
/*
 * Private data handed to write_dir_block() while the rebuilt directory
 * is written back; outdir is the in-memory image being written.
 */
601 struct write_dir_struct
{
602 struct out_dir
*outdir
;
609 * Helper function which writes out a directory block.
/*
 * Block-iterator callback (passed to ext2fs_block_iterate2() by
 * write_directory()).  A block whose logical number is at or beyond
 * outdir->num is no longer needed: the bitmaps are loaded, the block is
 * unmarked in block_found_map, the allocation statistics are adjusted
 * by -1 and BLOCK_CHANGED is returned.  Any other block is overwritten
 * with the matching block of outdir->buf, the result of the write being
 * stored in wd->err.
 */
611 static int write_dir_block(ext2_filsys fs
,
613 e2_blkcnt_t blockcnt
,
614 blk_t ref_block
EXT2FS_ATTR((unused
)),
615 int ref_offset
EXT2FS_ATTR((unused
)),
618 struct write_dir_struct
*wd
= (struct write_dir_struct
*) priv_data
;
624 if (blockcnt
>= wd
->outdir
->num
) {
625 e2fsck_read_bitmaps(wd
->ctx
);
627 ext2fs_unmark_block_bitmap(wd
->ctx
->block_found_map
, blk
);
628 ext2fs_block_alloc_stats(fs
, blk
, -1);
631 return BLOCK_CHANGED
;
636 dir
= wd
->outdir
->buf
+ (blockcnt
* fs
->blocksize
);
637 wd
->err
= ext2fs_write_dir_block(fs
, *block_nr
, dir
);
/*
 * Write the rebuilt directory image in outdir back to inode ino.
 * The directory is first passed to e2fsck_expand_directory() with
 * outdir->num as the block count, then every block is visited with
 * write_dir_block().  Finally the inode is re-read, EXT2_INDEX_FL is
 * cleared or set (the selecting condition is not visible in this
 * listing; presumably it depends on `compress`), i_size is set to
 * outdir->num * blocksize, the blocks counted in wd.cleared are
 * subtracted from the inode's block count, and the inode is written.
 */
643 static errcode_t
write_directory(e2fsck_t ctx
, ext2_filsys fs
,
644 struct out_dir
*outdir
,
645 ext2_ino_t ino
, int compress
)
647 struct write_dir_struct wd
;
649 struct ext2_inode inode
;
651 retval
= e2fsck_expand_directory(ctx
, ino
, -1, outdir
->num
);
660 retval
= ext2fs_block_iterate2(fs
, ino
, 0, 0,
661 write_dir_block
, &wd
);
667 e2fsck_read_inode(ctx
, ino
, &inode
, "rehash_dir");
669 inode
.i_flags
&= ~EXT2_INDEX_FL
;
671 inode
.i_flags
|= EXT2_INDEX_FL
;
672 inode
.i_size
= outdir
->num
* fs
->blocksize
;
673 ext2fs_iblk_sub_blocks(fs
, &inode
, wd
.cleared
);
674 e2fsck_write_inode(ctx
, ino
, &inode
, "rehash_dir");
/*
 * Rebuild the directory stored in inode ino.
 *
 * Visible steps, in order: read the inode; allocate a buffer of i_size
 * bytes for the directory contents and an entry array initially sized
 * for i_size / 32 entries; load every block with fill_dir_block(); sort
 * the entries either with ino_cmp (skipping the first two entries) or
 * with hash_cmp over the whole array; run duplicate_search_and_fix();
 * pack the entries into output blocks with copy_dir_entries(); free the
 * input buffer; build the index with calculate_tree(); write the result
 * with write_directory(); and release the output buffers.
 *
 * A test on the DIR_INDEX superblock feature and on the directory being
 * smaller than two blocks is also visible; the statement it guards is
 * not shown in this listing.
 */
679 errcode_t
e2fsck_rehash_dir(e2fsck_t ctx
, ext2_ino_t ino
)
681 ext2_filsys fs
= ctx
->fs
;
683 struct ext2_inode inode
;
685 struct fill_dir_struct fd
;
686 struct out_dir outdir
;
688 outdir
.max
= outdir
.num
= 0;
691 e2fsck_read_inode(ctx
, ino
, &inode
, "rehash_dir");
695 dir_buf
= malloc(inode
.i_size
);
699 fd
.max_array
= inode
.i_size
/ 32;
701 fd
.harray
= malloc(fd
.max_array
* sizeof(struct hash_entry
));
711 if (!(fs
->super
->s_feature_compat
& EXT2_FEATURE_COMPAT_DIR_INDEX
) ||
712 (inode
.i_size
/ fs
->blocksize
) < 2)
716 /* Read in the entire directory into memory */
717 retval
= ext2fs_block_iterate2(fs
, ino
, 0, 0,
718 fill_dir_block
, &fd
);
725 printf("%d entries (%d bytes) found in inode %d\n",
726 fd
.num_array
, fd
.dir_size
, ino
);
732 qsort(fd
.harray
+2, fd
.num_array
-2,
733 sizeof(struct hash_entry
), ino_cmp
);
735 qsort(fd
.harray
, fd
.num_array
,
736 sizeof(struct hash_entry
), hash_cmp
);
739 * Look for duplicates
741 if (duplicate_search_and_fix(ctx
, fs
, ino
, &fd
))
744 if (ctx
->options
& E2F_OPT_NO
) {
750 * Copy the directory entries. In a htree directory these
751 * will become the leaf nodes.
753 retval
= copy_dir_entries(fs
, &fd
, &outdir
);
757 free(dir_buf
); dir_buf
= 0;
760 /* Calculate the interior nodes */
761 retval
= calculate_tree(fs
, &outdir
, ino
, fd
.parent
);
766 retval
= write_directory(ctx
, fs
, &outdir
, ino
, fd
.compress
);
776 free_out_dir(&outdir
);
780 void e2fsck_rehash_directories(e2fsck_t ctx
)
782 struct problem_context pctx
;
783 #ifdef RESOURCE_TRACK
784 struct resource_track rtrack
;
786 struct dir_info
*dir
;
787 ext2_u32_iterate iter
;
788 struct dir_info_iter
* dirinfo_iter
= 0;
791 int cur
, max
, all_dirs
, dir_index
, first
= 1;
793 #ifdef RESOURCE_TRACK
794 init_resource_track(&rtrack
, ctx
->fs
->io
);
797 all_dirs
= ctx
->options
& E2F_OPT_COMPRESS_DIRS
;
799 if (!ctx
->dirs_to_hash
&& !all_dirs
)
802 e2fsck_get_lost_and_found(ctx
, 0);
804 clear_problem_context(&pctx
);
806 dir_index
= ctx
->fs
->super
->s_feature_compat
& EXT2_FEATURE_COMPAT_DIR_INDEX
;
809 dirinfo_iter
= e2fsck_dir_info_iter_begin(ctx
);
810 max
= e2fsck_get_num_dirinfo(ctx
);
812 retval
= ext2fs_u32_list_iterate_begin(ctx
->dirs_to_hash
,
815 pctx
.errcode
= retval
;
816 fix_problem(ctx
, PR_3A_OPTIMIZE_ITER
, &pctx
);
819 max
= ext2fs_u32_list_count(ctx
->dirs_to_hash
);
823 if ((dir
= e2fsck_dir_info_iter(ctx
,
828 if (!ext2fs_u32_list_iterate(iter
, &ino
))
831 if (ino
== ctx
->lost_and_found
)
835 fix_problem(ctx
, PR_3A_PASS_HEADER
, &pctx
);
839 fix_problem(ctx
, PR_3A_OPTIMIZE_DIR
, &pctx
);
841 pctx
.errcode
= e2fsck_rehash_dir(ctx
, ino
);
843 end_problem_latch(ctx
, PR_LATCH_OPTIMIZE_DIR
);
844 fix_problem(ctx
, PR_3A_OPTIMIZE_DIR_ERR
, &pctx
);
846 if (ctx
->progress
&& !ctx
->progress_fd
)
847 e2fsck_simple_progress(ctx
, "Rebuilding directory",
848 100.0 * (float) (++cur
) / (float) max
, ino
);
850 end_problem_latch(ctx
, PR_LATCH_OPTIMIZE_DIR
);
852 e2fsck_dir_info_iter_end(ctx
, dirinfo_iter
);
854 ext2fs_u32_list_iterate_end(iter
);
856 if (ctx
->dirs_to_hash
)
857 ext2fs_u32_list_free(ctx
->dirs_to_hash
);
858 ctx
->dirs_to_hash
= 0;
860 #ifdef RESOURCE_TRACK
861 if (ctx
->options
& E2F_OPT_TIME2
) {
862 e2fsck_clear_progbar(ctx
);
863 print_resource_track("Pass 3A", &rtrack
, ctx
->fs
->io
);