From: Andreas Dilger
Date: Mon, 30 Nov 2015 20:26:35 +0000 (-0500)
Subject: e2fsck: fix e2fsck -fD directory truncation
X-Git-Tag: v1.43-WIP-2016-03-15~41^2
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=19961cd0003564c63c33ec14e69dfec6d81a2238;p=thirdparty%2Fe2fsprogs.git

e2fsck: fix e2fsck -fD directory truncation

When an extent-mapped directory is compacted by "e2fsck -fD" and frees enough leaf blocks that it loses an extent tree index block, the old e2fsck_rehash_dir->ext2fs_block_iterate3->write_dir_block() code would not free the extent block, which would result in the extent tree becoming corrupted when it is written out. e2fsck then reports an error such as:

    Pass 1: Checking inodes, blocks, and sizes
    Inode 17825800, end of extent exceeds allowed value (logical block 710, physical block 570459684, len 1019)

This results in the loss of a whole index block of directory leaf blocks, with possibly thousands or millions of files ending up in lost+found.

Fix e2fsck_rehash_dir() to call ext2fs_punch() to free the blocks at the end of the directory instead of trying to handle this itself while writing out the directory. That properly handles all of the cases of updating the extent tree as well as accounting for blocks that are released (both leaf blocks and index blocks).

Add a test case for compacting the directory to be smaller than the index block that originally caused the corruption.

Signed-off-by: Andreas Dilger Signed-off-by: Theodore Ts'o --- diff --git a/e2fsck/rehash.c b/e2fsck/rehash.c index 8ff488381..52d99a3cc 100644 --- a/e2fsck/rehash.c +++ b/e2fsck/rehash.c @@ -52,10 +52,13 @@ #include "e2fsck.h" #include "problem.h" +#undef REHASH_DEBUG + struct fill_dir_struct { char *buf; struct ext2_inode *inode; errcode_t err; + ext2_ino_t ino; e2fsck_t ctx; struct hash_entry *harray; int max_array, num_array; @@ -625,8 +628,8 @@ static errcode_t calculate_tree(ext2_filsys fs, struct write_dir_struct { struct out_dir *outdir; errcode_t err; + ext2_ino_t ino; e2fsck_t ctx; - blk64_t cleared; }; /* @@ -643,28 +646,35 @@ static int write_dir_block(ext2_filsys fs, blk64_t blk; char *dir; - if (*block_nr == 0) +#ifdef REHASH_DEBUG + printf("%u: write_dir_block %lld:%lld", wd->ino, blockcnt, *block_nr); +#endif + if (*block_nr == 0) { +#ifdef REHASH_DEBUG + printf(" - skip\n"); +#endif return 0; + } + /* Don't free blocks at the end of the directory, they will be + * truncated by the caller. */ if (blockcnt >= wd->outdir->num) { - e2fsck_read_bitmaps(wd->ctx); - blk = *block_nr; - /* - * In theory, we only release blocks from the end of the - * directory file, so it's fine to clobber a whole cluster at - * once. 
- */ - if (blk % EXT2FS_CLUSTER_RATIO(fs) == 0) { - ext2fs_block_alloc_stats2(fs, blk, -1); - wd->cleared++; - } - *block_nr = 0; - return BLOCK_CHANGED; +#ifdef REHASH_DEBUG + printf(" - not freed\n"); +#endif + return 0; } - if (blockcnt < 0) + if (blockcnt < 0) { +#ifdef REHASH_DEBUG + printf(" - skip\n"); +#endif return 0; + } dir = wd->outdir->buf + (blockcnt * fs->blocksize); wd->err = ext2fs_write_dir_block3(fs, *block_nr, dir, 0); +#ifdef REHASH_DEBUG + printf(" - write (%d)\n", wd->err); +#endif if (wd->err) return BLOCK_ABORT; return 0; @@ -684,10 +694,10 @@ static errcode_t write_directory(e2fsck_t ctx, ext2_filsys fs, wd.outdir = outdir; wd.err = 0; + wd.ino = ino; wd.ctx = ctx; - wd.cleared = 0; - retval = ext2fs_block_iterate3(fs, ino, 0, 0, + retval = ext2fs_block_iterate3(fs, ino, 0, NULL, write_dir_block, &wd); if (retval) return retval; @@ -699,14 +709,17 @@ static errcode_t write_directory(e2fsck_t ctx, ext2_filsys fs, inode.i_flags &= ~EXT2_INDEX_FL; else inode.i_flags |= EXT2_INDEX_FL; - retval = ext2fs_inode_size_set(fs, &inode, - outdir->num * fs->blocksize); +#ifdef REHASH_DEBUG + printf("%u: set inode size to %u blocks = %u bytes\n", + ino, outdir->num, outdir->num * fs->blocksize); +#endif + retval = ext2fs_inode_size_set(fs, &inode, (ext2_off64_t)outdir->num * + fs->blocksize); if (retval) return retval; - ext2fs_iblk_sub_blocks(fs, &inode, wd.cleared); - e2fsck_write_inode(ctx, ino, &inode, "rehash_dir"); - return 0; + /* ext2fs_punch() calls ext2fs_write_inode() which writes the size */ + return ext2fs_punch(fs, ino, &inode, NULL, outdir->num, ~0ULL); } errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino) @@ -715,32 +728,25 @@ errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino) errcode_t retval; struct ext2_inode inode; char *dir_buf = 0; - struct fill_dir_struct fd; - struct out_dir outdir; + struct fill_dir_struct fd = { NULL }; + struct out_dir outdir = { 0 }; - outdir.max = outdir.num = 0; - outdir.buf = 0; - 
outdir.hashes = 0; e2fsck_read_inode(ctx, ino, &inode, "rehash_dir"); retval = ENOMEM; - fd.harray = 0; dir_buf = malloc(inode.i_size); if (!dir_buf) goto errout; fd.max_array = inode.i_size / 32; - fd.num_array = 0; fd.harray = malloc(fd.max_array * sizeof(struct hash_entry)); if (!fd.harray) goto errout; + fd.ino = ino; fd.ctx = ctx; fd.buf = dir_buf; fd.inode = &inode; - fd.err = 0; - fd.dir_size = 0; - fd.compress = 0; if (!(fs->super->s_feature_compat & EXT2_FEATURE_COMPAT_DIR_INDEX) || (inode.i_size / fs->blocksize) < 2) fd.compress = 1; diff --git a/tests/f_extent_htree/expect.1 b/tests/f_extent_htree/expect.1 new file mode 100644 index 000000000..223ca6979 --- /dev/null +++ b/tests/f_extent_htree/expect.1 @@ -0,0 +1,29 @@ +Pass 1: Checking inodes, blocks, and sizes +Pass 2: Checking directory structure +Pass 3: Checking directory connectivity +Pass 3A: Optimizing directories +Pass 4: Checking reference counts +Pass 5: Checking group summary information + +test_filesys: ***** FILE SYSTEM WAS MODIFIED ***** + + 352 inodes used (41.12%, out of 856) + 0 non-contiguous files (0.0%) + 1 non-contiguous directory (0.3%) + # of inodes with ind/dind/tind blocks: 0/0/0 + Extent depth histogram: 342/1 + 586 blocks used (68.94%, out of 850) + 0 bad blocks + 0 large files + + 340 regular files + 3 directories + 0 character device files + 0 block device files + 0 fifos + 0 links + 0 symbolic links (0 fast symbolic links) + 0 sockets +------------ + 343 files +Exit status is 1 diff --git a/tests/f_extent_htree/expect.2 b/tests/f_extent_htree/expect.2 new file mode 100644 index 000000000..860b491e5 --- /dev/null +++ b/tests/f_extent_htree/expect.2 @@ -0,0 +1,7 @@ +Pass 1: Checking inodes, blocks, and sizes +Pass 2: Checking directory structure +Pass 3: Checking directory connectivity +Pass 4: Checking reference counts +Pass 5: Checking group summary information +test_filesys: 352/856 files (0.3% non-contiguous), 586/850 blocks +Exit status is 0 diff --git 
a/tests/f_extent_htree/image.gz b/tests/f_extent_htree/image.gz new file mode 100644 index 000000000..284207efb Binary files /dev/null and b/tests/f_extent_htree/image.gz differ diff --git a/tests/f_extent_htree/name b/tests/f_extent_htree/name new file mode 100644 index 000000000..fc3812d52 --- /dev/null +++ b/tests/f_extent_htree/name @@ -0,0 +1 @@ +htree extent compression diff --git a/tests/f_extent_htree/script b/tests/f_extent_htree/script new file mode 100644 index 000000000..60854c650 --- /dev/null +++ b/tests/f_extent_htree/script @@ -0,0 +1,69 @@ +#!/bin/bash + +FSCK_OPT="-fyvD" +. $cmd_dir/run_e2fsck + +exit $? +# This script depends on "mke2fs -d", which is only in master and not maint, +# to populate the file directory tree poorly (namely that there are no +# contiguous blocks in the directory leaf and the extent tree is large). + +# Once the "mke2fs -d" option is available on the "maint" branch, the +# above few lines should be deleted, along with the "image.gz" file. + +TMPDIR=${TMPDIR:-"/tmp"} +OUT=$test_name.log + +FSCK_OPT="-fyvD" +SKIP_GUNZIP="true" + +NAMELEN=250 +SRC=$TMPDIR/$test_name.tmp +SUB=subdir +BASE=$SRC/$SUB/$(yes | tr -d '\n' | dd bs=$NAMELEN count=1 2> /dev/null) +TMPFILE=${TMPFILE:-"$TMPDIR/image"} +BSIZE=1024 + +> $OUT +mkdir -p $SRC/$SUB +# calculate the number of files needed to create the directory extent tree +# deep enough to exceed the in-inode index and spill into an index block. +# +# dirents per block * extents per block * (index blocks > i_blocks) +NUM=$(((BSIZE / (NAMELEN + 8)) * (BSIZE / 12) * 2)) +# Create source files. Unfortunately hard links will be copied as links, +# and blocks with only NULs will be turned into holes. +if [ ! 
-f $BASE.1 ]; then + for N in $(seq $NUM); do + echo "foo" > $BASE.$N + done >> $OUT +fi + +# make filesystem with enough inodes and blocks to hold all the test files +> $TMPFILE +NUM=$((NUM * 5 / 3)) +echo "mke2fs -b $BSIZE -O dir_index,extent -d$SRC -N$NUM $TMPFILE $NUM" >> $OUT +$MKE2FS -b $BSIZE -O dir_index,extent -d$SRC -N$NUM $TMPFILE $NUM >> $OUT 2>&1 +rm -r $SRC + +# Run e2fsck to convert dir to htree before deleting the files, as mke2fs +# doesn't do this. Run second e2fsck to verify there is no corruption yet. +( + EXP1=$test_dir/expect.pre.1 + EXP2=$test_dir/expect.pre.2 + OUT1=$test_name.pre.1.log + OUT2=$test_name.pre.2.log + DESCRIPTION="$(cat $test_dir/name) setup" + . $cmd_dir/run_e2fsck +) + +# generate a list of filenames for debugfs to delete, one from each leaf block +DELETE_LIST=$TMPDIR/delete.$$ +$DEBUGFS -c -R "htree subdir" $TMPFILE 2>> $OUT | + grep -A2 "Reading directory block" | + awk '/yyyyy/ { print "rm '$SUB'/"$4 }' > $DELETE_LIST +$DEBUGFS -w -f $DELETE_LIST $TMPFILE >> $OUT 2>&1 +rm $DELETE_LIST +cp $TMPFILE $TMPFILE.sav + +. 
$cmd_dir/run_e2fsck diff --git a/tests/f_h_badnode/expect.1 b/tests/f_h_badnode/expect.1 index ce2adb3f4..95b1cee8a 100644 --- a/tests/f_h_badnode/expect.1 +++ b/tests/f_h_badnode/expect.1 @@ -14,5 +14,5 @@ Pass 4: Checking reference counts Pass 5: Checking group summary information test_filesys: ***** FILE SYSTEM WAS MODIFIED ***** -test_filesys: 47730/100192 files (0.0% non-contiguous), 13551/31745 blocks +test_filesys: 47730/100192 files (0.0% non-contiguous), 13550/31745 blocks Exit status is 1 diff --git a/tests/f_h_badnode/expect.2 b/tests/f_h_badnode/expect.2 index b9dadb739..65985d140 100644 --- a/tests/f_h_badnode/expect.2 +++ b/tests/f_h_badnode/expect.2 @@ -3,5 +3,5 @@ Pass 2: Checking directory structure Pass 3: Checking directory connectivity Pass 4: Checking reference counts Pass 5: Checking group summary information -test_filesys: 47730/100192 files (0.0% non-contiguous), 13551/31745 blocks +test_filesys: 47730/100192 files (0.0% non-contiguous), 13550/31745 blocks Exit status is 0