2 * journal.c --- code for handling the "ext3" journal
4 * Copyright (C) 2000 Andreas Dilger
5 * Copyright (C) 2000 Theodore Ts'o
7 * Parts of the code are based on fs/jfs/journal.c by Stephen C. Tweedie
8 * Copyright (C) 1999 Red Hat Software
10 * This file may be redistributed under the terms of the
11 * GNU General Public License version 2 or at your discretion
16 #ifdef HAVE_SYS_MOUNT_H
17 #include <sys/param.h>
18 #include <sys/mount.h>
19 #define MNT_FL (MS_MGC_VAL | MS_RDONLY)
21 #ifdef HAVE_SYS_STAT_H
25 #define E2FSCK_INCLUDE_INLINE_FUNCS
28 #include "uuid/uuid.h"
30 #ifdef CONFIG_JBD_DEBUG /* Enabled by configure --enable-jfs-debug */
31 static int bh_count
= 0;
35 * Define USE_INODE_IO to use the inode_io.c / fileio.c codepaths.
36 * This creates a larger static binary, and a smaller binary using
37 * shared libraries. It's also probably slightly less CPU-efficient,
38 * which is why it's not on by default. But, it's a good way of
39 * testing the functions in inode_io.c and fileio.c.
43 /* Checksumming functions */
44 static int e2fsck_journal_verify_csum_type(journal_t
*j
,
45 journal_superblock_t
*jsb
)
47 if (!journal_has_csum_v2or3(j
))
50 return jsb
->s_checksum_type
== JBD2_CRC32C_CHKSUM
;
53 static __u32
e2fsck_journal_sb_csum(journal_superblock_t
*jsb
)
57 old_crc
= jsb
->s_checksum
;
59 crc
= ext2fs_crc32c_le(~0, (unsigned char *)jsb
,
60 sizeof(journal_superblock_t
));
61 jsb
->s_checksum
= old_crc
;
66 static int e2fsck_journal_sb_csum_verify(journal_t
*j
,
67 journal_superblock_t
*jsb
)
69 __u32 provided
, calculated
;
71 if (!journal_has_csum_v2or3(j
))
74 provided
= ext2fs_be32_to_cpu(jsb
->s_checksum
);
75 calculated
= e2fsck_journal_sb_csum(jsb
);
77 return provided
== calculated
;
80 static errcode_t
e2fsck_journal_sb_csum_set(journal_t
*j
,
81 journal_superblock_t
*jsb
)
85 if (!journal_has_csum_v2or3(j
))
88 crc
= e2fsck_journal_sb_csum(jsb
);
89 jsb
->s_checksum
= ext2fs_cpu_to_be32(crc
);
93 /* Kernel compatibility functions for handling the journal. These allow us
94 * to use the recovery.c file virtually unchanged from the kernel, so we
95 * don't have to do much to keep kernel and user recovery in sync.
97 int journal_bmap(journal_t
*journal
, blk64_t block
, unsigned long long *phys
)
103 struct inode
*inode
= journal
->j_inode
;
112 retval
= ext2fs_bmap2(inode
->i_ctx
->fs
, inode
->i_ino
,
113 &inode
->i_ext2
, NULL
, 0, block
, 0, &pblk
);
119 struct buffer_head
*getblk(kdev_t kdev
, blk64_t blocknr
, int blocksize
)
121 struct buffer_head
*bh
;
122 int bufsize
= sizeof(*bh
) + kdev
->k_ctx
->fs
->blocksize
-
125 bh
= e2fsck_allocate_memory(kdev
->k_ctx
, bufsize
, "block buffer");
129 #ifdef CONFIG_JBD_DEBUG
130 if (journal_enable_debug
>= 3)
133 jfs_debug(4, "getblk for block %llu (%d bytes)(total %d)\n",
134 (unsigned long long) blocknr
, blocksize
, bh_count
);
136 bh
->b_ctx
= kdev
->k_ctx
;
137 if (kdev
->k_dev
== K_DEV_FS
)
138 bh
->b_io
= kdev
->k_ctx
->fs
->io
;
140 bh
->b_io
= kdev
->k_ctx
->journal_io
;
141 bh
->b_size
= blocksize
;
142 bh
->b_blocknr
= blocknr
;
147 int sync_blockdev(kdev_t kdev
)
151 if (kdev
->k_dev
== K_DEV_FS
)
152 io
= kdev
->k_ctx
->fs
->io
;
154 io
= kdev
->k_ctx
->journal_io
;
156 return io_channel_flush(io
) ? EIO
: 0;
159 void ll_rw_block(int rw
, int nr
, struct buffer_head
*bhp
[])
162 struct buffer_head
*bh
;
164 for (; nr
> 0; --nr
) {
166 if (rw
== READ
&& !bh
->b_uptodate
) {
167 jfs_debug(3, "reading block %llu/%p\n",
168 bh
->b_blocknr
, (void *) bh
);
169 retval
= io_channel_read_blk64(bh
->b_io
,
173 com_err(bh
->b_ctx
->device_name
, retval
,
174 "while reading block %llu\n",
176 bh
->b_err
= (int) retval
;
180 } else if (rw
== WRITE
&& bh
->b_dirty
) {
181 jfs_debug(3, "writing block %llu/%p\n",
184 retval
= io_channel_write_blk64(bh
->b_io
,
188 com_err(bh
->b_ctx
->device_name
, retval
,
189 "while writing block %llu\n",
191 bh
->b_err
= (int) retval
;
197 jfs_debug(3, "no-op %s for block %llu\n",
198 rw
== READ
? "read" : "write",
204 void mark_buffer_dirty(struct buffer_head
*bh
)
209 static void mark_buffer_clean(struct buffer_head
* bh
)
214 void brelse(struct buffer_head
*bh
)
217 ll_rw_block(WRITE
, 1, &bh
);
218 jfs_debug(3, "freeing block %llu/%p (total %d)\n",
219 bh
->b_blocknr
, (void *) bh
, --bh_count
);
220 ext2fs_free_mem(&bh
);
223 int buffer_uptodate(struct buffer_head
*bh
)
225 return bh
->b_uptodate
;
228 void mark_buffer_uptodate(struct buffer_head
*bh
, int val
)
230 bh
->b_uptodate
= val
;
233 void wait_on_buffer(struct buffer_head
*bh
)
236 ll_rw_block(READ
, 1, &bh
);
240 static void e2fsck_clear_recover(e2fsck_t ctx
, int error
)
242 ext2fs_clear_feature_journal_needs_recovery(ctx
->fs
->super
);
244 /* if we had an error doing journal recovery, we need a full fsck */
246 ctx
->fs
->super
->s_state
&= ~EXT2_VALID_FS
;
247 ext2fs_mark_super_dirty(ctx
->fs
);
251 * This is a helper function to check the validity of the journal.
253 struct process_block_struct
{
254 e2_blkcnt_t last_block
;
257 static int process_journal_block(ext2_filsys fs
,
259 e2_blkcnt_t blockcnt
,
260 blk64_t ref_block
EXT2FS_ATTR((unused
)),
261 int ref_offset
EXT2FS_ATTR((unused
)),
264 struct process_block_struct
*p
;
265 blk64_t blk
= *block_nr
;
267 p
= (struct process_block_struct
*) priv_data
;
269 if (!blk
|| blk
< fs
->super
->s_first_data_block
||
270 blk
>= ext2fs_blocks_count(fs
->super
))
274 p
->last_block
= blockcnt
;
278 static errcode_t
e2fsck_get_journal(e2fsck_t ctx
, journal_t
**ret_journal
)
280 struct process_block_struct pb
;
281 struct ext2_super_block
*sb
= ctx
->fs
->super
;
282 struct ext2_super_block jsuper
;
283 struct problem_context pctx
;
284 struct buffer_head
*bh
;
285 struct inode
*j_inode
= NULL
;
286 struct kdev_s
*dev_fs
= NULL
, *dev_journal
;
287 const char *journal_name
= 0;
288 journal_t
*journal
= NULL
;
289 errcode_t retval
= 0;
290 io_manager io_ptr
= 0;
291 unsigned long long start
= 0;
293 int tried_backup_jnl
= 0;
295 clear_problem_context(&pctx
);
297 journal
= e2fsck_allocate_memory(ctx
, sizeof(journal_t
), "journal");
299 return EXT2_ET_NO_MEMORY
;
302 dev_fs
= e2fsck_allocate_memory(ctx
, 2*sizeof(struct kdev_s
), "kdev");
304 retval
= EXT2_ET_NO_MEMORY
;
307 dev_journal
= dev_fs
+1;
309 dev_fs
->k_ctx
= dev_journal
->k_ctx
= ctx
;
310 dev_fs
->k_dev
= K_DEV_FS
;
311 dev_journal
->k_dev
= K_DEV_JOURNAL
;
313 journal
->j_dev
= dev_journal
;
314 journal
->j_fs_dev
= dev_fs
;
315 journal
->j_inode
= NULL
;
316 journal
->j_blocksize
= ctx
->fs
->blocksize
;
318 if (uuid_is_null(sb
->s_journal_uuid
)) {
319 if (!sb
->s_journal_inum
) {
320 retval
= EXT2_ET_BAD_INODE_NUM
;
323 j_inode
= e2fsck_allocate_memory(ctx
, sizeof(*j_inode
),
326 retval
= EXT2_ET_NO_MEMORY
;
330 j_inode
->i_ctx
= ctx
;
331 j_inode
->i_ino
= sb
->s_journal_inum
;
333 if ((retval
= ext2fs_read_inode(ctx
->fs
,
335 &j_inode
->i_ext2
))) {
337 if (sb
->s_jnl_backup_type
!= EXT3_JNL_BACKUP_BLOCKS
||
340 memset(&j_inode
->i_ext2
, 0, sizeof(struct ext2_inode
));
341 memcpy(&j_inode
->i_ext2
.i_block
[0], sb
->s_jnl_blocks
,
343 j_inode
->i_ext2
.i_size_high
= sb
->s_jnl_blocks
[15];
344 j_inode
->i_ext2
.i_size
= sb
->s_jnl_blocks
[16];
345 j_inode
->i_ext2
.i_links_count
= 1;
346 j_inode
->i_ext2
.i_mode
= LINUX_S_IFREG
| 0600;
347 e2fsck_use_inode_shortcuts(ctx
, 1);
348 ctx
->stashed_ino
= j_inode
->i_ino
;
349 ctx
->stashed_inode
= &j_inode
->i_ext2
;
352 if (!j_inode
->i_ext2
.i_links_count
||
353 !LINUX_S_ISREG(j_inode
->i_ext2
.i_mode
)) {
354 retval
= EXT2_ET_NO_JOURNAL
;
355 goto try_backup_journal
;
357 if (EXT2_I_SIZE(&j_inode
->i_ext2
) / journal
->j_blocksize
<
358 JFS_MIN_JOURNAL_BLOCKS
) {
359 retval
= EXT2_ET_JOURNAL_TOO_SMALL
;
360 goto try_backup_journal
;
363 retval
= ext2fs_block_iterate3(ctx
->fs
, j_inode
->i_ino
,
365 process_journal_block
, &pb
);
366 if ((pb
.last_block
+ 1) * ctx
->fs
->blocksize
<
367 (int) EXT2_I_SIZE(&j_inode
->i_ext2
)) {
368 retval
= EXT2_ET_JOURNAL_TOO_SMALL
;
369 goto try_backup_journal
;
371 if (tried_backup_jnl
&& !(ctx
->options
& E2F_OPT_READONLY
)) {
372 retval
= ext2fs_write_inode(ctx
->fs
, sb
->s_journal_inum
,
378 journal
->j_maxlen
= EXT2_I_SIZE(&j_inode
->i_ext2
) /
379 journal
->j_blocksize
;
382 retval
= ext2fs_inode_io_intern2(ctx
->fs
, sb
->s_journal_inum
,
388 io_ptr
= inode_io_manager
;
390 journal
->j_inode
= j_inode
;
391 ctx
->journal_io
= ctx
->fs
->io
;
392 if ((retval
= (errcode_t
) journal_bmap(journal
, 0, &start
)) != 0)
397 if (!ctx
->journal_name
) {
400 uuid_unparse(sb
->s_journal_uuid
, uuid
);
401 ctx
->journal_name
= blkid_get_devname(ctx
->blkid
,
403 if (!ctx
->journal_name
)
404 ctx
->journal_name
= blkid_devno_to_devname(sb
->s_journal_dev
);
406 journal_name
= ctx
->journal_name
;
409 fix_problem(ctx
, PR_0_CANT_FIND_JOURNAL
, &pctx
);
410 retval
= EXT2_ET_LOAD_EXT_JOURNAL
;
414 jfs_debug(1, "Using journal file %s\n", journal_name
);
415 io_ptr
= unix_io_manager
;
419 test_io_backing_manager
= io_ptr
;
420 io_ptr
= test_io_manager
;
426 int flags
= IO_FLAG_RW
;
427 if (!(ctx
->mount_flags
& EXT2_MF_ISROOT
&&
428 ctx
->mount_flags
& EXT2_MF_READONLY
))
429 flags
|= IO_FLAG_EXCLUSIVE
;
430 if ((ctx
->mount_flags
& EXT2_MF_READONLY
) &&
431 (ctx
->options
& E2F_OPT_FORCE
))
432 flags
&= ~IO_FLAG_EXCLUSIVE
;
435 retval
= io_ptr
->open(journal_name
, flags
,
441 io_channel_set_blksize(ctx
->journal_io
, ctx
->fs
->blocksize
);
446 start
= ext2fs_journal_sb_start(ctx
->fs
->blocksize
) - 1;
447 bh
= getblk(dev_journal
, start
, ctx
->fs
->blocksize
);
449 retval
= EXT2_ET_NO_MEMORY
;
452 ll_rw_block(READ
, 1, &bh
);
453 if ((retval
= bh
->b_err
) != 0) {
457 memcpy(&jsuper
, start
? bh
->b_data
: bh
->b_data
+ SUPERBLOCK_OFFSET
,
459 #ifdef WORDS_BIGENDIAN
460 if (jsuper
.s_magic
== ext2fs_swab16(EXT2_SUPER_MAGIC
))
461 ext2fs_swap_super(&jsuper
);
463 if (jsuper
.s_magic
!= EXT2_SUPER_MAGIC
||
464 !ext2fs_has_feature_journal_dev(&jsuper
)) {
465 fix_problem(ctx
, PR_0_EXT_JOURNAL_BAD_SUPER
, &pctx
);
466 retval
= EXT2_ET_LOAD_EXT_JOURNAL
;
470 /* Make sure the journal UUID is correct */
471 if (memcmp(jsuper
.s_uuid
, ctx
->fs
->super
->s_journal_uuid
,
472 sizeof(jsuper
.s_uuid
))) {
473 fix_problem(ctx
, PR_0_JOURNAL_BAD_UUID
, &pctx
);
474 retval
= EXT2_ET_LOAD_EXT_JOURNAL
;
479 /* Check the superblock checksum */
480 if (ext2fs_has_feature_metadata_csum(&jsuper
)) {
481 struct struct_ext2_filsys fsx
;
482 struct ext2_super_block superx
;
485 p
= start
? bh
->b_data
: bh
->b_data
+ SUPERBLOCK_OFFSET
;
486 memcpy(&fsx
, ctx
->fs
, sizeof(fsx
));
487 memcpy(&superx
, ctx
->fs
->super
, sizeof(superx
));
489 ext2fs_set_feature_metadata_csum(fsx
.super
);
490 if (!ext2fs_superblock_csum_verify(&fsx
, p
) &&
491 fix_problem(ctx
, PR_0_EXT_JOURNAL_SUPER_CSUM_INVALID
,
493 ext2fs_superblock_csum_set(&fsx
, p
);
494 mark_buffer_dirty(bh
);
499 maxlen
= ext2fs_blocks_count(&jsuper
);
500 journal
->j_maxlen
= (maxlen
< 1ULL << 32) ? maxlen
: (1ULL << 32) - 1;
504 if (!(bh
= getblk(dev_journal
, start
, journal
->j_blocksize
))) {
505 retval
= EXT2_ET_NO_MEMORY
;
509 journal
->j_sb_buffer
= bh
;
510 journal
->j_superblock
= (journal_superblock_t
*)bh
->b_data
;
514 ext2fs_free_mem(&j_inode
);
517 *ret_journal
= journal
;
518 e2fsck_use_inode_shortcuts(ctx
, 0);
522 e2fsck_use_inode_shortcuts(ctx
, 0);
524 ext2fs_free_mem(&dev_fs
);
526 ext2fs_free_mem(&j_inode
);
528 ext2fs_free_mem(&journal
);
532 static errcode_t
e2fsck_journal_fix_bad_inode(e2fsck_t ctx
,
533 struct problem_context
*pctx
)
535 struct ext2_super_block
*sb
= ctx
->fs
->super
;
536 int recover
= ext2fs_has_feature_journal_needs_recovery(ctx
->fs
->super
);
537 int has_journal
= ext2fs_has_feature_journal(ctx
->fs
->super
);
539 if (has_journal
|| sb
->s_journal_inum
) {
540 /* The journal inode is bogus, remove and force full fsck */
541 pctx
->ino
= sb
->s_journal_inum
;
542 if (fix_problem(ctx
, PR_0_JOURNAL_BAD_INODE
, pctx
)) {
543 if (has_journal
&& sb
->s_journal_inum
)
544 printf("*** journal has been deleted ***\n\n");
545 ext2fs_clear_feature_journal(sb
);
546 sb
->s_journal_inum
= 0;
547 memset(sb
->s_jnl_blocks
, 0, sizeof(sb
->s_jnl_blocks
));
548 ctx
->flags
|= E2F_FLAG_JOURNAL_INODE
;
549 ctx
->fs
->flags
&= ~EXT2_FLAG_MASTER_SB_ONLY
;
550 e2fsck_clear_recover(ctx
, 1);
553 return EXT2_ET_CORRUPT_JOURNAL_SB
;
554 } else if (recover
) {
555 if (fix_problem(ctx
, PR_0_JOURNAL_RECOVER_SET
, pctx
)) {
556 e2fsck_clear_recover(ctx
, 1);
559 return EXT2_ET_UNSUPP_FEATURE
;
564 #define V1_SB_SIZE 0x0024
565 static void clear_v2_journal_fields(journal_t
*journal
)
567 e2fsck_t ctx
= journal
->j_dev
->k_ctx
;
568 struct problem_context pctx
;
570 clear_problem_context(&pctx
);
572 if (!fix_problem(ctx
, PR_0_CLEAR_V2_JOURNAL
, &pctx
))
575 ctx
->flags
|= E2F_FLAG_PROBLEMS_FIXED
;
576 memset(((char *) journal
->j_superblock
) + V1_SB_SIZE
, 0,
577 ctx
->fs
->blocksize
-V1_SB_SIZE
);
578 mark_buffer_dirty(journal
->j_sb_buffer
);
582 static errcode_t
e2fsck_journal_load(journal_t
*journal
)
584 e2fsck_t ctx
= journal
->j_dev
->k_ctx
;
585 journal_superblock_t
*jsb
;
586 struct buffer_head
*jbh
= journal
->j_sb_buffer
;
587 struct problem_context pctx
;
589 clear_problem_context(&pctx
);
591 ll_rw_block(READ
, 1, &jbh
);
593 com_err(ctx
->device_name
, jbh
->b_err
, "%s",
594 _("reading journal superblock\n"));
598 jsb
= journal
->j_superblock
;
599 /* If we don't even have JFS_MAGIC, we probably have a wrong inode */
600 if (jsb
->s_header
.h_magic
!= htonl(JFS_MAGIC_NUMBER
))
601 return e2fsck_journal_fix_bad_inode(ctx
, &pctx
);
603 switch (ntohl(jsb
->s_header
.h_blocktype
)) {
604 case JFS_SUPERBLOCK_V1
:
605 journal
->j_format_version
= 1;
606 if (jsb
->s_feature_compat
||
607 jsb
->s_feature_incompat
||
608 jsb
->s_feature_ro_compat
||
610 clear_v2_journal_fields(journal
);
613 case JFS_SUPERBLOCK_V2
:
614 journal
->j_format_version
= 2;
615 if (ntohl(jsb
->s_nr_users
) > 1 &&
616 uuid_is_null(ctx
->fs
->super
->s_journal_uuid
))
617 clear_v2_journal_fields(journal
);
618 if (ntohl(jsb
->s_nr_users
) > 1) {
619 fix_problem(ctx
, PR_0_JOURNAL_UNSUPP_MULTIFS
, &pctx
);
620 return EXT2_ET_JOURNAL_UNSUPP_VERSION
;
625 * These should never appear in a journal super block, so if
626 * they do, the journal is badly corrupted.
628 case JFS_DESCRIPTOR_BLOCK
:
629 case JFS_COMMIT_BLOCK
:
630 case JFS_REVOKE_BLOCK
:
631 return EXT2_ET_CORRUPT_JOURNAL_SB
;
633 /* If we don't understand the superblock major type, but there
634 * is a magic number, then it is likely to be a new format we
635 * just don't understand, so leave it alone. */
637 return EXT2_ET_JOURNAL_UNSUPP_VERSION
;
640 if (JFS_HAS_INCOMPAT_FEATURE(journal
, ~JFS_KNOWN_INCOMPAT_FEATURES
))
641 return EXT2_ET_UNSUPP_FEATURE
;
643 if (JFS_HAS_RO_COMPAT_FEATURE(journal
, ~JFS_KNOWN_ROCOMPAT_FEATURES
))
644 return EXT2_ET_RO_UNSUPP_FEATURE
;
646 /* Checksum v1-3 are mutually exclusive features. */
647 if (jfs_has_feature_csum2(journal
) && jfs_has_feature_csum3(journal
))
648 return EXT2_ET_CORRUPT_JOURNAL_SB
;
650 if (journal_has_csum_v2or3(journal
) &&
651 jfs_has_feature_checksum(journal
))
652 return EXT2_ET_CORRUPT_JOURNAL_SB
;
654 if (!e2fsck_journal_verify_csum_type(journal
, jsb
) ||
655 !e2fsck_journal_sb_csum_verify(journal
, jsb
))
656 return EXT2_ET_CORRUPT_JOURNAL_SB
;
658 if (journal_has_csum_v2or3(journal
))
659 journal
->j_csum_seed
= jbd2_chksum(journal
, ~0, jsb
->s_uuid
,
660 sizeof(jsb
->s_uuid
));
662 /* We have now checked whether we know enough about the journal
663 * format to be able to proceed safely, so any other checks that
664 * fail we should attempt to recover from. */
665 if (jsb
->s_blocksize
!= htonl(journal
->j_blocksize
)) {
666 com_err(ctx
->program_name
, EXT2_ET_CORRUPT_JOURNAL_SB
,
667 _("%s: no valid journal superblock found\n"),
669 return EXT2_ET_CORRUPT_JOURNAL_SB
;
672 if (ntohl(jsb
->s_maxlen
) < journal
->j_maxlen
)
673 journal
->j_maxlen
= ntohl(jsb
->s_maxlen
);
674 else if (ntohl(jsb
->s_maxlen
) > journal
->j_maxlen
) {
675 com_err(ctx
->program_name
, EXT2_ET_CORRUPT_JOURNAL_SB
,
676 _("%s: journal too short\n"),
678 return EXT2_ET_CORRUPT_JOURNAL_SB
;
681 journal
->j_tail_sequence
= ntohl(jsb
->s_sequence
);
682 journal
->j_transaction_sequence
= journal
->j_tail_sequence
;
683 journal
->j_tail
= ntohl(jsb
->s_start
);
684 journal
->j_first
= ntohl(jsb
->s_first
);
685 journal
->j_last
= ntohl(jsb
->s_maxlen
);
690 static void e2fsck_journal_reset_super(e2fsck_t ctx
, journal_superblock_t
*jsb
,
701 /* Leave a valid existing V1 superblock signature alone.
702 * Anything unrecognisable we overwrite with a new V2
705 if (jsb
->s_header
.h_magic
!= htonl(JFS_MAGIC_NUMBER
) ||
706 jsb
->s_header
.h_blocktype
!= htonl(JFS_SUPERBLOCK_V1
)) {
707 jsb
->s_header
.h_magic
= htonl(JFS_MAGIC_NUMBER
);
708 jsb
->s_header
.h_blocktype
= htonl(JFS_SUPERBLOCK_V2
);
711 /* Zero out everything else beyond the superblock header */
713 p
= ((char *) jsb
) + sizeof(journal_header_t
);
714 memset (p
, 0, ctx
->fs
->blocksize
-sizeof(journal_header_t
));
716 jsb
->s_blocksize
= htonl(ctx
->fs
->blocksize
);
717 jsb
->s_maxlen
= htonl(journal
->j_maxlen
);
718 jsb
->s_first
= htonl(1);
720 /* Initialize the journal sequence number so that there is "no"
721 * chance we will find old "valid" transactions in the journal.
722 * This avoids the need to zero the whole journal (slow to do,
723 * and risky when we are just recovering the filesystem).
725 uuid_generate(u
.uuid
);
726 for (i
= 0; i
< 4; i
++)
728 jsb
->s_sequence
= htonl(new_seq
);
729 e2fsck_journal_sb_csum_set(journal
, jsb
);
731 mark_buffer_dirty(journal
->j_sb_buffer
);
732 ll_rw_block(WRITE
, 1, &journal
->j_sb_buffer
);
735 static errcode_t
e2fsck_journal_fix_corrupt_super(e2fsck_t ctx
,
737 struct problem_context
*pctx
)
739 struct ext2_super_block
*sb
= ctx
->fs
->super
;
740 int recover
= ext2fs_has_feature_journal_needs_recovery(ctx
->fs
->super
);
742 if (ext2fs_has_feature_journal(sb
)) {
743 if (fix_problem(ctx
, PR_0_JOURNAL_BAD_SUPER
, pctx
)) {
744 e2fsck_journal_reset_super(ctx
, journal
->j_superblock
,
746 journal
->j_transaction_sequence
= 1;
747 e2fsck_clear_recover(ctx
, recover
);
750 return EXT2_ET_CORRUPT_JOURNAL_SB
;
751 } else if (e2fsck_journal_fix_bad_inode(ctx
, pctx
))
752 return EXT2_ET_CORRUPT_JOURNAL_SB
;
757 static void e2fsck_journal_release(e2fsck_t ctx
, journal_t
*journal
,
760 journal_superblock_t
*jsb
;
763 mark_buffer_clean(journal
->j_sb_buffer
);
764 else if (!(ctx
->options
& E2F_OPT_READONLY
)) {
765 jsb
= journal
->j_superblock
;
766 jsb
->s_sequence
= htonl(journal
->j_tail_sequence
);
768 jsb
->s_start
= 0; /* this marks the journal as empty */
769 e2fsck_journal_sb_csum_set(journal
, jsb
);
770 mark_buffer_dirty(journal
->j_sb_buffer
);
772 brelse(journal
->j_sb_buffer
);
774 if (ctx
->journal_io
) {
775 if (ctx
->fs
&& ctx
->fs
->io
!= ctx
->journal_io
)
776 io_channel_close(ctx
->journal_io
);
781 if (journal
->j_inode
)
782 ext2fs_free_mem(&journal
->j_inode
);
784 if (journal
->j_fs_dev
)
785 ext2fs_free_mem(&journal
->j_fs_dev
);
786 ext2fs_free_mem(&journal
);
790 * This function makes sure that the superblock fields regarding the
791 * journal are consistent.
793 errcode_t
e2fsck_check_ext3_journal(e2fsck_t ctx
)
795 struct ext2_super_block
*sb
= ctx
->fs
->super
;
797 int recover
= ext2fs_has_feature_journal_needs_recovery(ctx
->fs
->super
);
798 struct problem_context pctx
;
800 int reset
= 0, force_fsck
= 0;
803 /* If we don't have any journal features, don't do anything more */
804 if (!ext2fs_has_feature_journal(sb
) &&
805 !recover
&& sb
->s_journal_inum
== 0 && sb
->s_journal_dev
== 0 &&
806 uuid_is_null(sb
->s_journal_uuid
))
809 clear_problem_context(&pctx
);
810 pctx
.num
= sb
->s_journal_inum
;
812 retval
= e2fsck_get_journal(ctx
, &journal
);
814 if ((retval
== EXT2_ET_BAD_INODE_NUM
) ||
815 (retval
== EXT2_ET_BAD_BLOCK_NUM
) ||
816 (retval
== EXT2_ET_JOURNAL_TOO_SMALL
) ||
817 (retval
== EXT2_ET_NO_JOURNAL
))
818 return e2fsck_journal_fix_bad_inode(ctx
, &pctx
);
822 retval
= e2fsck_journal_load(journal
);
824 if ((retval
== EXT2_ET_CORRUPT_JOURNAL_SB
) ||
825 ((retval
== EXT2_ET_UNSUPP_FEATURE
) &&
826 (!fix_problem(ctx
, PR_0_JOURNAL_UNSUPP_INCOMPAT
,
828 ((retval
== EXT2_ET_RO_UNSUPP_FEATURE
) &&
829 (!fix_problem(ctx
, PR_0_JOURNAL_UNSUPP_ROCOMPAT
,
831 ((retval
== EXT2_ET_JOURNAL_UNSUPP_VERSION
) &&
832 (!fix_problem(ctx
, PR_0_JOURNAL_UNSUPP_VERSION
, &pctx
))))
833 retval
= e2fsck_journal_fix_corrupt_super(ctx
, journal
,
835 e2fsck_journal_release(ctx
, journal
, 0, 1);
840 * We want to make the flags consistent here. We will not leave with
841 * needs_recovery set but has_journal clear. We can't get in a loop
842 * with -y, -n, or -p, only if a user isn't making up their mind.
845 if (!ext2fs_has_feature_journal(sb
)) {
846 recover
= ext2fs_has_feature_journal_needs_recovery(sb
);
847 if (fix_problem(ctx
, PR_0_JOURNAL_HAS_JOURNAL
, &pctx
)) {
849 !fix_problem(ctx
, PR_0_JOURNAL_RECOVER_SET
, &pctx
))
852 * Need a full fsck if we are releasing a
853 * journal stored on a reserved inode.
855 force_fsck
= recover
||
856 (sb
->s_journal_inum
< EXT2_FIRST_INODE(sb
));
857 /* Clear all of the journal fields */
858 sb
->s_journal_inum
= 0;
859 sb
->s_journal_dev
= 0;
860 memset(sb
->s_journal_uuid
, 0,
861 sizeof(sb
->s_journal_uuid
));
862 e2fsck_clear_recover(ctx
, force_fsck
);
863 } else if (!(ctx
->options
& E2F_OPT_READONLY
)) {
864 ext2fs_set_feature_journal(sb
);
865 ctx
->fs
->flags
&= ~EXT2_FLAG_MASTER_SB_ONLY
;
866 ext2fs_mark_super_dirty(ctx
->fs
);
870 if (ext2fs_has_feature_journal(sb
) &&
871 !ext2fs_has_feature_journal_needs_recovery(sb
) &&
872 journal
->j_superblock
->s_start
!= 0) {
873 /* Print status information */
874 fix_problem(ctx
, PR_0_JOURNAL_RECOVERY_CLEAR
, &pctx
);
876 problem
= PR_0_JOURNAL_RUN_DEFAULT
;
878 problem
= PR_0_JOURNAL_RUN
;
879 if (fix_problem(ctx
, problem
, &pctx
)) {
880 ctx
->options
|= E2F_OPT_FORCE
;
881 ext2fs_set_feature_journal_needs_recovery(sb
);
882 ext2fs_mark_super_dirty(ctx
->fs
);
883 } else if (fix_problem(ctx
,
884 PR_0_JOURNAL_RESET_JOURNAL
, &pctx
)) {
886 sb
->s_state
&= ~EXT2_VALID_FS
;
887 ext2fs_mark_super_dirty(ctx
->fs
);
890 * If the user answers no to the above question, we
891 * ignore the fact that journal apparently has data;
892 * accidentally replaying over valid data would be far
893 * worse than skipping a questionable recovery.
895 * XXX should we abort with a fatal error here? What
896 * will the ext3 kernel code do if a filesystem with
897 * !NEEDS_RECOVERY but with a non-zero
898 * journal->j_superblock->s_start is mounted?
903 * If we don't need to do replay the journal, check to see if
904 * the journal's errno is set; if so, we need to mark the file
905 * system as being corrupt and clear the journal's s_errno.
907 if (!ext2fs_has_feature_journal_needs_recovery(sb
) &&
908 journal
->j_superblock
->s_errno
) {
909 ctx
->fs
->super
->s_state
|= EXT2_ERROR_FS
;
910 ext2fs_mark_super_dirty(ctx
->fs
);
911 journal
->j_superblock
->s_errno
= 0;
912 e2fsck_journal_sb_csum_set(journal
, journal
->j_superblock
);
913 mark_buffer_dirty(journal
->j_sb_buffer
);
916 e2fsck_journal_release(ctx
, journal
, reset
, 0);
920 static errcode_t
recover_ext3_journal(e2fsck_t ctx
)
922 struct problem_context pctx
;
926 clear_problem_context(&pctx
);
928 journal_init_revoke_caches();
929 retval
= e2fsck_get_journal(ctx
, &journal
);
933 retval
= e2fsck_journal_load(journal
);
937 retval
= journal_init_revoke(journal
, 1024);
941 retval
= -journal_recover(journal
);
945 if (journal
->j_failed_commit
) {
946 pctx
.ino
= journal
->j_failed_commit
;
947 fix_problem(ctx
, PR_0_JNL_TXN_CORRUPT
, &pctx
);
948 journal
->j_superblock
->s_errno
= -EINVAL
;
949 mark_buffer_dirty(journal
->j_sb_buffer
);
952 journal
->j_tail_sequence
= journal
->j_transaction_sequence
;
955 journal_destroy_revoke(journal
);
956 journal_destroy_revoke_caches();
957 e2fsck_journal_release(ctx
, journal
, 1, 0);
961 errcode_t
e2fsck_run_ext3_journal(e2fsck_t ctx
)
963 io_manager io_ptr
= ctx
->fs
->io
->manager
;
964 int blocksize
= ctx
->fs
->blocksize
;
965 errcode_t retval
, recover_retval
;
967 unsigned long long kbytes_written
= 0;
969 printf(_("%s: recovering journal\n"), ctx
->device_name
);
970 if (ctx
->options
& E2F_OPT_READONLY
) {
971 printf(_("%s: won't do journal recovery while read-only\n"),
973 return EXT2_ET_FILE_RO
;
976 if (ctx
->fs
->flags
& EXT2_FLAG_DIRTY
)
977 ext2fs_flush(ctx
->fs
); /* Force out any modifications */
979 recover_retval
= recover_ext3_journal(ctx
);
982 * Reload the filesystem context to get up-to-date data from disk
983 * because journal recovery will change the filesystem under us.
985 if (ctx
->fs
->super
->s_kbytes_written
&&
986 ctx
->fs
->io
->manager
->get_stats
)
987 ctx
->fs
->io
->manager
->get_stats(ctx
->fs
->io
, &stats
);
988 if (stats
&& stats
->bytes_written
)
989 kbytes_written
= stats
->bytes_written
>> 10;
991 ext2fs_mmp_stop(ctx
->fs
);
992 ext2fs_free(ctx
->fs
);
993 retval
= ext2fs_open(ctx
->filesystem_name
, ctx
->openfs_flags
,
994 ctx
->superblock
, blocksize
, io_ptr
,
997 com_err(ctx
->program_name
, retval
,
998 _("while trying to re-open %s"),
1000 fatal_error(ctx
, 0);
1002 ctx
->fs
->priv_data
= ctx
;
1003 ctx
->fs
->now
= ctx
->now
;
1004 ctx
->fs
->flags
|= EXT2_FLAG_MASTER_SB_ONLY
;
1005 ctx
->fs
->super
->s_kbytes_written
+= kbytes_written
;
1007 /* Set the superblock flags */
1008 e2fsck_clear_recover(ctx
, recover_retval
!= 0);
1011 * Do one last sanity check, and propagate journal->s_errno to
1012 * the EXT2_ERROR_FS flag in the fs superblock if needed.
1014 retval
= e2fsck_check_ext3_journal(ctx
);
1015 return retval
? retval
: recover_retval
;
1019 * This function will move the journal inode from a visible file in
1020 * the filesystem directory hierarchy to the reserved inode if necessary.
1022 static const char * const journal_names
[] = {
1023 ".journal", "journal", ".journal.dat", "journal.dat", 0 };
1025 void e2fsck_move_ext3_journal(e2fsck_t ctx
)
1027 struct ext2_super_block
*sb
= ctx
->fs
->super
;
1028 struct problem_context pctx
;
1029 struct ext2_inode inode
;
1030 ext2_filsys fs
= ctx
->fs
;
1033 const char * const * cpp
;
1037 clear_problem_context(&pctx
);
1040 * If the filesystem is opened read-only, or there is no
1041 * journal, then do nothing.
1043 if ((ctx
->options
& E2F_OPT_READONLY
) ||
1044 (sb
->s_journal_inum
== 0) ||
1045 !ext2fs_has_feature_journal(sb
))
1049 * Read in the journal inode
1051 if (ext2fs_read_inode(fs
, sb
->s_journal_inum
, &inode
) != 0)
1055 * If it's necessary to backup the journal inode, do so.
1057 if ((sb
->s_jnl_backup_type
== 0) ||
1058 ((sb
->s_jnl_backup_type
== EXT3_JNL_BACKUP_BLOCKS
) &&
1059 memcmp(inode
.i_block
, sb
->s_jnl_blocks
, EXT2_N_BLOCKS
*4))) {
1060 if (fix_problem(ctx
, PR_0_BACKUP_JNL
, &pctx
)) {
1061 memcpy(sb
->s_jnl_blocks
, inode
.i_block
,
1063 sb
->s_jnl_blocks
[15] = inode
.i_size_high
;
1064 sb
->s_jnl_blocks
[16] = inode
.i_size
;
1065 sb
->s_jnl_backup_type
= EXT3_JNL_BACKUP_BLOCKS
;
1066 ext2fs_mark_super_dirty(fs
);
1067 fs
->flags
&= ~EXT2_FLAG_MASTER_SB_ONLY
;
1072 * If the journal is already the hidden inode, then do nothing
1074 if (sb
->s_journal_inum
== EXT2_JOURNAL_INO
)
1078 * The journal inode had better have only one link and not be readable.
1080 if (inode
.i_links_count
!= 1)
1084 * If the filesystem is mounted, or we can't tell whether
1085 * or not it's mounted, do nothing.
1087 retval
= ext2fs_check_if_mounted(ctx
->filesystem_name
, &mount_flags
);
1088 if (retval
|| (mount_flags
& EXT2_MF_MOUNTED
))
1092 * If we can't find the name of the journal inode, then do
1095 for (cpp
= journal_names
; *cpp
; cpp
++) {
1096 retval
= ext2fs_lookup(fs
, EXT2_ROOT_INO
, *cpp
,
1097 strlen(*cpp
), 0, &ino
);
1098 if ((retval
== 0) && (ino
== sb
->s_journal_inum
))
1104 /* We need the inode bitmap to be loaded */
1105 retval
= ext2fs_read_bitmaps(fs
);
1110 if (!fix_problem(ctx
, PR_0_MOVE_JOURNAL
, &pctx
))
1114 * OK, we've done all the checks, let's actually move the
1115 * journal inode. Errors at this point mean we need to force
1116 * an ext2 filesystem check.
1118 if ((retval
= ext2fs_unlink(fs
, EXT2_ROOT_INO
, *cpp
, ino
, 0)) != 0)
1120 if ((retval
= ext2fs_write_inode(fs
, EXT2_JOURNAL_INO
, &inode
)) != 0)
1122 sb
->s_journal_inum
= EXT2_JOURNAL_INO
;
1123 ext2fs_mark_super_dirty(fs
);
1124 fs
->flags
&= ~EXT2_FLAG_MASTER_SB_ONLY
;
1125 inode
.i_links_count
= 0;
1126 inode
.i_dtime
= ctx
->now
;
1127 if ((retval
= ext2fs_write_inode(fs
, ino
, &inode
)) != 0)
1130 group
= ext2fs_group_of_ino(fs
, ino
);
1131 ext2fs_unmark_inode_bitmap2(fs
->inode_map
, ino
);
1132 ext2fs_mark_ib_dirty(fs
);
1133 ext2fs_bg_free_inodes_count_set(fs
, group
, ext2fs_bg_free_inodes_count(fs
, group
) + 1);
1134 ext2fs_group_desc_csum_set(fs
, group
);
1135 fs
->super
->s_free_inodes_count
++;
1139 pctx
.errcode
= retval
;
1140 fix_problem(ctx
, PR_0_ERR_MOVE_JOURNAL
, &pctx
);
1141 fs
->super
->s_state
&= ~EXT2_VALID_FS
;
1142 ext2fs_mark_super_dirty(fs
);
1147 * This function makes sure the superblock hint for the external
1148 * journal is correct.
1150 int e2fsck_fix_ext3_journal_hint(e2fsck_t ctx
)
1152 struct ext2_super_block
*sb
= ctx
->fs
->super
;
1153 struct problem_context pctx
;
1154 char uuid
[37], *journal_name
;
1157 if (!ext2fs_has_feature_journal(sb
) ||
1158 uuid_is_null(sb
->s_journal_uuid
))
1161 uuid_unparse(sb
->s_journal_uuid
, uuid
);
1162 journal_name
= blkid_get_devname(ctx
->blkid
, "UUID", uuid
);
1166 if (stat(journal_name
, &st
) < 0) {
1171 if (st
.st_rdev
!= sb
->s_journal_dev
) {
1172 clear_problem_context(&pctx
);
1173 pctx
.num
= st
.st_rdev
;
1174 if (fix_problem(ctx
, PR_0_EXTERNAL_JOURNAL_HINT
, &pctx
)) {
1175 sb
->s_journal_dev
= st
.st_rdev
;
1176 ext2fs_mark_super_dirty(ctx
->fs
);