2 * journal.c --- code for handling the "ext3" journal
4 * Copyright (C) 2000 Andreas Dilger
5 * Copyright (C) 2000 Theodore Ts'o
7 * Parts of the code are based on fs/jfs/journal.c by Stephen C. Tweedie
8 * Copyright (C) 1999 Red Hat Software
10 * This file may be redistributed under the terms of the
11 * GNU General Public License version 2 or at your discretion
16 #ifdef HAVE_SYS_MOUNT_H
17 #include <sys/param.h>
18 #include <sys/mount.h>
19 #define MNT_FL (MS_MGC_VAL | MS_RDONLY)
21 #ifdef HAVE_SYS_STAT_H
25 #define E2FSCK_INCLUDE_INLINE_FUNCS
28 #include "uuid/uuid.h"
/* Count of live buffer heads, reported by the getblk()/brelse() traces. */
static int bh_count;
33 * Define USE_INODE_IO to use the inode_io.c / fileio.c codepaths.
34 * This creates a larger static binary, and a smaller binary using
35 * shared libraries. It's also probably slightly less CPU-efficient,
36 * which is why it's not on by default. But, it's a good way of
37 * testing the functions in inode_io.c and fileio.c.
41 /* Checksumming functions */
42 static int e2fsck_journal_verify_csum_type(journal_t
*j
,
43 journal_superblock_t
*jsb
)
45 if (!jbd2_journal_has_csum_v2or3(j
))
48 return jsb
->s_checksum_type
== JBD2_CRC32C_CHKSUM
;
51 static __u32
e2fsck_journal_sb_csum(journal_superblock_t
*jsb
)
55 old_crc
= jsb
->s_checksum
;
57 crc
= ext2fs_crc32c_le(~0, (unsigned char *)jsb
,
58 sizeof(journal_superblock_t
));
59 jsb
->s_checksum
= old_crc
;
64 static int e2fsck_journal_sb_csum_verify(journal_t
*j
,
65 journal_superblock_t
*jsb
)
67 __u32 provided
, calculated
;
69 if (!jbd2_journal_has_csum_v2or3(j
))
72 provided
= ext2fs_be32_to_cpu(jsb
->s_checksum
);
73 calculated
= e2fsck_journal_sb_csum(jsb
);
75 return provided
== calculated
;
78 static errcode_t
e2fsck_journal_sb_csum_set(journal_t
*j
,
79 journal_superblock_t
*jsb
)
83 if (!jbd2_journal_has_csum_v2or3(j
))
86 crc
= e2fsck_journal_sb_csum(jsb
);
87 jsb
->s_checksum
= ext2fs_cpu_to_be32(crc
);
91 /* Kernel compatibility functions for handling the journal. These allow us
92 * to use the recovery.c file virtually unchanged from the kernel, so we
93 * don't have to do much to keep kernel and user recovery in sync.
95 int jbd2_journal_bmap(journal_t
*journal
, unsigned long block
,
96 unsigned long long *phys
)
102 struct inode
*inode
= journal
->j_inode
;
111 retval
= ext2fs_bmap2(inode
->i_ctx
->fs
, inode
->i_ino
,
112 &inode
->i_ext2
, NULL
, 0, (blk64_t
) block
,
115 return -1 * ((int) retval
);
119 struct buffer_head
*getblk(kdev_t kdev
, unsigned long long blocknr
,
122 struct buffer_head
*bh
;
123 int bufsize
= sizeof(*bh
) + kdev
->k_ctx
->fs
->blocksize
-
126 bh
= e2fsck_allocate_memory(kdev
->k_ctx
, bufsize
, "block buffer");
130 if (journal_enable_debug
>= 3)
132 jfs_debug(4, "getblk for block %llu (%d bytes)(total %d)\n",
133 blocknr
, blocksize
, bh_count
);
135 bh
->b_ctx
= kdev
->k_ctx
;
136 if (kdev
->k_dev
== K_DEV_FS
)
137 bh
->b_io
= kdev
->k_ctx
->fs
->io
;
139 bh
->b_io
= kdev
->k_ctx
->journal_io
;
140 bh
->b_size
= blocksize
;
141 bh
->b_blocknr
= blocknr
;
146 int sync_blockdev(kdev_t kdev
)
150 if (kdev
->k_dev
== K_DEV_FS
)
151 io
= kdev
->k_ctx
->fs
->io
;
153 io
= kdev
->k_ctx
->journal_io
;
155 return io_channel_flush(io
) ? -EIO
: 0;
158 void ll_rw_block(int rw
, int op_flags
EXT2FS_ATTR((unused
)), int nr
,
159 struct buffer_head
*bhp
[])
162 struct buffer_head
*bh
;
164 for (; nr
> 0; --nr
) {
166 if (rw
== REQ_OP_READ
&& !bh
->b_uptodate
) {
167 jfs_debug(3, "reading block %llu/%p\n",
168 bh
->b_blocknr
, (void *) bh
);
169 retval
= io_channel_read_blk64(bh
->b_io
,
173 com_err(bh
->b_ctx
->device_name
, retval
,
174 "while reading block %llu\n",
176 bh
->b_err
= (int) retval
;
180 } else if (rw
== REQ_OP_WRITE
&& bh
->b_dirty
) {
181 jfs_debug(3, "writing block %llu/%p\n",
184 retval
= io_channel_write_blk64(bh
->b_io
,
188 com_err(bh
->b_ctx
->device_name
, retval
,
189 "while writing block %llu\n",
191 bh
->b_err
= (int) retval
;
197 jfs_debug(3, "no-op %s for block %llu\n",
198 rw
== REQ_OP_READ
? "read" : "write",
204 void mark_buffer_dirty(struct buffer_head
*bh
)
209 static void mark_buffer_clean(struct buffer_head
* bh
)
214 void brelse(struct buffer_head
*bh
)
217 ll_rw_block(REQ_OP_WRITE
, 0, 1, &bh
);
218 jfs_debug(3, "freeing block %llu/%p (total %d)\n",
219 bh
->b_blocknr
, (void *) bh
, --bh_count
);
220 ext2fs_free_mem(&bh
);
223 int buffer_uptodate(struct buffer_head
*bh
)
225 return bh
->b_uptodate
;
228 void mark_buffer_uptodate(struct buffer_head
*bh
, int val
)
230 bh
->b_uptodate
= val
;
233 void wait_on_buffer(struct buffer_head
*bh
)
236 ll_rw_block(REQ_OP_READ
, 0, 1, &bh
);
240 static void e2fsck_clear_recover(e2fsck_t ctx
, int error
)
242 ext2fs_clear_feature_journal_needs_recovery(ctx
->fs
->super
);
244 /* if we had an error doing journal recovery, we need a full fsck */
246 ctx
->fs
->super
->s_state
&= ~EXT2_VALID_FS
;
247 ext2fs_mark_super_dirty(ctx
->fs
);
251 * This is a helper function to check the validity of the journal.
253 struct process_block_struct
{
254 e2_blkcnt_t last_block
;
257 static int process_journal_block(ext2_filsys fs
,
259 e2_blkcnt_t blockcnt
,
260 blk64_t ref_block
EXT2FS_ATTR((unused
)),
261 int ref_offset
EXT2FS_ATTR((unused
)),
264 struct process_block_struct
*p
;
265 blk64_t blk
= *block_nr
;
267 p
= (struct process_block_struct
*) priv_data
;
269 if (!blk
|| blk
< fs
->super
->s_first_data_block
||
270 blk
>= ext2fs_blocks_count(fs
->super
))
274 p
->last_block
= blockcnt
;
278 static int ext4_fc_replay_scan(journal_t
*j
, struct buffer_head
*bh
,
279 int off
, tid_t expected_tid
)
281 e2fsck_t ctx
= j
->j_fs_dev
->k_ctx
;
282 struct e2fsck_fc_replay_state
*state
;
283 int ret
= JBD2_FC_REPLAY_CONTINUE
;
284 struct ext4_fc_add_range ext
;
285 struct ext4_fc_tl tl
;
286 struct ext4_fc_tail tail
;
287 __u8
*start
, *cur
, *end
, *val
;
288 struct ext4_fc_head head
;
289 struct ext2fs_extent ext2fs_ex
= {0};
291 state
= &ctx
->fc_replay_state
;
293 start
= (__u8
*)bh
->b_data
;
294 end
= (__u8
*)bh
->b_data
+ j
->j_blocksize
- 1;
296 jbd_debug(1, "Scan phase starting, expected %d", expected_tid
);
297 if (state
->fc_replay_expected_off
== 0) {
298 memset(state
, 0, sizeof(*state
));
299 /* Check if we can stop early */
300 if (le16_to_cpu(((struct ext4_fc_tl
*)start
)->fc_tag
)
301 != EXT4_FC_TAG_HEAD
) {
302 jbd_debug(1, "Ending early!, not a head tag");
307 if (off
!= state
->fc_replay_expected_off
) {
312 state
->fc_replay_expected_off
++;
313 for (cur
= start
; cur
< end
; cur
= cur
+ le16_to_cpu(tl
.fc_len
) + sizeof(tl
)) {
314 memcpy(&tl
, cur
, sizeof(tl
));
315 val
= cur
+ sizeof(tl
);
317 jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
318 tag2str(le16_to_cpu(tl
.fc_tag
)), bh
->b_blocknr
);
319 switch (le16_to_cpu(tl
.fc_tag
)) {
320 case EXT4_FC_TAG_ADD_RANGE
:
321 memcpy(&ext
, val
, sizeof(ext
));
322 ret
= ext2fs_decode_extent(&ext2fs_ex
,
326 ret
= JBD2_FC_REPLAY_STOP
;
328 ret
= JBD2_FC_REPLAY_CONTINUE
;
330 case EXT4_FC_TAG_DEL_RANGE
:
331 case EXT4_FC_TAG_LINK
:
332 case EXT4_FC_TAG_UNLINK
:
333 case EXT4_FC_TAG_CREAT
:
334 case EXT4_FC_TAG_INODE
:
335 case EXT4_FC_TAG_PAD
:
337 state
->fc_crc
= jbd2_chksum(j
, state
->fc_crc
, cur
,
338 sizeof(tl
) + ext4_fc_tag_len(&tl
));
340 case EXT4_FC_TAG_TAIL
:
342 memcpy(&tail
, val
, sizeof(tail
));
343 state
->fc_crc
= jbd2_chksum(j
, state
->fc_crc
, cur
,
345 offsetof(struct ext4_fc_tail
,
347 jbd_debug(1, "tail tid %d, expected %d\n",
348 le32_to_cpu(tail
.fc_tid
), expected_tid
);
349 if (le32_to_cpu(tail
.fc_tid
) == expected_tid
&&
350 le32_to_cpu(tail
.fc_crc
) == state
->fc_crc
) {
351 state
->fc_replay_num_tags
= state
->fc_cur_tag
;
353 ret
= state
->fc_replay_num_tags
?
354 JBD2_FC_REPLAY_STOP
: -EFSBADCRC
;
358 case EXT4_FC_TAG_HEAD
:
359 memcpy(&head
, val
, sizeof(head
));
360 if (le32_to_cpu(head
.fc_features
) &
361 ~EXT4_FC_SUPPORTED_FEATURES
) {
365 if (le32_to_cpu(head
.fc_tid
) != expected_tid
) {
370 state
->fc_crc
= jbd2_chksum(j
, state
->fc_crc
, cur
,
371 sizeof(tl
) + ext4_fc_tag_len(&tl
));
374 ret
= state
->fc_replay_num_tags
?
375 JBD2_FC_REPLAY_STOP
: -ECANCELED
;
377 if (ret
< 0 || ret
== JBD2_FC_REPLAY_STOP
)
385 static int __errcode_to_errno(errcode_t err
, const char *func
, int line
)
389 fprintf(stderr
, "Error \"%s\" encountered in function %s at line %d\n",
390 error_message(err
), func
, line
);
396 #define errcode_to_errno(err) __errcode_to_errno(err, __func__, __LINE__)
398 #define ex_end(__ex) ((__ex)->e_lblk + (__ex)->e_len - 1)
399 #define ex_pend(__ex) ((__ex)->e_pblk + (__ex)->e_len - 1)
401 static int make_room(struct extent_list
*list
, int i
)
405 if (list
->count
== list
->size
) {
406 unsigned int new_size
= (list
->size
+ 341) *
407 sizeof(struct ext2fs_extent
);
408 ret
= errcode_to_errno(ext2fs_resize_mem(0, new_size
, &list
->extents
));
414 memmove(&list
->extents
[i
+ 1], &list
->extents
[i
],
415 sizeof(list
->extents
[0]) * (list
->count
- i
));
420 static int ex_compar(const void *arg1
, const void *arg2
)
422 const struct ext2fs_extent
*ex1
= (const struct ext2fs_extent
*)arg1
;
423 const struct ext2fs_extent
*ex2
= (const struct ext2fs_extent
*)arg2
;
425 if (ex1
->e_lblk
< ex2
->e_lblk
)
427 if (ex1
->e_lblk
> ex2
->e_lblk
)
429 return ex1
->e_len
- ex2
->e_len
;
432 static int ex_len_compar(const void *arg1
, const void *arg2
)
434 const struct ext2fs_extent
*ex1
= (const struct ext2fs_extent
*)arg1
;
435 const struct ext2fs_extent
*ex2
= (const struct ext2fs_extent
*)arg2
;
437 if (ex1
->e_len
< ex2
->e_len
)
440 if (ex1
->e_lblk
> ex2
->e_lblk
)
446 static void ex_sort_and_merge(struct extent_list
*list
)
454 * Reverse sort by length, that way we strip off all the 0 length
457 qsort(list
->extents
, list
->count
, sizeof(struct ext2fs_extent
),
460 for (i
= 0; i
< list
->count
; i
++) {
461 if (list
->extents
[i
].e_len
== 0) {
467 if (list
->count
== 0)
470 /* Now sort by logical offset */
471 qsort(list
->extents
, list
->count
, sizeof(list
->extents
[0]),
474 /* Merge adjacent extents if they are logically and physically contiguous */
476 while (i
< list
->count
- 1) {
477 if (ex_end(&list
->extents
[i
]) + 1 != list
->extents
[i
+ 1].e_lblk
||
478 ex_pend(&list
->extents
[i
]) + 1 != list
->extents
[i
+ 1].e_pblk
||
479 (list
->extents
[i
].e_flags
& EXT2_EXTENT_FLAGS_UNINIT
) !=
480 (list
->extents
[i
+ 1].e_flags
& EXT2_EXTENT_FLAGS_UNINIT
)) {
485 list
->extents
[i
].e_len
+= list
->extents
[i
+ 1].e_len
;
486 for (j
= i
+ 1; j
< list
->count
- 1; j
++)
487 list
->extents
[j
] = list
->extents
[j
+ 1];
492 /* must free blocks that are released */
493 static int ext4_modify_extent_list(e2fsck_t ctx
, struct extent_list
*list
,
494 struct ext2fs_extent
*ex
, int del
)
498 struct ext2fs_extent add_ex
= *ex
;
500 /* First let's create a hole from ex->e_lblk of length ex->e_len */
501 for (i
= 0; i
< list
->count
; i
++) {
502 if (ex_end(&list
->extents
[i
]) < add_ex
.e_lblk
)
505 /* Case 1: No overlap */
506 if (list
->extents
[i
].e_lblk
> ex_end(&add_ex
))
509 * Unmark all the blocks in bb now. All the blocks get marked
510 * before we exit this function.
512 ext2fs_unmark_block_bitmap_range2(ctx
->fs
->block_map
,
513 list
->extents
[i
].e_pblk
, list
->extents
[i
].e_len
);
515 if (list
->extents
[i
].e_lblk
< add_ex
.e_lblk
&&
516 ex_end(&list
->extents
[i
]) > ex_end(&add_ex
)) {
517 ret
= make_room(list
, i
+ 1);
520 list
->extents
[i
+ 1] = list
->extents
[i
];
521 offset
= ex_end(&add_ex
) + 1 - list
->extents
[i
].e_lblk
;
522 list
->extents
[i
+ 1].e_lblk
+= offset
;
523 list
->extents
[i
+ 1].e_pblk
+= offset
;
524 list
->extents
[i
+ 1].e_len
-= offset
;
525 list
->extents
[i
].e_len
=
526 add_ex
.e_lblk
- list
->extents
[i
].e_lblk
;
530 /* Case 3: Exact overlap */
531 if (add_ex
.e_lblk
<= list
->extents
[i
].e_lblk
&&
532 ex_end(&list
->extents
[i
]) <= ex_end(&add_ex
)) {
534 list
->extents
[i
].e_len
= 0;
538 /* Case 4: Partial overlap */
539 if (ex_end(&list
->extents
[i
]) > ex_end(&add_ex
)) {
540 offset
= ex_end(&add_ex
) + 1 - list
->extents
[i
].e_lblk
;
541 list
->extents
[i
].e_lblk
+= offset
;
542 list
->extents
[i
].e_pblk
+= offset
;
543 list
->extents
[i
].e_len
-= offset
;
547 if (ex_end(&add_ex
) >= ex_end(&list
->extents
[i
]))
548 list
->extents
[i
].e_len
=
549 add_ex
.e_lblk
> list
->extents
[i
].e_lblk
?
550 add_ex
.e_lblk
- list
->extents
[i
].e_lblk
: 0;
553 if (add_ex
.e_len
&& !del
) {
554 make_room(list
, list
->count
);
555 list
->extents
[list
->count
- 1] = add_ex
;
558 ex_sort_and_merge(list
);
560 /* Mark all occupied blocks allocated */
561 for (i
= 0; i
< list
->count
; i
++)
562 ext2fs_mark_block_bitmap_range2(ctx
->fs
->block_map
,
563 list
->extents
[i
].e_pblk
, list
->extents
[i
].e_len
);
564 ext2fs_mark_bb_dirty(ctx
->fs
);
569 static int ext4_add_extent_to_list(e2fsck_t ctx
, struct extent_list
*list
,
570 struct ext2fs_extent
*ex
)
572 return ext4_modify_extent_list(ctx
, list
, ex
, 0 /* add */);
575 static int ext4_del_extent_from_list(e2fsck_t ctx
, struct extent_list
*list
,
576 struct ext2fs_extent
*ex
)
578 return ext4_modify_extent_list(ctx
, list
, ex
, 1 /* delete */);
581 static int ext4_fc_read_extents(e2fsck_t ctx
, ext2_ino_t ino
)
583 struct extent_list
*extent_list
= &ctx
->fc_replay_state
.fc_extent_list
;
585 if (extent_list
->ino
== ino
)
588 extent_list
->ino
= ino
;
589 return errcode_to_errno(e2fsck_read_extents(ctx
, extent_list
));
593 * Flush extents in replay state on disk. @ino is the inode that is going
594 * to be processed next. So, we hold back flushing of the extent list
595 * if the next inode that's going to be processed is same as the one with
596 * cached extents in our replay state. That allows us to gather multiple extents
597 * for the inode so that we can flush all of them at once and it also saves us
598 * from continuously growing and shrinking the extent tree.
600 static void ext4_fc_flush_extents(e2fsck_t ctx
, ext2_ino_t ino
)
602 struct extent_list
*extent_list
= &ctx
->fc_replay_state
.fc_extent_list
;
604 if (extent_list
->ino
== ino
|| extent_list
->ino
== 0)
606 e2fsck_rewrite_extent_tree(ctx
, extent_list
);
607 ext2fs_free_mem(&extent_list
->extents
);
608 memset(extent_list
, 0, sizeof(*extent_list
));
611 /* Helper struct for dentry replay routines */
612 struct dentry_info_args
{
613 ext2_ino_t parent_ino
;
619 static inline int tl_to_darg(struct dentry_info_args
*darg
,
620 struct ext4_fc_tl
*tl
, __u8
*val
)
622 struct ext4_fc_dentry_info fcd
;
624 memcpy(&fcd
, val
, sizeof(fcd
));
626 darg
->parent_ino
= le32_to_cpu(fcd
.fc_parent_ino
);
627 darg
->ino
= le32_to_cpu(fcd
.fc_ino
);
628 darg
->dname_len
= ext4_fc_tag_len(tl
) -
629 sizeof(struct ext4_fc_dentry_info
);
630 darg
->dname
= malloc(darg
->dname_len
+ 1);
634 val
+ sizeof(struct ext4_fc_dentry_info
),
636 darg
->dname
[darg
->dname_len
] = 0;
637 jbd_debug(1, "%s: %s, ino %u, parent %u\n",
638 le16_to_cpu(tl
->fc_tag
) == EXT4_FC_TAG_CREAT
? "create" :
639 (le16_to_cpu(tl
->fc_tag
) == EXT4_FC_TAG_LINK
? "link" :
640 (le16_to_cpu(tl
->fc_tag
) == EXT4_FC_TAG_UNLINK
? "unlink" :
641 "error")), darg
->dname
, darg
->ino
, darg
->parent_ino
);
645 static int ext4_fc_handle_unlink(e2fsck_t ctx
, struct ext4_fc_tl
*tl
, __u8
*val
)
647 struct dentry_info_args darg
;
650 ret
= tl_to_darg(&darg
, tl
, val
);
653 ext4_fc_flush_extents(ctx
, darg
.ino
);
654 ret
= errcode_to_errno(ext2fs_unlink(ctx
->fs
, darg
.parent_ino
,
655 darg
.dname
, darg
.ino
, 0));
656 /* It's okay if the above call fails */
662 static int ext4_fc_handle_link_and_create(e2fsck_t ctx
, struct ext4_fc_tl
*tl
, __u8
*val
)
664 struct dentry_info_args darg
;
665 ext2_filsys fs
= ctx
->fs
;
666 struct ext2_inode_large inode_large
;
667 int ret
, filetype
, mode
;
669 ret
= tl_to_darg(&darg
, tl
, val
);
672 ext4_fc_flush_extents(ctx
, 0);
673 ret
= errcode_to_errno(ext2fs_read_inode(fs
, darg
.ino
,
674 (struct ext2_inode
*)&inode_large
));
678 mode
= inode_large
.i_mode
;
680 if (LINUX_S_ISREG(mode
))
681 filetype
= EXT2_FT_REG_FILE
;
682 else if (LINUX_S_ISDIR(mode
))
683 filetype
= EXT2_FT_DIR
;
684 else if (LINUX_S_ISCHR(mode
))
685 filetype
= EXT2_FT_CHRDEV
;
686 else if (LINUX_S_ISBLK(mode
))
687 filetype
= EXT2_FT_BLKDEV
;
688 else if (LINUX_S_ISLNK(mode
))
689 return EXT2_FT_SYMLINK
;
690 else if (LINUX_S_ISFIFO(mode
))
691 filetype
= EXT2_FT_FIFO
;
692 else if (LINUX_S_ISSOCK(mode
))
693 filetype
= EXT2_FT_SOCK
;
700 * Forcefully unlink if the same name is present and ignore the error
701 * if any, since this dirent might not exist
703 ext2fs_unlink(fs
, darg
.parent_ino
, darg
.dname
, darg
.ino
,
704 EXT2FS_UNLINK_FORCE
);
706 ret
= errcode_to_errno(
707 ext2fs_link(fs
, darg
.parent_ino
, darg
.dname
, darg
.ino
,
715 /* This function fixes the i_blocks field in the replayed indoe */
716 static void ext4_fc_replay_fixup_iblocks(struct ext2_inode_large
*ondisk_inode
,
717 struct ext2_inode_large
*fc_inode
)
719 if (ondisk_inode
->i_flags
& EXT4_EXTENTS_FL
) {
720 struct ext3_extent_header
*eh
;
722 eh
= (struct ext3_extent_header
*)(&ondisk_inode
->i_block
[0]);
723 if (le16_to_cpu(eh
->eh_magic
) != EXT3_EXT_MAGIC
) {
724 memset(eh
, 0, sizeof(*eh
));
725 eh
->eh_magic
= cpu_to_le16(EXT3_EXT_MAGIC
);
726 eh
->eh_max
= cpu_to_le16(
727 (sizeof(ondisk_inode
->i_block
) -
728 sizeof(struct ext3_extent_header
)) /
729 sizeof(struct ext3_extent
));
731 } else if (ondisk_inode
->i_flags
& EXT4_INLINE_DATA_FL
) {
732 memcpy(ondisk_inode
->i_block
, fc_inode
->i_block
,
733 sizeof(fc_inode
->i_block
));
737 static int ext4_fc_handle_inode(e2fsck_t ctx
, __u8
*val
)
739 int ino
, inode_len
= EXT2_GOOD_OLD_INODE_SIZE
;
740 struct ext2_inode_large
*inode
= NULL
, *fc_inode
= NULL
;
746 memcpy(&fc_ino
, val
, sizeof(fc_ino
));
747 fc_raw_inode
= val
+ sizeof(fc_ino
);
748 ino
= le32_to_cpu(fc_ino
);
750 if (EXT2_INODE_SIZE(ctx
->fs
->super
) > EXT2_GOOD_OLD_INODE_SIZE
) {
751 __u16 extra_isize
= ext2fs_le16_to_cpu(
752 ((struct ext2_inode_large
*)fc_raw_inode
)->i_extra_isize
);
754 if ((extra_isize
< (sizeof(inode
->i_extra_isize
) +
755 sizeof(inode
->i_checksum_hi
))) ||
756 (extra_isize
> (EXT2_INODE_SIZE(ctx
->fs
->super
) -
757 EXT2_GOOD_OLD_INODE_SIZE
))) {
761 inode_len
+= extra_isize
;
763 err
= ext2fs_get_mem(inode_len
, &inode
);
766 err
= ext2fs_get_mem(inode_len
, &fc_inode
);
769 ext4_fc_flush_extents(ctx
, ino
);
771 err
= ext2fs_read_inode_full(ctx
->fs
, ino
, (struct ext2_inode
*)inode
,
775 memcpy(fc_inode
, fc_raw_inode
, inode_len
);
776 #ifdef WORDS_BIGENDIAN
777 ext2fs_swap_inode_full(ctx
->fs
, fc_inode
, fc_inode
, 0, inode_len
);
779 memcpy(inode
, fc_inode
, offsetof(struct ext2_inode_large
, i_block
));
780 memcpy(&inode
->i_generation
, &fc_inode
->i_generation
,
781 inode_len
- offsetof(struct ext2_inode_large
, i_generation
));
782 ext4_fc_replay_fixup_iblocks(inode
, fc_inode
);
783 err
= ext2fs_count_blocks(ctx
->fs
, ino
, EXT2_INODE(inode
), &blks
);
786 ext2fs_iblk_set(ctx
->fs
, EXT2_INODE(inode
), blks
);
787 ext2fs_inode_csum_set(ctx
->fs
, ino
, inode
);
789 err
= ext2fs_write_inode_full(ctx
->fs
, ino
, (struct ext2_inode
*)inode
,
793 if (inode
->i_links_count
)
794 ext2fs_mark_inode_bitmap2(ctx
->fs
->inode_map
, ino
);
796 ext2fs_unmark_inode_bitmap2(ctx
->fs
->inode_map
, ino
);
797 ext2fs_mark_ib_dirty(ctx
->fs
);
800 ext2fs_free_mem(&inode
);
801 ext2fs_free_mem(&fc_inode
);
802 return errcode_to_errno(err
);
806 * Handle add extent replay tag.
808 static int ext4_fc_handle_add_extent(e2fsck_t ctx
, __u8
*val
)
810 struct ext2fs_extent extent
;
811 struct ext4_fc_add_range add_range
;
815 memcpy(&add_range
, val
, sizeof(add_range
));
816 ino
= le32_to_cpu(add_range
.fc_ino
);
817 ext4_fc_flush_extents(ctx
, ino
);
819 ret
= ext4_fc_read_extents(ctx
, ino
);
822 memset(&extent
, 0, sizeof(extent
));
823 ret
= errcode_to_errno(ext2fs_decode_extent(
824 &extent
, (void *)add_range
.fc_ex
,
825 sizeof(add_range
.fc_ex
)));
828 return ext4_add_extent_to_list(ctx
,
829 &ctx
->fc_replay_state
.fc_extent_list
, &extent
);
833 * Handle delete logical range replay tag.
835 static int ext4_fc_handle_del_range(e2fsck_t ctx
, __u8
*val
)
837 struct ext2fs_extent extent
;
838 struct ext4_fc_del_range del_range
;
841 memcpy(&del_range
, val
, sizeof(del_range
));
842 ino
= le32_to_cpu(del_range
.fc_ino
);
843 ext4_fc_flush_extents(ctx
, ino
);
845 memset(&extent
, 0, sizeof(extent
));
846 extent
.e_lblk
= le32_to_cpu(del_range
.fc_lblk
);
847 extent
.e_len
= le32_to_cpu(del_range
.fc_len
);
848 ret
= ext4_fc_read_extents(ctx
, ino
);
851 return ext4_del_extent_from_list(ctx
,
852 &ctx
->fc_replay_state
.fc_extent_list
, &extent
);
856 * Main recovery path entry point. This function returns JBD2_FC_REPLAY_CONTINUE
857 * to indicate that it is expecting more fast commit blocks. It returns
858 * JBD2_FC_REPLAY_STOP to indicate that replay is done.
860 static int ext4_fc_replay(journal_t
*journal
, struct buffer_head
*bh
,
861 enum passtype pass
, int off
, tid_t expected_tid
)
863 e2fsck_t ctx
= journal
->j_fs_dev
->k_ctx
;
864 struct e2fsck_fc_replay_state
*state
= &ctx
->fc_replay_state
;
865 int ret
= JBD2_FC_REPLAY_CONTINUE
;
866 struct ext4_fc_tl tl
;
867 __u8
*start
, *end
, *cur
, *val
;
869 if (pass
== PASS_SCAN
) {
870 state
->fc_current_pass
= PASS_SCAN
;
871 return ext4_fc_replay_scan(journal
, bh
, off
, expected_tid
);
874 if (state
->fc_replay_num_tags
== 0)
877 if (state
->fc_current_pass
!= pass
) {
878 /* Starting replay phase */
879 state
->fc_current_pass
= pass
;
880 /* We will reset checksums */
881 ctx
->fs
->flags
|= EXT2_FLAG_IGNORE_CSUM_ERRORS
;
882 ret
= errcode_to_errno(ext2fs_read_bitmaps(ctx
->fs
));
884 jbd_debug(1, "Error %d while reading bitmaps\n", ret
);
887 state
->fc_super_state
= ctx
->fs
->super
->s_state
;
889 * Mark the file system to indicate it contains errors. That's
890 * because the updates performed by fast commit replay code are
891 * not atomic and may result in inconsistent file system if it
892 * crashes before the replay is complete.
894 ctx
->fs
->super
->s_state
|= EXT2_ERROR_FS
;
895 ctx
->fs
->super
->s_state
|= EXT4_FC_REPLAY
;
896 ext2fs_mark_super_dirty(ctx
->fs
);
897 ext2fs_flush(ctx
->fs
);
900 start
= (__u8
*)bh
->b_data
;
901 end
= (__u8
*)bh
->b_data
+ journal
->j_blocksize
- 1;
903 for (cur
= start
; cur
< end
; cur
= cur
+ le16_to_cpu(tl
.fc_len
) + sizeof(tl
)) {
904 memcpy(&tl
, cur
, sizeof(tl
));
905 val
= cur
+ sizeof(tl
);
907 if (state
->fc_replay_num_tags
== 0)
909 jbd_debug(3, "Replay phase processing %s tag\n",
910 tag2str(le16_to_cpu(tl
.fc_tag
)));
911 state
->fc_replay_num_tags
--;
912 switch (le16_to_cpu(tl
.fc_tag
)) {
913 case EXT4_FC_TAG_CREAT
:
914 case EXT4_FC_TAG_LINK
:
915 ret
= ext4_fc_handle_link_and_create(ctx
, &tl
, val
);
917 case EXT4_FC_TAG_UNLINK
:
918 ret
= ext4_fc_handle_unlink(ctx
, &tl
, val
);
920 case EXT4_FC_TAG_ADD_RANGE
:
921 ret
= ext4_fc_handle_add_extent(ctx
, val
);
923 case EXT4_FC_TAG_DEL_RANGE
:
924 ret
= ext4_fc_handle_del_range(ctx
, val
);
926 case EXT4_FC_TAG_INODE
:
927 ret
= ext4_fc_handle_inode(ctx
, val
);
929 case EXT4_FC_TAG_TAIL
:
930 ext4_fc_flush_extents(ctx
, 0);
931 case EXT4_FC_TAG_PAD
:
932 case EXT4_FC_TAG_HEAD
:
940 ret
= JBD2_FC_REPLAY_CONTINUE
;
944 jbd_debug(1, "End of fast commit replay\n");
945 if (state
->fc_current_pass
!= pass
)
946 return JBD2_FC_REPLAY_STOP
;
948 ext2fs_calculate_summary_stats(ctx
->fs
, 0 /* update bg also */);
949 ext2fs_write_block_bitmap(ctx
->fs
);
950 ext2fs_write_inode_bitmap(ctx
->fs
);
951 ext2fs_mark_super_dirty(ctx
->fs
);
952 ext2fs_set_gdt_csum(ctx
->fs
);
953 ctx
->fs
->super
->s_state
= state
->fc_super_state
;
954 ext2fs_flush(ctx
->fs
);
956 return JBD2_FC_REPLAY_STOP
;
959 static errcode_t
e2fsck_get_journal(e2fsck_t ctx
, journal_t
**ret_journal
)
961 struct process_block_struct pb
;
962 struct ext2_super_block
*sb
= ctx
->fs
->super
;
963 struct ext2_super_block jsuper
;
964 struct problem_context pctx
;
965 struct buffer_head
*bh
;
966 struct inode
*j_inode
= NULL
;
967 struct kdev_s
*dev_fs
= NULL
, *dev_journal
;
968 const char *journal_name
= 0;
969 journal_t
*journal
= NULL
;
970 errcode_t retval
= 0;
971 io_manager io_ptr
= 0;
972 unsigned long long start
= 0;
975 int tried_backup_jnl
= 0;
977 clear_problem_context(&pctx
);
979 journal
= e2fsck_allocate_memory(ctx
, sizeof(journal_t
), "journal");
981 return EXT2_ET_NO_MEMORY
;
984 dev_fs
= e2fsck_allocate_memory(ctx
, 2*sizeof(struct kdev_s
), "kdev");
986 retval
= EXT2_ET_NO_MEMORY
;
989 dev_journal
= dev_fs
+1;
991 dev_fs
->k_ctx
= dev_journal
->k_ctx
= ctx
;
992 dev_fs
->k_dev
= K_DEV_FS
;
993 dev_journal
->k_dev
= K_DEV_JOURNAL
;
995 journal
->j_dev
= dev_journal
;
996 journal
->j_fs_dev
= dev_fs
;
997 journal
->j_inode
= NULL
;
998 journal
->j_blocksize
= ctx
->fs
->blocksize
;
1000 if (uuid_is_null(sb
->s_journal_uuid
)) {
1002 * The full set of superblock sanity checks haven't
1003 * been performed yet, so we need to do some basic
1004 * checks here to avoid potential array overruns.
1006 if (!sb
->s_journal_inum
||
1007 (sb
->s_journal_inum
>
1008 (ctx
->fs
->group_desc_count
* sb
->s_inodes_per_group
))) {
1009 retval
= EXT2_ET_BAD_INODE_NUM
;
1012 j_inode
= e2fsck_allocate_memory(ctx
, sizeof(*j_inode
),
1015 retval
= EXT2_ET_NO_MEMORY
;
1019 j_inode
->i_ctx
= ctx
;
1020 j_inode
->i_ino
= sb
->s_journal_inum
;
1022 if ((retval
= ext2fs_read_inode(ctx
->fs
,
1024 &j_inode
->i_ext2
))) {
1026 if (sb
->s_jnl_backup_type
!= EXT3_JNL_BACKUP_BLOCKS
||
1029 memset(&j_inode
->i_ext2
, 0, sizeof(struct ext2_inode
));
1030 memcpy(&j_inode
->i_ext2
.i_block
[0], sb
->s_jnl_blocks
,
1032 j_inode
->i_ext2
.i_size_high
= sb
->s_jnl_blocks
[15];
1033 j_inode
->i_ext2
.i_size
= sb
->s_jnl_blocks
[16];
1034 j_inode
->i_ext2
.i_links_count
= 1;
1035 j_inode
->i_ext2
.i_mode
= LINUX_S_IFREG
| 0600;
1036 e2fsck_use_inode_shortcuts(ctx
, 1);
1037 ctx
->stashed_ino
= j_inode
->i_ino
;
1038 ctx
->stashed_inode
= &j_inode
->i_ext2
;
1041 if (!j_inode
->i_ext2
.i_links_count
||
1042 !LINUX_S_ISREG(j_inode
->i_ext2
.i_mode
) ||
1043 (j_inode
->i_ext2
.i_flags
& EXT4_ENCRYPT_FL
)) {
1044 retval
= EXT2_ET_NO_JOURNAL
;
1045 goto try_backup_journal
;
1047 if (EXT2_I_SIZE(&j_inode
->i_ext2
) / journal
->j_blocksize
<
1048 JBD2_MIN_JOURNAL_BLOCKS
) {
1049 retval
= EXT2_ET_JOURNAL_TOO_SMALL
;
1050 goto try_backup_journal
;
1053 retval
= ext2fs_block_iterate3(ctx
->fs
, j_inode
->i_ino
,
1055 process_journal_block
, &pb
);
1056 if ((pb
.last_block
+ 1) * ctx
->fs
->blocksize
<
1057 (int) EXT2_I_SIZE(&j_inode
->i_ext2
)) {
1058 retval
= EXT2_ET_JOURNAL_TOO_SMALL
;
1059 goto try_backup_journal
;
1061 if (tried_backup_jnl
&& !(ctx
->options
& E2F_OPT_READONLY
)) {
1062 retval
= ext2fs_write_inode(ctx
->fs
, sb
->s_journal_inum
,
1068 journal
->j_total_len
= EXT2_I_SIZE(&j_inode
->i_ext2
) /
1069 journal
->j_blocksize
;
1072 retval
= ext2fs_inode_io_intern2(ctx
->fs
, sb
->s_journal_inum
,
1078 io_ptr
= inode_io_manager
;
1080 journal
->j_inode
= j_inode
;
1081 ctx
->journal_io
= ctx
->fs
->io
;
1082 if ((ret
= jbd2_journal_bmap(journal
, 0, &start
)) != 0) {
1083 retval
= (errcode_t
) (-1 * ret
);
1089 if (!ctx
->journal_name
) {
1092 uuid_unparse(sb
->s_journal_uuid
, uuid
);
1093 ctx
->journal_name
= blkid_get_devname(ctx
->blkid
,
1095 if (!ctx
->journal_name
)
1096 ctx
->journal_name
= blkid_devno_to_devname(sb
->s_journal_dev
);
1098 journal_name
= ctx
->journal_name
;
1100 if (!journal_name
) {
1101 fix_problem(ctx
, PR_0_CANT_FIND_JOURNAL
, &pctx
);
1102 retval
= EXT2_ET_LOAD_EXT_JOURNAL
;
1106 jfs_debug(1, "Using journal file %s\n", journal_name
);
1107 io_ptr
= unix_io_manager
;
1111 test_io_backing_manager
= io_ptr
;
1112 io_ptr
= test_io_manager
;
1114 #ifndef USE_INODE_IO
1118 int flags
= IO_FLAG_RW
;
1119 if (!(ctx
->mount_flags
& EXT2_MF_ISROOT
&&
1120 ctx
->mount_flags
& EXT2_MF_READONLY
))
1121 flags
|= IO_FLAG_EXCLUSIVE
;
1122 if ((ctx
->mount_flags
& EXT2_MF_READONLY
) &&
1123 (ctx
->options
& E2F_OPT_FORCE
))
1124 flags
&= ~IO_FLAG_EXCLUSIVE
;
1127 retval
= io_ptr
->open(journal_name
, flags
,
1133 io_channel_set_blksize(ctx
->journal_io
, ctx
->fs
->blocksize
);
1138 start
= ext2fs_journal_sb_start(ctx
->fs
->blocksize
) - 1;
1139 bh
= getblk(dev_journal
, start
, ctx
->fs
->blocksize
);
1141 retval
= EXT2_ET_NO_MEMORY
;
1144 ll_rw_block(REQ_OP_READ
, 0, 1, &bh
);
1145 if ((retval
= bh
->b_err
) != 0) {
1149 memcpy(&jsuper
, start
? bh
->b_data
: bh
->b_data
+ SUPERBLOCK_OFFSET
,
1151 #ifdef WORDS_BIGENDIAN
1152 if (jsuper
.s_magic
== ext2fs_swab16(EXT2_SUPER_MAGIC
))
1153 ext2fs_swap_super(&jsuper
);
1155 if (jsuper
.s_magic
!= EXT2_SUPER_MAGIC
||
1156 !ext2fs_has_feature_journal_dev(&jsuper
)) {
1157 fix_problem(ctx
, PR_0_EXT_JOURNAL_BAD_SUPER
, &pctx
);
1158 retval
= EXT2_ET_LOAD_EXT_JOURNAL
;
1162 /* Make sure the journal UUID is correct */
1163 if (memcmp(jsuper
.s_uuid
, ctx
->fs
->super
->s_journal_uuid
,
1164 sizeof(jsuper
.s_uuid
))) {
1165 fix_problem(ctx
, PR_0_JOURNAL_BAD_UUID
, &pctx
);
1166 retval
= EXT2_ET_LOAD_EXT_JOURNAL
;
1171 /* Check the superblock checksum */
1172 if (ext2fs_has_feature_metadata_csum(&jsuper
)) {
1173 struct struct_ext2_filsys fsx
;
1174 struct ext2_super_block superx
;
1177 p
= start
? bh
->b_data
: bh
->b_data
+ SUPERBLOCK_OFFSET
;
1178 memcpy(&fsx
, ctx
->fs
, sizeof(fsx
));
1179 memcpy(&superx
, ctx
->fs
->super
, sizeof(superx
));
1180 fsx
.super
= &superx
;
1181 ext2fs_set_feature_metadata_csum(fsx
.super
);
1182 if (!ext2fs_superblock_csum_verify(&fsx
, p
) &&
1183 fix_problem(ctx
, PR_0_EXT_JOURNAL_SUPER_CSUM_INVALID
,
1185 ext2fs_superblock_csum_set(&fsx
, p
);
1186 mark_buffer_dirty(bh
);
1191 maxlen
= ext2fs_blocks_count(&jsuper
);
1192 journal
->j_total_len
= (maxlen
< 1ULL << 32) ? maxlen
: (1ULL << 32) - 1;
1196 if (!(bh
= getblk(dev_journal
, start
, journal
->j_blocksize
))) {
1197 retval
= EXT2_ET_NO_MEMORY
;
1201 journal
->j_sb_buffer
= bh
;
1202 journal
->j_superblock
= (journal_superblock_t
*)bh
->b_data
;
1203 if (ext2fs_has_feature_fast_commit(ctx
->fs
->super
))
1204 journal
->j_fc_replay_callback
= ext4_fc_replay
;
1206 journal
->j_fc_replay_callback
= NULL
;
1210 ext2fs_free_mem(&j_inode
);
1213 *ret_journal
= journal
;
1214 e2fsck_use_inode_shortcuts(ctx
, 0);
1218 e2fsck_use_inode_shortcuts(ctx
, 0);
1220 ext2fs_free_mem(&dev_fs
);
1222 ext2fs_free_mem(&j_inode
);
1224 ext2fs_free_mem(&journal
);
1228 static errcode_t
e2fsck_journal_fix_bad_inode(e2fsck_t ctx
,
1229 struct problem_context
*pctx
)
1231 struct ext2_super_block
*sb
= ctx
->fs
->super
;
1232 int recover
= ext2fs_has_feature_journal_needs_recovery(ctx
->fs
->super
);
1233 int has_journal
= ext2fs_has_feature_journal(ctx
->fs
->super
);
1235 if (has_journal
|| sb
->s_journal_inum
) {
1236 /* The journal inode is bogus, remove and force full fsck */
1237 pctx
->ino
= sb
->s_journal_inum
;
1238 if (fix_problem(ctx
, PR_0_JOURNAL_BAD_INODE
, pctx
)) {
1239 if (has_journal
&& sb
->s_journal_inum
)
1240 printf("*** journal has been deleted ***\n\n");
1241 ext2fs_clear_feature_journal(sb
);
1242 sb
->s_journal_inum
= 0;
1243 memset(sb
->s_jnl_blocks
, 0, sizeof(sb
->s_jnl_blocks
));
1244 ctx
->flags
|= E2F_FLAG_JOURNAL_INODE
;
1245 ctx
->fs
->flags
&= ~EXT2_FLAG_MASTER_SB_ONLY
;
1246 e2fsck_clear_recover(ctx
, 1);
1249 return EXT2_ET_CORRUPT_JOURNAL_SB
;
1250 } else if (recover
) {
1251 if (fix_problem(ctx
, PR_0_JOURNAL_RECOVER_SET
, pctx
)) {
1252 e2fsck_clear_recover(ctx
, 1);
1255 return EXT2_ET_UNSUPP_FEATURE
;
1260 #define V1_SB_SIZE 0x0024
1261 static void clear_v2_journal_fields(journal_t
*journal
)
1263 e2fsck_t ctx
= journal
->j_dev
->k_ctx
;
1264 struct problem_context pctx
;
1266 clear_problem_context(&pctx
);
1268 if (!fix_problem(ctx
, PR_0_CLEAR_V2_JOURNAL
, &pctx
))
1271 ctx
->flags
|= E2F_FLAG_PROBLEMS_FIXED
;
1272 memset(((char *) journal
->j_superblock
) + V1_SB_SIZE
, 0,
1273 ctx
->fs
->blocksize
-V1_SB_SIZE
);
1274 mark_buffer_dirty(journal
->j_sb_buffer
);
1278 static errcode_t
e2fsck_journal_load(journal_t
*journal
)
1280 e2fsck_t ctx
= journal
->j_dev
->k_ctx
;
1281 journal_superblock_t
*jsb
;
1282 struct buffer_head
*jbh
= journal
->j_sb_buffer
;
1283 struct problem_context pctx
;
1285 clear_problem_context(&pctx
);
1287 ll_rw_block(REQ_OP_READ
, 0, 1, &jbh
);
1289 com_err(ctx
->device_name
, jbh
->b_err
, "%s",
1290 _("reading journal superblock\n"));
1294 jsb
= journal
->j_superblock
;
1295 /* If we don't even have JBD2_MAGIC, we probably have a wrong inode */
1296 if (jsb
->s_header
.h_magic
!= htonl(JBD2_MAGIC_NUMBER
))
1297 return e2fsck_journal_fix_bad_inode(ctx
, &pctx
);
1299 switch (ntohl(jsb
->s_header
.h_blocktype
)) {
1300 case JBD2_SUPERBLOCK_V1
:
1301 journal
->j_format_version
= 1;
1302 if (jsb
->s_feature_compat
||
1303 jsb
->s_feature_incompat
||
1304 jsb
->s_feature_ro_compat
||
1306 clear_v2_journal_fields(journal
);
1309 case JBD2_SUPERBLOCK_V2
:
1310 journal
->j_format_version
= 2;
1311 if (ntohl(jsb
->s_nr_users
) > 1 &&
1312 uuid_is_null(ctx
->fs
->super
->s_journal_uuid
))
1313 clear_v2_journal_fields(journal
);
1314 if (ntohl(jsb
->s_nr_users
) > 1) {
1315 fix_problem(ctx
, PR_0_JOURNAL_UNSUPP_MULTIFS
, &pctx
);
1316 return EXT2_ET_JOURNAL_UNSUPP_VERSION
;
1321 * These should never appear in a journal super block, so if
1322 * they do, the journal is badly corrupted.
1324 case JBD2_DESCRIPTOR_BLOCK
:
1325 case JBD2_COMMIT_BLOCK
:
1326 case JBD2_REVOKE_BLOCK
:
1327 return EXT2_ET_CORRUPT_JOURNAL_SB
;
1329 /* If we don't understand the superblock major type, but there
1330 * is a magic number, then it is likely to be a new format we
1331 * just don't understand, so leave it alone. */
1333 return EXT2_ET_JOURNAL_UNSUPP_VERSION
;
1336 if (JBD2_HAS_INCOMPAT_FEATURE(journal
, ~JBD2_KNOWN_INCOMPAT_FEATURES
))
1337 return EXT2_ET_UNSUPP_FEATURE
;
1339 if (JBD2_HAS_RO_COMPAT_FEATURE(journal
, ~JBD2_KNOWN_ROCOMPAT_FEATURES
))
1340 return EXT2_ET_RO_UNSUPP_FEATURE
;
1342 /* Checksum v1-3 are mutually exclusive features. */
1343 if (jbd2_has_feature_csum2(journal
) && jbd2_has_feature_csum3(journal
))
1344 return EXT2_ET_CORRUPT_JOURNAL_SB
;
1346 if (jbd2_journal_has_csum_v2or3(journal
) &&
1347 jbd2_has_feature_checksum(journal
))
1348 return EXT2_ET_CORRUPT_JOURNAL_SB
;
1350 if (!e2fsck_journal_verify_csum_type(journal
, jsb
) ||
1351 !e2fsck_journal_sb_csum_verify(journal
, jsb
))
1352 return EXT2_ET_CORRUPT_JOURNAL_SB
;
1354 if (jbd2_journal_has_csum_v2or3(journal
))
1355 journal
->j_csum_seed
= jbd2_chksum(journal
, ~0, jsb
->s_uuid
,
1356 sizeof(jsb
->s_uuid
));
1358 /* We have now checked whether we know enough about the journal
1359 * format to be able to proceed safely, so any other checks that
1360 * fail we should attempt to recover from. */
1361 if (jsb
->s_blocksize
!= htonl(journal
->j_blocksize
)) {
1362 com_err(ctx
->program_name
, EXT2_ET_CORRUPT_JOURNAL_SB
,
1363 _("%s: no valid journal superblock found\n"),
1365 return EXT2_ET_CORRUPT_JOURNAL_SB
;
1368 if (ntohl(jsb
->s_maxlen
) < journal
->j_total_len
)
1369 journal
->j_total_len
= ntohl(jsb
->s_maxlen
);
1370 else if (ntohl(jsb
->s_maxlen
) > journal
->j_total_len
) {
1371 com_err(ctx
->program_name
, EXT2_ET_CORRUPT_JOURNAL_SB
,
1372 _("%s: journal too short\n"),
1374 return EXT2_ET_CORRUPT_JOURNAL_SB
;
1377 journal
->j_tail_sequence
= ntohl(jsb
->s_sequence
);
1378 journal
->j_transaction_sequence
= journal
->j_tail_sequence
;
1379 journal
->j_tail
= ntohl(jsb
->s_start
);
1380 journal
->j_first
= ntohl(jsb
->s_first
);
1381 if (jbd2_has_feature_fast_commit(journal
)) {
1382 if (ntohl(jsb
->s_maxlen
) - jbd2_journal_get_num_fc_blks(jsb
)
1383 < JBD2_MIN_JOURNAL_BLOCKS
) {
1384 com_err(ctx
->program_name
, EXT2_ET_CORRUPT_JOURNAL_SB
,
1385 _("%s: incorrect fast commit blocks\n"),
1387 return EXT2_ET_CORRUPT_JOURNAL_SB
;
1389 journal
->j_fc_last
= ntohl(jsb
->s_maxlen
);
1390 journal
->j_last
= journal
->j_fc_last
-
1391 jbd2_journal_get_num_fc_blks(jsb
);
1392 journal
->j_fc_first
= journal
->j_last
+ 1;
1394 journal
->j_last
= ntohl(jsb
->s_maxlen
);
1400 static void e2fsck_journal_reset_super(e2fsck_t ctx
, journal_superblock_t
*jsb
,
1411 /* Leave a valid existing V1 superblock signature alone.
1412 * Anything unrecognisable we overwrite with a new V2
1415 if (jsb
->s_header
.h_magic
!= htonl(JBD2_MAGIC_NUMBER
) ||
1416 jsb
->s_header
.h_blocktype
!= htonl(JBD2_SUPERBLOCK_V1
)) {
1417 jsb
->s_header
.h_magic
= htonl(JBD2_MAGIC_NUMBER
);
1418 jsb
->s_header
.h_blocktype
= htonl(JBD2_SUPERBLOCK_V2
);
1421 /* Zero out everything else beyond the superblock header */
1423 p
= ((char *) jsb
) + sizeof(journal_header_t
);
1424 memset (p
, 0, ctx
->fs
->blocksize
-sizeof(journal_header_t
));
1426 jsb
->s_blocksize
= htonl(ctx
->fs
->blocksize
);
1427 jsb
->s_maxlen
= htonl(journal
->j_total_len
);
1428 jsb
->s_first
= htonl(1);
1430 /* Initialize the journal sequence number so that there is "no"
1431 * chance we will find old "valid" transactions in the journal.
1432 * This avoids the need to zero the whole journal (slow to do,
1433 * and risky when we are just recovering the filesystem).
1435 uuid_generate(u
.uuid
);
1436 for (i
= 0; i
< 4; i
++)
1437 new_seq
^= u
.val
[i
];
1438 jsb
->s_sequence
= htonl(new_seq
);
1439 e2fsck_journal_sb_csum_set(journal
, jsb
);
1441 mark_buffer_dirty(journal
->j_sb_buffer
);
1442 ll_rw_block(REQ_OP_WRITE
, 0, 1, &journal
->j_sb_buffer
);
1445 static errcode_t
e2fsck_journal_fix_corrupt_super(e2fsck_t ctx
,
1447 struct problem_context
*pctx
)
1449 struct ext2_super_block
*sb
= ctx
->fs
->super
;
1450 int recover
= ext2fs_has_feature_journal_needs_recovery(ctx
->fs
->super
);
1452 if (ext2fs_has_feature_journal(sb
)) {
1453 if (fix_problem(ctx
, PR_0_JOURNAL_BAD_SUPER
, pctx
)) {
1454 e2fsck_journal_reset_super(ctx
, journal
->j_superblock
,
1456 journal
->j_transaction_sequence
= 1;
1457 e2fsck_clear_recover(ctx
, recover
);
1460 return EXT2_ET_CORRUPT_JOURNAL_SB
;
1461 } else if (e2fsck_journal_fix_bad_inode(ctx
, pctx
))
1462 return EXT2_ET_CORRUPT_JOURNAL_SB
;
1467 static void e2fsck_journal_release(e2fsck_t ctx
, journal_t
*journal
,
1468 int reset
, int drop
)
1470 journal_superblock_t
*jsb
;
1473 mark_buffer_clean(journal
->j_sb_buffer
);
1474 else if (!(ctx
->options
& E2F_OPT_READONLY
)) {
1475 jsb
= journal
->j_superblock
;
1476 jsb
->s_sequence
= htonl(journal
->j_tail_sequence
);
1478 jsb
->s_start
= 0; /* this marks the journal as empty */
1479 e2fsck_journal_sb_csum_set(journal
, jsb
);
1480 mark_buffer_dirty(journal
->j_sb_buffer
);
1482 brelse(journal
->j_sb_buffer
);
1484 if (ctx
->journal_io
) {
1485 if (ctx
->fs
&& ctx
->fs
->io
!= ctx
->journal_io
)
1486 io_channel_close(ctx
->journal_io
);
1487 ctx
->journal_io
= 0;
1490 #ifndef USE_INODE_IO
1491 if (journal
->j_inode
)
1492 ext2fs_free_mem(&journal
->j_inode
);
1494 if (journal
->j_fs_dev
)
1495 ext2fs_free_mem(&journal
->j_fs_dev
);
1496 ext2fs_free_mem(&journal
);
1500 * This function makes sure that the superblock fields regarding the
1501 * journal are consistent.
1503 errcode_t
e2fsck_check_ext3_journal(e2fsck_t ctx
)
1505 struct ext2_super_block
*sb
= ctx
->fs
->super
;
1507 int recover
= ext2fs_has_feature_journal_needs_recovery(ctx
->fs
->super
);
1508 struct problem_context pctx
;
1510 int reset
= 0, force_fsck
= 0;
1513 /* If we don't have any journal features, don't do anything more */
1514 if (!ext2fs_has_feature_journal(sb
) &&
1515 !recover
&& sb
->s_journal_inum
== 0 && sb
->s_journal_dev
== 0 &&
1516 uuid_is_null(sb
->s_journal_uuid
))
1519 clear_problem_context(&pctx
);
1520 pctx
.num
= sb
->s_journal_inum
;
1522 retval
= e2fsck_get_journal(ctx
, &journal
);
1524 if ((retval
== EXT2_ET_BAD_INODE_NUM
) ||
1525 (retval
== EXT2_ET_BAD_BLOCK_NUM
) ||
1526 (retval
== EXT2_ET_JOURNAL_TOO_SMALL
) ||
1527 (retval
== EXT2_ET_NO_JOURNAL
))
1528 return e2fsck_journal_fix_bad_inode(ctx
, &pctx
);
1532 retval
= e2fsck_journal_load(journal
);
1534 if ((retval
== EXT2_ET_CORRUPT_JOURNAL_SB
) ||
1535 ((retval
== EXT2_ET_UNSUPP_FEATURE
) &&
1536 (!fix_problem(ctx
, PR_0_JOURNAL_UNSUPP_INCOMPAT
,
1538 ((retval
== EXT2_ET_RO_UNSUPP_FEATURE
) &&
1539 (!fix_problem(ctx
, PR_0_JOURNAL_UNSUPP_ROCOMPAT
,
1541 ((retval
== EXT2_ET_JOURNAL_UNSUPP_VERSION
) &&
1542 (!fix_problem(ctx
, PR_0_JOURNAL_UNSUPP_VERSION
, &pctx
))))
1543 retval
= e2fsck_journal_fix_corrupt_super(ctx
, journal
,
1545 e2fsck_journal_release(ctx
, journal
, 0, 1);
1550 * We want to make the flags consistent here. We will not leave with
1551 * needs_recovery set but has_journal clear. We can't get in a loop
1552 * with -y, -n, or -p, only if a user isn't making up their mind.
1555 if (!ext2fs_has_feature_journal(sb
)) {
1556 recover
= ext2fs_has_feature_journal_needs_recovery(sb
);
1557 if (fix_problem(ctx
, PR_0_JOURNAL_HAS_JOURNAL
, &pctx
)) {
1559 !fix_problem(ctx
, PR_0_JOURNAL_RECOVER_SET
, &pctx
))
1560 goto no_has_journal
;
1562 * Need a full fsck if we are releasing a
1563 * journal stored on a reserved inode.
1565 force_fsck
= recover
||
1566 (sb
->s_journal_inum
< EXT2_FIRST_INODE(sb
));
1567 /* Clear all of the journal fields */
1568 sb
->s_journal_inum
= 0;
1569 sb
->s_journal_dev
= 0;
1570 memset(sb
->s_journal_uuid
, 0,
1571 sizeof(sb
->s_journal_uuid
));
1572 e2fsck_clear_recover(ctx
, force_fsck
);
1573 } else if (!(ctx
->options
& E2F_OPT_READONLY
)) {
1574 ext2fs_set_feature_journal(sb
);
1575 ctx
->fs
->flags
&= ~EXT2_FLAG_MASTER_SB_ONLY
;
1576 ext2fs_mark_super_dirty(ctx
->fs
);
1580 if (ext2fs_has_feature_journal(sb
) &&
1581 !ext2fs_has_feature_journal_needs_recovery(sb
) &&
1582 journal
->j_superblock
->s_start
!= 0) {
1583 /* Print status information */
1584 fix_problem(ctx
, PR_0_JOURNAL_RECOVERY_CLEAR
, &pctx
);
1585 if (ctx
->superblock
)
1586 problem
= PR_0_JOURNAL_RUN_DEFAULT
;
1588 problem
= PR_0_JOURNAL_RUN
;
1589 if (fix_problem(ctx
, problem
, &pctx
)) {
1590 ctx
->options
|= E2F_OPT_FORCE
;
1591 ext2fs_set_feature_journal_needs_recovery(sb
);
1592 ext2fs_mark_super_dirty(ctx
->fs
);
1593 } else if (fix_problem(ctx
,
1594 PR_0_JOURNAL_RESET_JOURNAL
, &pctx
)) {
1596 sb
->s_state
&= ~EXT2_VALID_FS
;
1597 ext2fs_mark_super_dirty(ctx
->fs
);
1600 * If the user answers no to the above question, we
1601 * ignore the fact that journal apparently has data;
1602 * accidentally replaying over valid data would be far
1603 * worse than skipping a questionable recovery.
1605 * XXX should we abort with a fatal error here? What
1606 * will the ext3 kernel code do if a filesystem with
1607 * !NEEDS_RECOVERY but with a non-zero
1608 * journal->j_superblock->s_start is mounted?
1613 * If we don't need to do replay the journal, check to see if
1614 * the journal's errno is set; if so, we need to mark the file
1615 * system as being corrupt and clear the journal's s_errno.
1617 if (!ext2fs_has_feature_journal_needs_recovery(sb
) &&
1618 journal
->j_superblock
->s_errno
) {
1619 ctx
->fs
->super
->s_state
|= EXT2_ERROR_FS
;
1620 ext2fs_mark_super_dirty(ctx
->fs
);
1621 journal
->j_superblock
->s_errno
= 0;
1622 e2fsck_journal_sb_csum_set(journal
, journal
->j_superblock
);
1623 mark_buffer_dirty(journal
->j_sb_buffer
);
1626 e2fsck_journal_release(ctx
, journal
, reset
, 0);
1630 static errcode_t
recover_ext3_journal(e2fsck_t ctx
)
1632 struct problem_context pctx
;
1636 clear_problem_context(&pctx
);
1638 retval
= jbd2_journal_init_revoke_record_cache();
1642 retval
= jbd2_journal_init_revoke_table_cache();
1646 retval
= e2fsck_get_journal(ctx
, &journal
);
1650 retval
= e2fsck_journal_load(journal
);
1654 retval
= jbd2_journal_init_revoke(journal
, 1024);
1658 retval
= -jbd2_journal_recover(journal
);
1662 if (journal
->j_failed_commit
) {
1663 pctx
.ino
= journal
->j_failed_commit
;
1664 fix_problem(ctx
, PR_0_JNL_TXN_CORRUPT
, &pctx
);
1665 journal
->j_superblock
->s_errno
= -EINVAL
;
1666 mark_buffer_dirty(journal
->j_sb_buffer
);
1669 journal
->j_tail_sequence
= journal
->j_transaction_sequence
;
1672 jbd2_journal_destroy_revoke(journal
);
1673 jbd2_journal_destroy_revoke_record_cache();
1674 jbd2_journal_destroy_revoke_table_cache();
1675 e2fsck_journal_release(ctx
, journal
, 1, 0);
1679 errcode_t
e2fsck_run_ext3_journal(e2fsck_t ctx
)
1681 io_manager io_ptr
= ctx
->fs
->io
->manager
;
1682 int blocksize
= ctx
->fs
->blocksize
;
1683 errcode_t retval
, recover_retval
;
1685 unsigned long long kbytes_written
= 0;
1687 printf(_("%s: recovering journal\n"), ctx
->device_name
);
1688 if (ctx
->options
& E2F_OPT_READONLY
) {
1689 printf(_("%s: won't do journal recovery while read-only\n"),
1691 return EXT2_ET_FILE_RO
;
1694 if (ctx
->fs
->flags
& EXT2_FLAG_DIRTY
)
1695 ext2fs_flush(ctx
->fs
); /* Force out any modifications */
1697 recover_retval
= recover_ext3_journal(ctx
);
1700 * Reload the filesystem context to get up-to-date data from disk
1701 * because journal recovery will change the filesystem under us.
1703 if (ctx
->fs
->super
->s_kbytes_written
&&
1704 ctx
->fs
->io
->manager
->get_stats
)
1705 ctx
->fs
->io
->manager
->get_stats(ctx
->fs
->io
, &stats
);
1706 if (stats
&& stats
->bytes_written
)
1707 kbytes_written
= stats
->bytes_written
>> 10;
1709 ext2fs_mmp_stop(ctx
->fs
);
1710 ext2fs_free(ctx
->fs
);
1711 retval
= ext2fs_open(ctx
->filesystem_name
, ctx
->openfs_flags
,
1712 ctx
->superblock
, blocksize
, io_ptr
,
1715 com_err(ctx
->program_name
, retval
,
1716 _("while trying to re-open %s"),
1718 fatal_error(ctx
, 0);
1720 ctx
->fs
->priv_data
= ctx
;
1721 ctx
->fs
->now
= ctx
->now
;
1722 ctx
->fs
->flags
|= EXT2_FLAG_MASTER_SB_ONLY
;
1723 ctx
->fs
->super
->s_kbytes_written
+= kbytes_written
;
1725 /* Set the superblock flags */
1726 e2fsck_clear_recover(ctx
, recover_retval
!= 0);
1729 * Do one last sanity check, and propagate journal->s_errno to
1730 * the EXT2_ERROR_FS flag in the fs superblock if needed.
1732 retval
= e2fsck_check_ext3_journal(ctx
);
1733 return retval
? retval
: recover_retval
;
1737 * This function will move the journal inode from a visible file in
1738 * the filesystem directory hierarchy to the reserved inode if necessary.
1740 static const char * const journal_names
[] = {
1741 ".journal", "journal", ".journal.dat", "journal.dat", 0 };
1743 void e2fsck_move_ext3_journal(e2fsck_t ctx
)
1745 struct ext2_super_block
*sb
= ctx
->fs
->super
;
1746 struct problem_context pctx
;
1747 struct ext2_inode inode
;
1748 ext2_filsys fs
= ctx
->fs
;
1751 const char * const * cpp
;
1755 clear_problem_context(&pctx
);
1758 * If the filesystem is opened read-only, or there is no
1759 * journal, then do nothing.
1761 if ((ctx
->options
& E2F_OPT_READONLY
) ||
1762 (sb
->s_journal_inum
== 0) ||
1763 !ext2fs_has_feature_journal(sb
))
1767 * Read in the journal inode
1769 if (ext2fs_read_inode(fs
, sb
->s_journal_inum
, &inode
) != 0)
1773 * If it's necessary to backup the journal inode, do so.
1775 if ((sb
->s_jnl_backup_type
== 0) ||
1776 ((sb
->s_jnl_backup_type
== EXT3_JNL_BACKUP_BLOCKS
) &&
1777 memcmp(inode
.i_block
, sb
->s_jnl_blocks
, EXT2_N_BLOCKS
*4))) {
1778 if (fix_problem(ctx
, PR_0_BACKUP_JNL
, &pctx
)) {
1779 memcpy(sb
->s_jnl_blocks
, inode
.i_block
,
1781 sb
->s_jnl_blocks
[15] = inode
.i_size_high
;
1782 sb
->s_jnl_blocks
[16] = inode
.i_size
;
1783 sb
->s_jnl_backup_type
= EXT3_JNL_BACKUP_BLOCKS
;
1784 ext2fs_mark_super_dirty(fs
);
1785 fs
->flags
&= ~EXT2_FLAG_MASTER_SB_ONLY
;
1790 * If the journal is already the hidden inode, then do nothing
1792 if (sb
->s_journal_inum
== EXT2_JOURNAL_INO
)
1796 * The journal inode had better have only one link and not be readable.
1798 if (inode
.i_links_count
!= 1)
1802 * If the filesystem is mounted, or we can't tell whether
1803 * or not it's mounted, do nothing.
1805 retval
= ext2fs_check_if_mounted(ctx
->filesystem_name
, &mount_flags
);
1806 if (retval
|| (mount_flags
& EXT2_MF_MOUNTED
))
1810 * If we can't find the name of the journal inode, then do
1813 for (cpp
= journal_names
; *cpp
; cpp
++) {
1814 retval
= ext2fs_lookup(fs
, EXT2_ROOT_INO
, *cpp
,
1815 strlen(*cpp
), 0, &ino
);
1816 if ((retval
== 0) && (ino
== sb
->s_journal_inum
))
1822 /* We need the inode bitmap to be loaded */
1823 retval
= ext2fs_read_bitmaps(fs
);
1828 if (!fix_problem(ctx
, PR_0_MOVE_JOURNAL
, &pctx
))
1832 * OK, we've done all the checks, let's actually move the
1833 * journal inode. Errors at this point mean we need to force
1834 * an ext2 filesystem check.
1836 if ((retval
= ext2fs_unlink(fs
, EXT2_ROOT_INO
, *cpp
, ino
, 0)) != 0)
1838 if ((retval
= ext2fs_write_inode(fs
, EXT2_JOURNAL_INO
, &inode
)) != 0)
1840 sb
->s_journal_inum
= EXT2_JOURNAL_INO
;
1841 ext2fs_mark_super_dirty(fs
);
1842 fs
->flags
&= ~EXT2_FLAG_MASTER_SB_ONLY
;
1843 inode
.i_links_count
= 0;
1844 inode
.i_dtime
= ctx
->now
;
1845 if ((retval
= ext2fs_write_inode(fs
, ino
, &inode
)) != 0)
1848 group
= ext2fs_group_of_ino(fs
, ino
);
1849 ext2fs_unmark_inode_bitmap2(fs
->inode_map
, ino
);
1850 ext2fs_mark_ib_dirty(fs
);
1851 ext2fs_bg_free_inodes_count_set(fs
, group
, ext2fs_bg_free_inodes_count(fs
, group
) + 1);
1852 ext2fs_group_desc_csum_set(fs
, group
);
1853 fs
->super
->s_free_inodes_count
++;
1857 pctx
.errcode
= retval
;
1858 fix_problem(ctx
, PR_0_ERR_MOVE_JOURNAL
, &pctx
);
1859 fs
->super
->s_state
&= ~EXT2_VALID_FS
;
1860 ext2fs_mark_super_dirty(fs
);
1865 * This function makes sure the superblock hint for the external
1866 * journal is correct.
1868 int e2fsck_fix_ext3_journal_hint(e2fsck_t ctx
)
1870 struct ext2_super_block
*sb
= ctx
->fs
->super
;
1871 struct problem_context pctx
;
1872 char uuid
[37], *journal_name
;
1875 if (!ext2fs_has_feature_journal(sb
) ||
1876 uuid_is_null(sb
->s_journal_uuid
))
1879 uuid_unparse(sb
->s_journal_uuid
, uuid
);
1880 journal_name
= blkid_get_devname(ctx
->blkid
, "UUID", uuid
);
1884 if (stat(journal_name
, &st
) < 0) {
1889 if (st
.st_rdev
!= sb
->s_journal_dev
) {
1890 clear_problem_context(&pctx
);
1891 pctx
.num
= st
.st_rdev
;
1892 if (fix_problem(ctx
, PR_0_EXTERNAL_JOURNAL_HINT
, &pctx
)) {
1893 sb
->s_journal_dev
= st
.st_rdev
;
1894 ext2fs_mark_super_dirty(ctx
->fs
);