From: Yu Watanabe Date: Wed, 27 Sep 2023 21:04:53 +0000 (+0900) Subject: journalctl: find boot ID more gracefully in corrupted journal X-Git-Tag: v255-rc1~369 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=185e201690bd1fff63f99239b2ee7585557445bf;p=thirdparty%2Fsystemd.git journalctl: find boot ID more gracefully in corrupted journal In discover_next_boot(), first we find a new boot ID based on the value stored in the entry object. Then, find the tail (or head when we are going upwards) entry of the boot based on the _BOOT_ID= field data. If boot IDs of an entry in the entry object and _BOOT_ID field data are inconsistent, which may happen on corrupted journal, then previously discover_next_boot() failed with -ENODATA. This makes the function check if the two boot IDs in each entry are consistent, and skip the entry if not. Fixes the failure of `journalctl -b -1` for 'truncated' journal: https://github.com/systemd/systemd/pull/29334#issuecomment-1736567951 --- diff --git a/src/shared/logs-show.c b/src/shared/logs-show.c index 77452a81bad..c90400b8195 100644 --- a/src/shared/logs-show.c +++ b/src/shared/logs-show.c @@ -1836,7 +1836,7 @@ static int discover_next_boot( bool advance_older, BootId *ret) { - BootId boot; + _cleanup_set_free_ Set *broken_ids = NULL; int r; assert(j); @@ -1853,7 +1853,10 @@ static int discover_next_boot( * we can actually advance to a *different* boot. */ sd_journal_flush_matches(j); - do { + for (;;) { + sd_id128_t *id_dup; + BootId boot; + r = sd_journal_step_one(j, !advance_older); if (r < 0) return r; @@ -1874,38 +1877,106 @@ static int discover_next_boot( * speed things up, but let's not trust that it is complete, and hence, manually advance as * necessary. */ - } while (sd_id128_equal(boot.id, previous_boot_id)); + if (!sd_id128_is_null(previous_boot_id) && sd_id128_equal(boot.id, previous_boot_id)) + continue; - r = sd_journal_get_realtime_usec(j, &boot.first_usec); - if (r < 0) - return r; + if (set_contains(broken_ids, &boot.id)) + continue; - /* Now seek to the last occurrence of this boot ID. */ - r = add_match_boot_id(j, boot.id); - if (r < 0) - return r; + /* Yay, we found a new boot ID from the entry object. Let's check there exist corresponding + * entries matching with the _BOOT_ID= data. */ - if (advance_older) - r = sd_journal_seek_head(j); - else - r = sd_journal_seek_tail(j); - if (r < 0) - return r; + r = add_match_boot_id(j, boot.id); + if (r < 0) + return r; - r = sd_journal_step_one(j, advance_older); - if (r < 0) - return r; - if (r == 0) - return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), - "Whoopsie! We found a boot ID but can't read its last entry."); /* This shouldn't happen. We just came from this very boot ID. */ + /* First, seek to the first (or the last when we are going upwards) occurrence of this boot ID. + * You may think this is redundant. Yes, that's redundant unless the journal is corrupted. + * But when the journal is corrupted, especially, badly 'truncated', then the below may fail. + * See https://github.com/systemd/systemd/pull/29334#issuecomment-1736567951. */ + if (advance_older) + r = sd_journal_seek_tail(j); + else + r = sd_journal_seek_head(j); + if (r < 0) + return r; - r = sd_journal_get_realtime_usec(j, &boot.last_usec); - if (r < 0) - return r; + r = sd_journal_step_one(j, 0); + if (r < 0) + return r; + if (r == 0) { + log_debug("Whoopsie! We found a boot ID %s but can't read its first entry. " + "The journal seems to be corrupted. Ignoring the boot ID.", + SD_ID128_TO_STRING(boot.id)); + goto try_again; + } - sd_journal_flush_matches(j); - *ret = boot; - return 1; + r = sd_journal_get_realtime_usec(j, &boot.first_usec); + if (r < 0) + return r; + + /* Next, seek to the last occurrence of this boot ID. */ + if (advance_older) + r = sd_journal_seek_head(j); + else + r = sd_journal_seek_tail(j); + if (r < 0) + return r; + + r = sd_journal_step_one(j, 0); + if (r < 0) + return r; + if (r == 0) { + log_debug("Whoopsie! We found a boot ID %s but can't read its last entry. " + "The journal seems to be corrupted. Ignoring the boot ID.", + SD_ID128_TO_STRING(boot.id)); + goto try_again; + } + + r = sd_journal_get_realtime_usec(j, &boot.last_usec); + if (r < 0) + return r; + + sd_journal_flush_matches(j); + *ret = boot; + return 1; + + try_again: + /* Save the bad boot ID. */ + id_dup = newdup(sd_id128_t, &boot.id, 1); + if (!id_dup) + return -ENOMEM; + + r = set_ensure_consume(&broken_ids, &id128_hash_ops_free, id_dup); + if (r < 0) + return r; + + /* Move to the previous position again. */ + sd_journal_flush_matches(j); + + if (!sd_id128_is_null(previous_boot_id)) { + r = add_match_boot_id(j, previous_boot_id); + if (r < 0) + return r; + } + + if (advance_older) + r = sd_journal_seek_head(j); + else + r = sd_journal_seek_tail(j); + if (r < 0) + return r; + + r = sd_journal_step_one(j, 0); + if (r < 0) + return r; + if (r == 0) + return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), + "Whoopsie! Cannot seek to the last entry of boot %s.", + SD_ID128_TO_STRING(previous_boot_id)); + + sd_journal_flush_matches(j); + } } int journal_find_boot_by_id(sd_journal *j, sd_id128_t boot_id) { diff --git a/test/units/testsuite-04.journal.sh b/test/units/testsuite-04.journal.sh index 2b3929324cc..1e105a05664 100755 --- a/test/units/testsuite-04.journal.sh +++ b/test/units/testsuite-04.journal.sh @@ -233,6 +233,6 @@ done < <(find /test-journals/no-rtc -name "*.zst") journalctl --directory="$JOURNAL_DIR" --list-boots --output=json >/tmp/lb1 diff -u /tmp/lb1 - <<'EOF' -[{"index":-3,"boot_id":"5ea5fc4f82a14186b5332a788ef9435e","first_entry":1666569600994371,"last_entry":1666584266223608},{"index":-2,"boot_id":"bea6864f21ad4c9594c04a99d89948b0","first_entry":1666584266731785,"last_entry":1666584347230411},{"index":-1,"boot_id":"4c708e1fd0744336be16f3931aa861fb","first_entry":1666584348378271,"last_entry":1666584354649355},{"index":0,"boot_id":"35e8501129134edd9df5267c49f744a4","first_entry":1666584356661527,"last_entry":1666584438086856}] +[{"index":-3,"boot_id":"5ea5fc4f82a14186b5332a788ef9435e","first_entry":1666569600994371,"last_entry":1666584266223608},{"index":-2,"boot_id":"bea6864f21ad4c9594c04a99d89948b0","first_entry":1666569601005945,"last_entry":1666584347230411},{"index":-1,"boot_id":"4c708e1fd0744336be16f3931aa861fb","first_entry":1666569601017222,"last_entry":1666584354649355},{"index":0,"boot_id":"35e8501129134edd9df5267c49f744a4","first_entry":1666569601009823,"last_entry":1666584438086856}] EOF rm -rf "$JOURNAL_DIR" /tmp/lb1