]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/shared/logs-show.c
journalctl: find boot ID more gracefully in corrupted journal
[thirdparty/systemd.git] / src / shared / logs-show.c
index 77452a81bad92e17df08d832385423b6dcc4e6ac..c90400b81955c1cfe39d6e6211bccb0c9c44e788 100644 (file)
@@ -1836,7 +1836,7 @@ static int discover_next_boot(
                 bool advance_older,
                 BootId *ret) {
 
-        BootId boot;
+        _cleanup_set_free_ Set *broken_ids = NULL;
         int r;
 
         assert(j);
@@ -1853,7 +1853,10 @@ static int discover_next_boot(
          * we can actually advance to a *different* boot. */
         sd_journal_flush_matches(j);
 
-        do {
+        for (;;) {
+                sd_id128_t *id_dup;
+                BootId boot;
+
                 r = sd_journal_step_one(j, !advance_older);
                 if (r < 0)
                         return r;
@@ -1874,38 +1877,106 @@ static int discover_next_boot(
                  * speed things up, but let's not trust that it is complete, and hence, manually advance as
                  * necessary. */
 
-        } while (sd_id128_equal(boot.id, previous_boot_id));
+                if (!sd_id128_is_null(previous_boot_id) && sd_id128_equal(boot.id, previous_boot_id))
+                        continue;
 
-        r = sd_journal_get_realtime_usec(j, &boot.first_usec);
-        if (r < 0)
-                return r;
+                if (set_contains(broken_ids, &boot.id))
+                        continue;
 
-        /* Now seek to the last occurrence of this boot ID. */
-        r = add_match_boot_id(j, boot.id);
-        if (r < 0)
-                return r;
+                /* Yay, we found a new boot ID from the entry object. Let's check there exist corresponding
+                 * entries matching with the _BOOT_ID= data. */
 
-        if (advance_older)
-                r = sd_journal_seek_head(j);
-        else
-                r = sd_journal_seek_tail(j);
-        if (r < 0)
-                return r;
+                r = add_match_boot_id(j, boot.id);
+                if (r < 0)
+                        return r;
 
-        r = sd_journal_step_one(j, advance_older);
-        if (r < 0)
-                return r;
-        if (r == 0)
-                return log_debug_errno(SYNTHETIC_ERRNO(ENODATA),
-                                       "Whoopsie! We found a boot ID but can't read its last entry."); /* This shouldn't happen. We just came from this very boot ID. */
+                /* First, seek to the first (or the last when we are going upwards) occurrence of this boot ID.
+                 * You may think this is redundant. Yes, that's redundant unless the journal is corrupted.
+                 * But when the journal is corrupted, especially, badly 'truncated', then the below may fail.
+                 * See https://github.com/systemd/systemd/pull/29334#issuecomment-1736567951. */
+                if (advance_older)
+                        r = sd_journal_seek_tail(j);
+                else
+                        r = sd_journal_seek_head(j);
+                if (r < 0)
+                        return r;
 
-        r = sd_journal_get_realtime_usec(j, &boot.last_usec);
-        if (r < 0)
-                return r;
+                r = sd_journal_step_one(j, 0);
+                if (r < 0)
+                        return r;
+                if (r == 0) {
+                        log_debug("Whoopsie! We found a boot ID %s but can't read its first entry. "
+                                  "The journal seems to be corrupted. Ignoring the boot ID.",
+                                  SD_ID128_TO_STRING(boot.id));
+                        goto try_again;
+                }
 
-        sd_journal_flush_matches(j);
-        *ret = boot;
-        return 1;
+                r = sd_journal_get_realtime_usec(j, &boot.first_usec);
+                if (r < 0)
+                        return r;
+
+                /* Next, seek to the last occurrence of this boot ID. */
+                if (advance_older)
+                        r = sd_journal_seek_head(j);
+                else
+                        r = sd_journal_seek_tail(j);
+                if (r < 0)
+                        return r;
+
+                r = sd_journal_step_one(j, 0);
+                if (r < 0)
+                        return r;
+                if (r == 0) {
+                        log_debug("Whoopsie! We found a boot ID %s but can't read its last entry. "
+                                  "The journal seems to be corrupted. Ignoring the boot ID.",
+                                  SD_ID128_TO_STRING(boot.id));
+                        goto try_again;
+                }
+
+                r = sd_journal_get_realtime_usec(j, &boot.last_usec);
+                if (r < 0)
+                        return r;
+
+                sd_journal_flush_matches(j);
+                *ret = boot;
+                return 1;
+
+        try_again:
+                /* Save the bad boot ID. */
+                id_dup = newdup(sd_id128_t, &boot.id, 1);
+                if (!id_dup)
+                        return -ENOMEM;
+
+                r = set_ensure_consume(&broken_ids, &id128_hash_ops_free, id_dup);
+                if (r < 0)
+                        return r;
+
+                /* Move to the previous position again. */
+                sd_journal_flush_matches(j);
+
+                if (!sd_id128_is_null(previous_boot_id)) {
+                        r = add_match_boot_id(j, previous_boot_id);
+                        if (r < 0)
+                                return r;
+                }
+
+                if (advance_older)
+                        r = sd_journal_seek_head(j);
+                else
+                        r = sd_journal_seek_tail(j);
+                if (r < 0)
+                        return r;
+
+                r = sd_journal_step_one(j, 0);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        return log_debug_errno(SYNTHETIC_ERRNO(ENODATA),
+                                               "Whoopsie! Cannot seek to the last entry of boot %s.",
+                                               SD_ID128_TO_STRING(previous_boot_id));
+
+                sd_journal_flush_matches(j);
+        }
 }
 
 int journal_find_boot_by_id(sd_journal *j, sd_id128_t boot_id) {