git.ipfire.org Git - thirdparty/systemd.git/commitdiff
sd-journal: rate-limit tail timestamp refresh during iteration
author: dongshengyuan <545258830@qq.com>
Wed, 24 Jun 2026 05:01:32 +0000 (13:01 +0800)
committer: Luca Boccassi <luca.boccassi@gmail.com>
Tue, 30 Jun 2026 12:46:05 +0000 (13:46 +0100)
journal_file_read_tail_timestamp() is called unconditionally in
next_beyond_location() for every file on every iteration step,
resulting in O(N x files) volatile mmap reads, where N is the number
of iteration steps. For large queries like 'journalctl -n 1000000'
this makes the command unusably slow (~5 minutes on systems with many
journal files).

Rather than suppressing the call entirely (which would make the
inotify path fully load-bearing for cross-boot ordering), rate-limit
it to at most once per second per file. This reduces the overhead
from O(N x files) to O(T x files) where T is the iteration time in
seconds, while still providing periodic refresh as a fallback for
any missed inotify events and keeping cross-boot ordering
reasonably fresh.

Embed a RateLimit struct in JournalFile for this purpose.

Measured improvement on a real system: 5:24 -> 2:39 (-51%) for
'journalctl -n 1000000'.

Fixes: #42521
Regression introduced in: #26355 (34af74946e)

Signed-off-by: dongshengyuan <dongshengyuan@uniontech.com>
src/libsystemd/sd-journal/journal-file.c
src/libsystemd/sd-journal/journal-file.h
src/libsystemd/sd-journal/sd-journal.c

index 41e06086d209e8dfdeddcdfcd93fe61e7f44a720..93b2c90c15214f47466b2c5731bc9cff37b3bd45 100644 (file)
@@ -4172,6 +4172,7 @@ int journal_file_open(
                 .strict_order = FLAGS_SET(file_flags, JOURNAL_STRICT_ORDER),
                 .newest_boot_id_prioq_idx = PRIOQ_IDX_NULL,
                 .last_direction = _DIRECTION_INVALID,
+                .tail_timestamp_ratelimit = { .interval = USEC_PER_SEC, .burst = 1 },
         };
 
         if (f->fd < 0) {
index 8499fa2b662f937f2ba5085e055cfaec51650e6c..03a65644bf20cc514f90d3618556feb3f63a773e 100644 (file)
@@ -8,6 +8,7 @@
 #include "sd-forward.h"
 #include "journal-def.h"
 #include "mmap-cache.h"
+#include "ratelimit.h"
 #include "sparse-endian.h"
 
 typedef struct JournalMetrics {
@@ -107,6 +108,7 @@ typedef struct JournalFile {
         unsigned newest_boot_id_prioq_idx;
         uint64_t newest_entry_offset;
         uint8_t newest_state;
+        RateLimit tail_timestamp_ratelimit; /* rate-limits journal_file_read_tail_timestamp() calls during iteration */
 } JournalFile;
 
 typedef enum JournalFileFlags {
index 01bca7e82a19e2fc40a1d6de0ec761bd454f1eae..d67d269f1f690576ad74e07f676041ade3e26bdf 100644 (file)
@@ -1004,7 +1004,12 @@ static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direc
         assert(j);
         assert(f);
 
-        (void) journal_file_read_tail_timestamp(j, f);
+        /* Rate-limit tail timestamp refreshes during iteration. Calling this unconditionally is
+         * O(N x files) volatile mmap overhead that makes large 'journalctl -n N' queries unusably
+         * slow. Periodic refresh keeps cross-boot ordering reasonably fresh and provides a fallback
+         * for any missed inotify events. */
+        if (ratelimit_below(&f->tail_timestamp_ratelimit))
+                (void) journal_file_read_tail_timestamp(j, f);
 
         n_entries = le64toh(f->header->n_entries);