journal_file_read_tail_timestamp() is called unconditionally in
next_beyond_location() for every file on every iteration step,
resulting in O(N x files) volatile mmap reads. For large queries
like 'journalctl -n 1000000' this makes the command unusably slow
(~5 minutes on systems with many journal files).
Rather than suppressing the call entirely (which would make the
inotify path fully load-bearing for cross-boot ordering), rate-limit
it to at most once per second per file. This reduces the overhead
from O(N x files) to O(T x files) where T is the iteration time in
seconds, while still providing periodic refresh as a fallback for
any missed inotify events and keeping cross-boot ordering
reasonably fresh.
Embed a RateLimit struct in JournalFile for this purpose.
Measured improvement on a real system: 5:24 -> 2:39 (-51%) for
'journalctl -n 1000000'.
Fixes: #42521
Regression introduced in: #26355 (34af74946e)
Signed-off-by: dongshengyuan <dongshengyuan@uniontech.com>
.strict_order = FLAGS_SET(file_flags, JOURNAL_STRICT_ORDER),
.newest_boot_id_prioq_idx = PRIOQ_IDX_NULL,
.last_direction = _DIRECTION_INVALID,
+ .tail_timestamp_ratelimit = { .interval = USEC_PER_SEC, .burst = 1 },
};
if (f->fd < 0) {
#include "sd-forward.h"
#include "journal-def.h"
#include "mmap-cache.h"
+#include "ratelimit.h"
#include "sparse-endian.h"
typedef struct JournalMetrics {
unsigned newest_boot_id_prioq_idx;
uint64_t newest_entry_offset;
uint8_t newest_state;
+ RateLimit tail_timestamp_ratelimit; /* rate-limits journal_file_read_tail_timestamp() calls during iteration */
} JournalFile;
typedef enum JournalFileFlags {
assert(j);
assert(f);
- (void) journal_file_read_tail_timestamp(j, f);
+ /* Rate-limit tail timestamp refreshes during iteration. Calling this unconditionally is
+ * O(N x files) volatile mmap overhead that makes large 'journalctl -n N' queries unusably
+ * slow. Periodic refresh keeps cross-boot ordering reasonably fresh and provides a fallback
+ * for any missed inotify events. */
+ if (ratelimit_below(&f->tail_timestamp_ratelimit))
+ (void) journal_file_read_tail_timestamp(j, f);
n_entries = le64toh(f->header->n_entries);