]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/journal/journal-file.c
tree-wide: make use of new relative time events in sd-event.h
[thirdparty/systemd.git] / src / journal / journal-file.c
index 91ca53e5ebedbb1d4746455ac2c2e102068ca9f5..feff3bd9f03669033185088766b1a9ad2f3ef0d6 100644 (file)
@@ -16,6 +16,7 @@
 #include "btrfs-util.h"
 #include "chattr-util.h"
 #include "compress.h"
+#include "env-util.h"
 #include "fd-util.h"
 #include "format-util.h"
 #include "fs-util.h"
 #define MIN_COMPRESS_THRESHOLD (8ULL)
 
 /* This is the minimum journal file size */
-#define JOURNAL_FILE_SIZE_MIN (512ULL*1024ULL)                 /* 512 KiB */
+#define JOURNAL_FILE_SIZE_MIN (512 * 1024ULL)             /* 512 KiB */
 
 /* These are the lower and upper bounds if we deduce the max_use value
  * from the file system size */
-#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
-#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */
+#define MAX_USE_LOWER (1 * 1024 * 1024ULL)                /* 1 MiB */
+#define MAX_USE_UPPER (4 * 1024 * 1024 * 1024ULL)         /* 4 GiB */
 
-/* This is the default minimal use limit, how much we'll use even if keep_free suggests otherwise. */
-#define DEFAULT_MIN_USE (1ULL*1024ULL*1024ULL)                 /* 1 MiB */
+/* Those are the lower and upper bounds for the minimal use limit,
+ * i.e. how much we'll use even if keep_free suggests otherwise. */
+#define MIN_USE_LOW (1 * 1024 * 1024ULL)                  /* 1 MiB */
+#define MIN_USE_HIGH (16 * 1024 * 1024ULL)                /* 16 MiB */
 
 /* This is the upper bound if we deduce max_size from max_use */
-#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */
+#define MAX_SIZE_UPPER (128 * 1024 * 1024ULL)             /* 128 MiB */
 
 /* This is the upper bound if we deduce the keep_free value from the
  * file system size */
-#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
+#define KEEP_FREE_UPPER (4 * 1024 * 1024 * 1024ULL)       /* 4 GiB */
 
 /* This is the keep_free value when we can't determine the system
  * size */
-#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MB */
+#define DEFAULT_KEEP_FREE (1024 * 1024ULL)                /* 1 MB */
 
 /* This is the default maximum number of journal files to keep around. */
-#define DEFAULT_N_MAX_FILES (100)
+#define DEFAULT_N_MAX_FILES 100
 
 /* n_data was the first entry we added after the initial file format design */
 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
@@ -71,7 +74,7 @@
 #define CHAIN_CACHE_MAX 20
 
 /* How much to increase the journal file size at once each time we allocate something new. */
-#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL)              /* 8MB */
+#define FILE_SIZE_INCREASE (8 * 1024 * 1024ULL)          /* 8MB */
 
 /* Reread fstat() of the file for detecting deletions at least this often */
 #define LAST_STAT_REFRESH_USEC (5*USEC_PER_SEC)
@@ -79,6 +82,9 @@
 /* The mmap context to use for the header we pick as one above the last defined typed */
 #define CONTEXT_HEADER _OBJECT_TYPE_MAX
 
+/* Longest hash chain to rotate after */
+#define HASH_CHAIN_DEPTH_MAX 100
+
 #ifdef __clang__
 #  pragma GCC diagnostic ignored "-Waddress-of-packed-member"
 #endif
@@ -386,7 +392,7 @@ JournalFile* journal_file_close(JournalFile *f) {
 
         ordered_hashmap_free_free(f->chain_cache);
 
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
         free(f->compress_buffer);
 #endif
 
@@ -417,7 +423,9 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
 
         h.incompatible_flags |= htole32(
                 f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
-                f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
+                f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4 |
+                f->compress_zstd * HEADER_INCOMPATIBLE_COMPRESSED_ZSTD |
+                f->keyed_hash * HEADER_INCOMPATIBLE_KEYED_HASH);
 
         h.compatible_flags = htole32(
                 f->seal * HEADER_COMPATIBLE_SEALED);
@@ -443,7 +451,6 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
 }
 
 static int journal_file_refresh_header(JournalFile *f) {
-        sd_id128_t boot_id;
         int r;
 
         assert(f);
@@ -456,12 +463,10 @@ static int journal_file_refresh_header(JournalFile *f) {
         else if (r < 0)
                 return r;
 
-        r = sd_id128_get_boot(&boot_id);
+        r = sd_id128_get_boot(&f->header->boot_id);
         if (r < 0)
                 return r;
 
-        f->header->boot_id = boot_id;
-
         r = journal_file_set_online(f);
 
         /* Sync the online state to disk */
@@ -487,16 +492,23 @@ static bool warn_wrong_flags(const JournalFile *f, bool compatible) {
                                   f->path, type, flags & ~any);
                 flags = (flags & any) & ~supported;
                 if (flags) {
-                        const char* strv[3];
+                        const char* strv[5];
                         unsigned n = 0;
                         _cleanup_free_ char *t = NULL;
 
-                        if (compatible && (flags & HEADER_COMPATIBLE_SEALED))
-                                strv[n++] = "sealed";
-                        if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ))
-                                strv[n++] = "xz-compressed";
-                        if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4))
-                                strv[n++] = "lz4-compressed";
+                        if (compatible) {
+                                if (flags & HEADER_COMPATIBLE_SEALED)
+                                        strv[n++] = "sealed";
+                        } else {
+                                if (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ)
+                                        strv[n++] = "xz-compressed";
+                                if (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+                                        strv[n++] = "lz4-compressed";
+                                if (flags & HEADER_INCOMPATIBLE_COMPRESSED_ZSTD)
+                                        strv[n++] = "zstd-compressed";
+                                if (flags & HEADER_INCOMPATIBLE_KEYED_HASH)
+                                        strv[n++] = "keyed-hash";
+                        }
                         strv[n] = NULL;
                         assert(n < ELEMENTSOF(strv));
 
@@ -531,7 +543,7 @@ static int journal_file_verify_header(JournalFile *f) {
         if (f->header->state >= _STATE_MAX)
                 return -EBADMSG;
 
-        header_size = le64toh(f->header->header_size);
+        header_size = le64toh(READ_NOW(f->header->header_size));
 
         /* The first addition was n_data, so check that we are at least this large */
         if (header_size < HEADER_SIZE_MIN)
@@ -540,7 +552,7 @@ static int journal_file_verify_header(JournalFile *f) {
         if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
                 return -EBADMSG;
 
-        arena_size = le64toh(f->header->arena_size);
+        arena_size = le64toh(READ_NOW(f->header->arena_size));
 
         if (UINT64_MAX - header_size < arena_size || header_size + arena_size > (uint64_t) f->last_stat.st_size)
                 return -ENODATA;
@@ -593,13 +605,16 @@ static int journal_file_verify_header(JournalFile *f) {
 
         f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
         f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
+        f->compress_zstd = JOURNAL_HEADER_COMPRESSED_ZSTD(f->header);
 
         f->seal = JOURNAL_HEADER_SEALED(f->header);
 
+        f->keyed_hash = JOURNAL_HEADER_KEYED_HASH(f->header);
+
         return 0;
 }
 
-static int journal_file_fstat(JournalFile *f) {
+int journal_file_fstat(JournalFile *f) {
         int r;
 
         assert(f);
@@ -610,7 +625,7 @@ static int journal_file_fstat(JournalFile *f) {
 
         f->last_stat_usec = now(CLOCK_MONOTONIC);
 
-        /* Refuse dealing with with files that aren't regular */
+        /* Refuse dealing with files that aren't regular */
         r = stat_verify_regular(&f->last_stat);
         if (r < 0)
                 return r;
@@ -623,26 +638,29 @@ static int journal_file_fstat(JournalFile *f) {
 }
 
 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
-        uint64_t old_size, new_size;
+        uint64_t old_size, new_size, old_header_size, old_arena_size;
         int r;
 
         assert(f);
         assert(f->header);
 
-        /* We assume that this file is not sparse, and we know that
-         * for sure, since we always call posix_fallocate()
-         * ourselves */
+        /* We assume that this file is not sparse, and we know that for sure, since we always call
+         * posix_fallocate() ourselves */
+
+        if (size > PAGE_ALIGN_DOWN(UINT64_MAX) - offset)
+                return -EINVAL;
 
         if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
                 return -EIO;
 
-        old_size =
-                le64toh(f->header->header_size) +
-                le64toh(f->header->arena_size);
+        old_header_size = le64toh(READ_NOW(f->header->header_size));
+        old_arena_size = le64toh(READ_NOW(f->header->arena_size));
+        if (old_arena_size > PAGE_ALIGN_DOWN(UINT64_MAX) - old_header_size)
+                return -EBADMSG;
+
+        old_size = old_header_size + old_arena_size;
 
-        new_size = PAGE_ALIGN(offset + size);
-        if (new_size < le64toh(f->header->header_size))
-                new_size = le64toh(f->header->header_size);
+        new_size = MAX(PAGE_ALIGN(offset + size), old_header_size);
 
         if (new_size <= old_size) {
 
@@ -688,7 +706,7 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
         if (r != 0)
                 return -r;
 
-        f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
+        f->header->arena_size = htole64(new_size - old_header_size);
 
         return journal_file_fstat(f);
 }
@@ -700,7 +718,15 @@ static unsigned type_to_context(ObjectType type) {
         return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;
 }
 
-static int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_always, uint64_t offset, uint64_t size, void **ret, size_t *ret_size) {
+static int journal_file_move_to(
+                JournalFile *f,
+                ObjectType type,
+                bool keep_always,
+                uint64_t offset,
+                uint64_t size,
+                void **ret,
+                size_t *ret_size) {
+
         int r;
 
         assert(f);
@@ -709,6 +735,9 @@ static int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_alway
         if (size <= 0)
                 return -EINVAL;
 
+        if (size > UINT64_MAX - offset)
+                return -EBADMSG;
+
         /* Avoid SIGBUS on invalid accesses */
         if (offset + size > (uint64_t) f->last_stat.st_size) {
                 /* Hmm, out of range? Let's refresh the fstat() data
@@ -751,14 +780,14 @@ static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o)
 
         switch (o->object.type) {
 
-        case OBJECT_DATA: {
+        case OBJECT_DATA:
                 if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0))
                         return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                                "Bad n_entries: %" PRIu64 ": %" PRIu64,
                                                le64toh(o->data.n_entries),
                                                offset);
 
-                if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0)
+                if (le64toh(o->object.size) <= offsetof(DataObject, payload))
                         return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                                "Bad object size (<= %zu): %" PRIu64 ": %" PRIu64,
                                                offsetof(DataObject, payload),
@@ -778,10 +807,9 @@ static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o)
                                                offset);
 
                 break;
-        }
 
         case OBJECT_FIELD:
-                if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0)
+                if (le64toh(o->object.size) <= offsetof(FieldObject, payload))
                         return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                                "Bad field size (<= %zu): %" PRIu64 ": %" PRIu64,
                                                offsetof(FieldObject, payload),
@@ -797,18 +825,22 @@ static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o)
                                                offset);
                 break;
 
-        case OBJECT_ENTRY:
-                if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
+        case OBJECT_ENTRY: {
+                uint64_t sz;
+
+                sz = le64toh(READ_NOW(o->object.size));
+                if (sz < offsetof(EntryObject, items) ||
+                    (sz - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
                         return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                                "Bad entry size (<= %zu): %" PRIu64 ": %" PRIu64,
                                                offsetof(EntryObject, items),
-                                               le64toh(o->object.size),
+                                               sz,
                                                offset);
 
-                if ((le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
+                if ((sz - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
                         return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                                "Invalid number items in entry: %" PRIu64 ": %" PRIu64,
-                                               (le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem),
+                                               (sz - offsetof(EntryObject, items)) / sizeof(EntryItem),
                                                offset);
 
                 if (le64toh(o->entry.seqnum) <= 0)
@@ -830,25 +862,35 @@ static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o)
                                                offset);
 
                 break;
+        }
 
         case OBJECT_DATA_HASH_TABLE:
-        case OBJECT_FIELD_HASH_TABLE:
-                if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
-                    (le64toh(o->object.size) - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0)
+        case OBJECT_FIELD_HASH_TABLE: {
+                uint64_t sz;
+
+                sz = le64toh(READ_NOW(o->object.size));
+                if (sz < offsetof(HashTableObject, items) ||
+                    (sz - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
+                    (sz - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0)
                         return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                                "Invalid %s hash table size: %" PRIu64 ": %" PRIu64,
                                                o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
-                                               le64toh(o->object.size),
+                                               sz,
                                                offset);
 
                 break;
+        }
 
-        case OBJECT_ENTRY_ARRAY:
-                if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
-                    (le64toh(o->object.size) - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0)
+        case OBJECT_ENTRY_ARRAY: {
+                uint64_t sz;
+
+                sz = le64toh(READ_NOW(o->object.size));
+                if (sz < offsetof(EntryArrayObject, items) ||
+                    (sz - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
+                    (sz - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0)
                         return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                                "Invalid object entry array size: %" PRIu64 ": %" PRIu64,
-                                               le64toh(o->object.size),
+                                               sz,
                                                offset);
 
                 if (!VALID64(le64toh(o->entry_array.next_entry_array_offset)))
@@ -858,6 +900,7 @@ static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o)
                                                offset);
 
                 break;
+        }
 
         case OBJECT_TAG:
                 if (le64toh(o->object.size) != sizeof(TagObject))
@@ -904,7 +947,7 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset
                 return r;
 
         o = (Object*) t;
-        s = le64toh(o->object.size);
+        s = le64toh(READ_NOW(o->object.size));
 
         if (s == 0)
                 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
@@ -973,7 +1016,13 @@ static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
         return r;
 }
 
-int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset) {
+int journal_file_append_object(
+                JournalFile *f,
+                ObjectType type,
+                uint64_t size,
+                Object **ret,
+                uint64_t *ret_offset) {
+
         int r;
         uint64_t p;
         Object *tail, *o;
@@ -983,8 +1032,6 @@ int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, O
         assert(f->header);
         assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
         assert(size >= sizeof(ObjectHeader));
-        assert(offset);
-        assert(ret);
 
         r = journal_file_set_online(f);
         if (r < 0)
@@ -994,11 +1041,21 @@ int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, O
         if (p == 0)
                 p = le64toh(f->header->header_size);
         else {
+                uint64_t sz;
+
                 r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);
                 if (r < 0)
                         return r;
 
-                p += ALIGN64(le64toh(tail->object.size));
+                sz = le64toh(READ_NOW(tail->object.size));
+                if (sz > UINT64_MAX - sizeof(uint64_t) + 1)
+                        return -EBADMSG;
+
+                sz = ALIGN64(sz);
+                if (p > UINT64_MAX - sz)
+                        return -EBADMSG;
+
+                p += sz;
         }
 
         r = journal_file_allocate(f, p, size);
@@ -1010,16 +1067,19 @@ int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, O
                 return r;
 
         o = (Object*) t;
-
-        zero(o->object);
-        o->object.type = type;
-        o->object.size = htole64(size);
+        o->object = (ObjectHeader) {
+                .type = type,
+                .size = htole64(size),
+        };
 
         f->header->tail_object_offset = htole64(p);
         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
 
-        *ret = o;
-        *offset = p;
+        if (ret)
+                *ret = o;
+
+        if (ret_offset)
+                *ret_offset = p;
 
         return 0;
 }
@@ -1041,7 +1101,7 @@ static int journal_file_setup_data_hash_table(JournalFile *f) {
         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
 
-        log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
+        log_debug("Reserving %"PRIu64" entries in data hash table.", s / sizeof(HashItem));
 
         r = journal_file_append_object(f,
                                        OBJECT_DATA_HASH_TABLE,
@@ -1070,6 +1130,8 @@ static int journal_file_setup_field_hash_table(JournalFile *f) {
          * number should grow very slowly only */
 
         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
+        log_debug("Reserving %"PRIu64" entries in field hash table.", s / sizeof(HashItem));
+
         r = journal_file_append_object(f,
                                        OBJECT_FIELD_HASH_TABLE,
                                        offsetof(Object, hash_table.items) + s,
@@ -1155,7 +1217,7 @@ static int journal_file_link_field(
         if (o->object.type != OBJECT_FIELD)
                 return -EINVAL;
 
-        m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+        m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem);
         if (m <= 0)
                 return -EBADMSG;
 
@@ -1200,7 +1262,7 @@ static int journal_file_link_data(
         if (o->object.type != OBJECT_DATA)
                 return -EINVAL;
 
-        m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+        m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem);
         if (m <= 0)
                 return -EBADMSG;
 
@@ -1233,12 +1295,38 @@ static int journal_file_link_data(
         return 0;
 }
 
+static int next_hash_offset(
+                JournalFile *f,
+                uint64_t *p,
+                le64_t *next_hash_offset,
+                uint64_t *depth,
+                le64_t *header_max_depth) {
+
+        uint64_t nextp;
+
+        nextp = le64toh(READ_NOW(*next_hash_offset));
+        if (nextp > 0) {
+                if (nextp <= *p) /* Refuse going in loops */
+                        return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+                                               "Detected hash item loop in %s, refusing.", f->path);
+
+                (*depth)++;
+
+                /* If the depth of this hash chain is larger than all others we have seen so far, record it */
+                if (header_max_depth && f->writable)
+                        *header_max_depth = htole64(MAX(*depth, le64toh(*header_max_depth)));
+        }
+
+        *p = nextp;
+        return 0;
+}
+
 int journal_file_find_field_object_with_hash(
                 JournalFile *f,
                 const void *field, uint64_t size, uint64_t hash,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
-        uint64_t p, osize, h, m;
+        uint64_t p, osize, h, m, depth = 0;
         int r;
 
         assert(f);
@@ -1256,13 +1344,12 @@ int journal_file_find_field_object_with_hash(
 
         osize = offsetof(Object, field.payload) + size;
 
-        m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+        m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem);
         if (m <= 0)
                 return -EBADMSG;
 
         h = hash % m;
         p = le64toh(f->field_hash_table[h].head_hash_offset);
-
         while (p > 0) {
                 Object *o;
 
@@ -1276,41 +1363,63 @@ int journal_file_find_field_object_with_hash(
 
                         if (ret)
                                 *ret = o;
-                        if (offset)
-                                *offset = p;
+                        if (ret_offset)
+                                *ret_offset = p;
 
                         return 1;
                 }
 
-                p = le64toh(o->field.next_hash_offset);
+                r = next_hash_offset(
+                                f,
+                                &p,
+                                &o->field.next_hash_offset,
+                                &depth,
+                                JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) ? &f->header->field_hash_chain_depth : NULL);
+                if (r < 0)
+                        return r;
         }
 
         return 0;
 }
 
+uint64_t journal_file_hash_data(
+                JournalFile *f,
+                const void *data,
+                size_t sz) {
+
+        assert(f);
+        assert(data || sz == 0);
+
+        /* We try to unify our codebase on siphash, hence new-styled journal files utilizing the keyed hash
+         * function use siphash. Old journal files use the Jenkins hash. */
+
+        if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                return siphash24(data, sz, f->header->file_id.bytes);
+
+        return jenkins_hash64(data, sz);
+}
+
 int journal_file_find_field_object(
                 JournalFile *f,
                 const void *field, uint64_t size,
-                Object **ret, uint64_t *offset) {
-
-        uint64_t hash;
+                Object **ret, uint64_t *ret_offset) {
 
         assert(f);
         assert(field && size > 0);
 
-        hash = hash64(field, size);
-
-        return journal_file_find_field_object_with_hash(f,
-                                                        field, size, hash,
-                                                        ret, offset);
+        return journal_file_find_field_object_with_hash(
+                        f,
+                        field, size,
+                        journal_file_hash_data(f, field, size),
+                        ret, ret_offset);
 }
 
 int journal_file_find_data_object_with_hash(
                 JournalFile *f,
                 const void *data, uint64_t size, uint64_t hash,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
-        uint64_t p, osize, h, m;
+        uint64_t p, osize, h, m, depth = 0;
         int r;
 
         assert(f);
@@ -1328,7 +1437,7 @@ int journal_file_find_data_object_with_hash(
 
         osize = offsetof(Object, data.payload) + size;
 
-        m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+        m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem);
         if (m <= 0)
                 return -EBADMSG;
 
@@ -1346,11 +1455,11 @@ int journal_file_find_data_object_with_hash(
                         goto next;
 
                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
                         uint64_t l;
                         size_t rsize = 0;
 
-                        l = le64toh(o->object.size);
+                        l = le64toh(READ_NOW(o->object.size));
                         if (l <= offsetof(Object, data.payload))
                                 return -EBADMSG;
 
@@ -1367,8 +1476,8 @@ int journal_file_find_data_object_with_hash(
                                 if (ret)
                                         *ret = o;
 
-                                if (offset)
-                                        *offset = p;
+                                if (ret_offset)
+                                        *ret_offset = p;
 
                                 return 1;
                         }
@@ -1381,14 +1490,21 @@ int journal_file_find_data_object_with_hash(
                         if (ret)
                                 *ret = o;
 
-                        if (offset)
-                                *offset = p;
+                        if (ret_offset)
+                                *ret_offset = p;
 
                         return 1;
                 }
 
         next:
-                p = le64toh(o->data.next_hash_offset);
+                r = next_hash_offset(
+                                f,
+                                &p,
+                                &o->data.next_hash_offset,
+                                &depth,
+                                JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) ? &f->header->data_hash_chain_depth : NULL);
+                if (r < 0)
+                        return r;
         }
 
         return 0;
@@ -1397,24 +1513,22 @@ int journal_file_find_data_object_with_hash(
 int journal_file_find_data_object(
                 JournalFile *f,
                 const void *data, uint64_t size,
-                Object **ret, uint64_t *offset) {
-
-        uint64_t hash;
+                Object **ret, uint64_t *ret_offset) {
 
         assert(f);
         assert(data || size == 0);
 
-        hash = hash64(data, size);
-
-        return journal_file_find_data_object_with_hash(f,
-                                                       data, size, hash,
-                                                       ret, offset);
+        return journal_file_find_data_object_with_hash(
+                        f,
+                        data, size,
+                        journal_file_hash_data(f, data, size),
+                        ret, ret_offset);
 }
 
 static int journal_file_append_field(
                 JournalFile *f,
                 const void *field, uint64_t size,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
         uint64_t hash, p;
         uint64_t osize;
@@ -1424,7 +1538,7 @@ static int journal_file_append_field(
         assert(f);
         assert(field && size > 0);
 
-        hash = hash64(field, size);
+        hash = journal_file_hash_data(f, field, size);
 
         r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
         if (r < 0)
@@ -1434,8 +1548,8 @@ static int journal_file_append_field(
                 if (ret)
                         *ret = o;
 
-                if (offset)
-                        *offset = p;
+                if (ret_offset)
+                        *ret_offset = p;
 
                 return 0;
         }
@@ -1467,8 +1581,8 @@ static int journal_file_append_field(
         if (ret)
                 *ret = o;
 
-        if (offset)
-                *offset = p;
+        if (ret_offset)
+                *ret_offset = p;
 
         return 0;
 }
@@ -1476,7 +1590,7 @@ static int journal_file_append_field(
 static int journal_file_append_data(
                 JournalFile *f,
                 const void *data, uint64_t size,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
         uint64_t hash, p;
         uint64_t osize;
@@ -1487,7 +1601,7 @@ static int journal_file_append_data(
         assert(f);
         assert(data || size == 0);
 
-        hash = hash64(data, size);
+        hash = journal_file_hash_data(f, data, size);
 
         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
         if (r < 0)
@@ -1497,8 +1611,8 @@ static int journal_file_append_data(
                 if (ret)
                         *ret = o;
 
-                if (offset)
-                        *offset = p;
+                if (ret_offset)
+                        *ret_offset = p;
 
                 return 0;
         }
@@ -1510,7 +1624,7 @@ static int journal_file_append_data(
 
         o->data.hash = htole64(hash);
 
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
         if (JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) {
                 size_t rsize = 0;
 
@@ -1568,37 +1682,54 @@ static int journal_file_append_data(
         if (ret)
                 *ret = o;
 
-        if (offset)
-                *offset = p;
+        if (ret_offset)
+                *ret_offset = p;
 
         return 0;
 }
 
 uint64_t journal_file_entry_n_items(Object *o) {
+        uint64_t sz;
         assert(o);
 
         if (o->object.type != OBJECT_ENTRY)
                 return 0;
 
-        return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
+        sz = le64toh(READ_NOW(o->object.size));
+        if (sz < offsetof(Object, entry.items))
+                return 0;
+
+        return (sz - offsetof(Object, entry.items)) / sizeof(EntryItem);
 }
 
 uint64_t journal_file_entry_array_n_items(Object *o) {
+        uint64_t sz;
+
         assert(o);
 
         if (o->object.type != OBJECT_ENTRY_ARRAY)
                 return 0;
 
-        return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
+        sz = le64toh(READ_NOW(o->object.size));
+        if (sz < offsetof(Object, entry_array.items))
+                return 0;
+
+        return (sz - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
 }
 
 uint64_t journal_file_hash_table_n_items(Object *o) {
+        uint64_t sz;
+
         assert(o);
 
         if (!IN_SET(o->object.type, OBJECT_DATA_HASH_TABLE, OBJECT_FIELD_HASH_TABLE))
                 return 0;
 
-        return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
+        sz = le64toh(READ_NOW(o->object.size));
+        if (sz < offsetof(Object, hash_table.items))
+                return 0;
+
+        return (sz - offsetof(Object, hash_table.items)) / sizeof(HashItem);
 }
 
 static int link_entry_into_array(JournalFile *f,
@@ -1616,7 +1747,7 @@ static int link_entry_into_array(JournalFile *f,
         assert(p > 0);
 
         a = le64toh(*first);
-        i = hidx = le64toh(*idx);
+        i = hidx = le64toh(READ_NOW(*idx));
         while (a > 0) {
 
                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
@@ -1681,6 +1812,7 @@ static int link_entry_into_array_plus_one(JournalFile *f,
                                           le64_t *idx,
                                           uint64_t p) {
 
+        uint64_t hidx;
         int r;
 
         assert(f);
@@ -1689,32 +1821,33 @@ static int link_entry_into_array_plus_one(JournalFile *f,
         assert(idx);
         assert(p > 0);
 
-        if (*idx == 0)
+        hidx = le64toh(READ_NOW(*idx));
+        if (hidx == UINT64_MAX)
+                return -EBADMSG;
+        if (hidx == 0)
                 *extra = htole64(p);
         else {
                 le64_t i;
 
-                i = htole64(le64toh(*idx) - 1);
+                i = htole64(hidx - 1);
                 r = link_entry_into_array(f, first, &i, p);
                 if (r < 0)
                         return r;
         }
 
-        *idx = htole64(le64toh(*idx) + 1);
+        *idx = htole64(hidx + 1);
         return 0;
 }
 
 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
         uint64_t p;
         int r;
+
         assert(f);
         assert(o);
         assert(offset > 0);
 
         p = le64toh(o->entry.items[i].object_offset);
-        if (p == 0)
-                return -EINVAL;
-
         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
         if (r < 0)
                 return r;
@@ -1774,7 +1907,7 @@ static int journal_file_append_entry_internal(
                 uint64_t xor_hash,
                 const EntryItem items[], unsigned n_items,
                 uint64_t *seqnum,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
         uint64_t np;
         uint64_t osize;
         Object *o;
@@ -1813,8 +1946,8 @@ static int journal_file_append_entry_internal(
         if (ret)
                 *ret = o;
 
-        if (offset)
-                *offset = np;
+        if (ret_offset)
+                *ret_offset = np;
 
         return 0;
 }
@@ -1845,7 +1978,6 @@ static int post_change_thunk(sd_event_source *timer, uint64_t usec, void *userda
 }
 
 static void schedule_post_change(JournalFile *f) {
-        uint64_t now;
         int r;
 
         assert(f);
@@ -1859,13 +1991,7 @@ static void schedule_post_change(JournalFile *f) {
         if (r > 0)
                 return;
 
-        r = sd_event_now(sd_event_source_get_event(f->post_change_timer), CLOCK_MONOTONIC, &now);
-        if (r < 0) {
-                log_debug_errno(r, "Failed to get clock's now for scheduling ftruncate: %m");
-                goto fail;
-        }
-
-        r = sd_event_source_set_time(f->post_change_timer, now + f->post_change_timer_period);
+        r = sd_event_source_set_time_relative(f->post_change_timer, f->post_change_timer_period);
         if (r < 0) {
                 log_debug_errno(r, "Failed to set time for scheduling ftruncate: %m");
                 goto fail;
@@ -1918,7 +2044,7 @@ int journal_file_append_entry(
                 const sd_id128_t *boot_id,
                 const struct iovec iovec[], unsigned n_iovec,
                 uint64_t *seqnum,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
         unsigned i;
         EntryItem *items;
@@ -1961,7 +2087,20 @@ int journal_file_append_entry(
                 if (r < 0)
                         return r;
 
-                xor_hash ^= le64toh(o->data.hash);
+                /* When calculating the XOR hash field, we need to take special care if the "keyed-hash"
+                 * journal file flag is on. We use the XOR hash field to quickly determine the identity of a
+                 * specific record, and give records with otherwise identical position (i.e. match in seqno,
+                 * timestamp, â€¦) a stable ordering. But for that we can't have it that the hash of the
+                 * objects in each file is different since they are keyed. Hence let's calculate the Jenkins
+                 * hash here for that. This also has the benefit that cursors for old and new journal files
+                 * are completely identical (they include the XOR hash after all). For classic Jenkins-hash
+                 * files things are easier, we can just take the value from the stored record directly. */
+
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        xor_hash ^= jenkins_hash64(iovec[i].iov_base, iovec[i].iov_len);
+                else
+                        xor_hash ^= le64toh(o->data.hash);
+
                 items[i].object_offset = htole64(p);
                 items[i].hash = o->data.hash;
         }
@@ -1970,7 +2109,7 @@ int journal_file_append_entry(
          * times for rotating media. */
         typesafe_qsort(items, n_iovec, entry_item_cmp);
 
-        r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, ret, offset);
+        r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, ret, ret_offset);
 
         /* If the memory mapping triggered a SIGBUS then we return an
          * IO error and ignore the error code passed down to us, since
@@ -2039,7 +2178,7 @@ static int generic_array_get(
                 JournalFile *f,
                 uint64_t first,
                 uint64_t i,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
         Object *o;
         uint64_t p = 0, a, t = 0;
@@ -2089,8 +2228,8 @@ found:
         if (ret)
                 *ret = o;
 
-        if (offset)
-                *offset = p;
+        if (ret_offset)
+                *ret_offset = p;
 
         return 1;
 }
@@ -2100,7 +2239,7 @@ static int generic_array_get_plus_one(
                 uint64_t extra,
                 uint64_t first,
                 uint64_t i,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
         Object *o;
 
@@ -2116,13 +2255,13 @@ static int generic_array_get_plus_one(
                 if (ret)
                         *ret = o;
 
-                if (offset)
-                        *offset = extra;
+                if (ret_offset)
+                        *ret_offset = extra;
 
                 return 1;
         }
 
-        return generic_array_get(f, first, i-1, ret, offset);
+        return generic_array_get(f, first, i-1, ret, ret_offset);
 }
 
 enum {
@@ -2139,8 +2278,8 @@ static int generic_array_bisect(
                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
                 direction_t direction,
                 Object **ret,
-                uint64_t *offset,
-                uint64_t *idx) {
+                uint64_t *ret_offset,
+                uint64_t *ret_idx) {
 
         uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
         bool subtract_one = false;
@@ -2339,11 +2478,11 @@ found:
         if (ret)
                 *ret = o;
 
-        if (offset)
-                *offset = p;
+        if (ret_offset)
+                *ret_offset = p;
 
-        if (idx)
-                *idx = t + i + (subtract_one ? -1 : 0);
+        if (ret_idx)
+                *ret_idx = t + i + (subtract_one ? -1 : 0);
 
         return 1;
 }
@@ -2357,8 +2496,8 @@ static int generic_array_bisect_plus_one(
                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
                 direction_t direction,
                 Object **ret,
-                uint64_t *offset,
-                uint64_t *idx) {
+                uint64_t *ret_offset,
+                uint64_t *ret_idx) {
 
         int r;
         bool step_back = false;
@@ -2394,13 +2533,13 @@ static int generic_array_bisect_plus_one(
                         return 0;
         }
 
-        r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
+        r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, ret_offset, ret_idx);
 
         if (r == 0 && step_back)
                 goto found;
 
-        if (r > 0 && idx)
-                (*idx)++;
+        if (r > 0 && ret_idx)
+                (*ret_idx)++;
 
         return r;
 
@@ -2412,11 +2551,11 @@ found:
         if (ret)
                 *ret = o;
 
-        if (offset)
-                *offset = extra;
+        if (ret_offset)
+                *ret_offset = extra;
 
-        if (idx)
-                *idx = 0;
+        if (ret_idx)
+                *ret_idx = 0;
 
         return 1;
 }
@@ -2434,6 +2573,7 @@ _pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle
 }
 
 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
+        uint64_t sq;
         Object *o;
         int r;
 
@@ -2444,9 +2584,10 @@ static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
         if (r < 0)
                 return r;
 
-        if (le64toh(o->entry.seqnum) == needle)
+        sq = le64toh(READ_NOW(o->entry.seqnum));
+        if (sq == needle)
                 return TEST_FOUND;
-        else if (le64toh(o->entry.seqnum) < needle)
+        else if (sq < needle)
                 return TEST_LEFT;
         else
                 return TEST_RIGHT;
@@ -2457,21 +2598,23 @@ int journal_file_move_to_entry_by_seqnum(
                 uint64_t seqnum,
                 direction_t direction,
                 Object **ret,
-                uint64_t *offset) {
+                uint64_t *ret_offset) {
         assert(f);
         assert(f->header);
 
-        return generic_array_bisect(f,
-                                    le64toh(f->header->entry_array_offset),
-                                    le64toh(f->header->n_entries),
-                                    seqnum,
-                                    test_object_seqnum,
-                                    direction,
-                                    ret, offset, NULL);
+        return generic_array_bisect(
+                        f,
+                        le64toh(f->header->entry_array_offset),
+                        le64toh(f->header->n_entries),
+                        seqnum,
+                        test_object_seqnum,
+                        direction,
+                        ret, ret_offset, NULL);
 }
 
 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
         Object *o;
+        uint64_t rt;
         int r;
 
         assert(f);
@@ -2481,9 +2624,10 @@ static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
         if (r < 0)
                 return r;
 
-        if (le64toh(o->entry.realtime) == needle)
+        rt = le64toh(READ_NOW(o->entry.realtime));
+        if (rt == needle)
                 return TEST_FOUND;
-        else if (le64toh(o->entry.realtime) < needle)
+        else if (rt < needle)
                 return TEST_LEFT;
         else
                 return TEST_RIGHT;
@@ -2494,21 +2638,23 @@ int journal_file_move_to_entry_by_realtime(
                 uint64_t realtime,
                 direction_t direction,
                 Object **ret,
-                uint64_t *offset) {
+                uint64_t *ret_offset) {
         assert(f);
         assert(f->header);
 
-        return generic_array_bisect(f,
-                                    le64toh(f->header->entry_array_offset),
-                                    le64toh(f->header->n_entries),
-                                    realtime,
-                                    test_object_realtime,
-                                    direction,
-                                    ret, offset, NULL);
+        return generic_array_bisect(
+                        f,
+                        le64toh(f->header->entry_array_offset),
+                        le64toh(f->header->n_entries),
+                        realtime,
+                        test_object_realtime,
+                        direction,
+                        ret, ret_offset, NULL);
 }
 
 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
         Object *o;
+        uint64_t m;
         int r;
 
         assert(f);
@@ -2518,9 +2664,10 @@ static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
         if (r < 0)
                 return r;
 
-        if (le64toh(o->entry.monotonic) == needle)
+        m = le64toh(READ_NOW(o->entry.monotonic));
+        if (m == needle)
                 return TEST_FOUND;
-        else if (le64toh(o->entry.monotonic) < needle)
+        else if (m < needle)
                 return TEST_LEFT;
         else
                 return TEST_RIGHT;
@@ -2544,7 +2691,7 @@ int journal_file_move_to_entry_by_monotonic(
                 uint64_t monotonic,
                 direction_t direction,
                 Object **ret,
-                uint64_t *offset) {
+                uint64_t *ret_offset) {
 
         Object *o;
         int r;
@@ -2557,14 +2704,15 @@ int journal_file_move_to_entry_by_monotonic(
         if (r == 0)
                 return -ENOENT;
 
-        return generic_array_bisect_plus_one(f,
-                                             le64toh(o->data.entry_offset),
-                                             le64toh(o->data.entry_array_offset),
-                                             le64toh(o->data.n_entries),
-                                             monotonic,
-                                             test_object_monotonic,
-                                             direction,
-                                             ret, offset, NULL);
+        return generic_array_bisect_plus_one(
+                        f,
+                        le64toh(o->data.entry_offset),
+                        le64toh(o->data.entry_array_offset),
+                        le64toh(o->data.n_entries),
+                        monotonic,
+                        test_object_monotonic,
+                        direction,
+                        ret, ret_offset, NULL);
 }
 
 void journal_file_reset_location(JournalFile *f) {
@@ -2670,7 +2818,7 @@ int journal_file_next_entry(
                 JournalFile *f,
                 uint64_t p,
                 direction_t direction,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
         uint64_t i, n, ofs;
         int r;
@@ -2678,7 +2826,7 @@ int journal_file_next_entry(
         assert(f);
         assert(f->header);
 
-        n = le64toh(f->header->n_entries);
+        n = le64toh(READ_NOW(f->header->n_entries));
         if (n <= 0)
                 return 0;
 
@@ -2727,8 +2875,8 @@ int journal_file_next_entry(
                                        "%s: entry array not properly ordered at entry %" PRIu64,
                                        f->path, i);
 
-        if (offset)
-                *offset = ofs;
+        if (ret_offset)
+                *ret_offset = ofs;
 
         return 1;
 }
@@ -2738,7 +2886,7 @@ int journal_file_next_entry_for_data(
                 Object *o, uint64_t p,
                 uint64_t data_offset,
                 direction_t direction,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
         uint64_t i, n, ofs;
         Object *d;
@@ -2751,7 +2899,7 @@ int journal_file_next_entry_for_data(
         if (r < 0)
                 return r;
 
-        n = le64toh(d->data.n_entries);
+        n = le64toh(READ_NOW(d->data.n_entries));
         if (n <= 0)
                 return n;
 
@@ -2803,8 +2951,8 @@ int journal_file_next_entry_for_data(
                                        "%s data entry array not properly ordered at entry %" PRIu64,
                                        f->path, i);
 
-        if (offset)
-                *offset = ofs;
+        if (ret_offset)
+                *ret_offset = ofs;
 
         return 1;
 }
@@ -2814,7 +2962,7 @@ int journal_file_move_to_entry_by_offset_for_data(
                 uint64_t data_offset,
                 uint64_t p,
                 direction_t direction,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
         int r;
         Object *d;
@@ -2825,14 +2973,15 @@ int journal_file_move_to_entry_by_offset_for_data(
         if (r < 0)
                 return r;
 
-        return generic_array_bisect_plus_one(f,
-                                             le64toh(d->data.entry_offset),
-                                             le64toh(d->data.entry_array_offset),
-                                             le64toh(d->data.n_entries),
-                                             p,
-                                             test_object_offset,
-                                             direction,
-                                             ret, offset, NULL);
+        return generic_array_bisect_plus_one(
+                        f,
+                        le64toh(d->data.entry_offset),
+                        le64toh(d->data.entry_array_offset),
+                        le64toh(d->data.n_entries),
+                        p,
+                        test_object_offset,
+                        direction,
+                        ret, ret_offset, NULL);
 }
 
 int journal_file_move_to_entry_by_monotonic_for_data(
@@ -2841,7 +2990,7 @@ int journal_file_move_to_entry_by_monotonic_for_data(
                 sd_id128_t boot_id,
                 uint64_t monotonic,
                 direction_t direction,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
         Object *o, *d;
         int r;
@@ -2908,8 +3057,8 @@ int journal_file_move_to_entry_by_monotonic_for_data(
                 if (p == q) {
                         if (ret)
                                 *ret = qo;
-                        if (offset)
-                                *offset = q;
+                        if (ret_offset)
+                                *ret_offset = q;
 
                         return 1;
                 }
@@ -2923,7 +3072,7 @@ int journal_file_move_to_entry_by_seqnum_for_data(
                 uint64_t data_offset,
                 uint64_t seqnum,
                 direction_t direction,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
         Object *d;
         int r;
@@ -2934,14 +3083,15 @@ int journal_file_move_to_entry_by_seqnum_for_data(
         if (r < 0)
                 return r;
 
-        return generic_array_bisect_plus_one(f,
-                                             le64toh(d->data.entry_offset),
-                                             le64toh(d->data.entry_array_offset),
-                                             le64toh(d->data.n_entries),
-                                             seqnum,
-                                             test_object_seqnum,
-                                             direction,
-                                             ret, offset, NULL);
+        return generic_array_bisect_plus_one(
+                        f,
+                        le64toh(d->data.entry_offset),
+                        le64toh(d->data.entry_array_offset),
+                        le64toh(d->data.n_entries),
+                        seqnum,
+                        test_object_seqnum,
+                        direction,
+                        ret, ret_offset, NULL);
 }
 
 int journal_file_move_to_entry_by_realtime_for_data(
@@ -2949,7 +3099,7 @@ int journal_file_move_to_entry_by_realtime_for_data(
                 uint64_t data_offset,
                 uint64_t realtime,
                 direction_t direction,
-                Object **ret, uint64_t *offset) {
+                Object **ret, uint64_t *ret_offset) {
 
         Object *d;
         int r;
@@ -2960,14 +3110,15 @@ int journal_file_move_to_entry_by_realtime_for_data(
         if (r < 0)
                 return r;
 
-        return generic_array_bisect_plus_one(f,
-                                             le64toh(d->data.entry_offset),
-                                             le64toh(d->data.entry_array_offset),
-                                             le64toh(d->data.n_entries),
-                                             realtime,
-                                             test_object_realtime,
-                                             direction,
-                                             ret, offset, NULL);
+        return generic_array_bisect_plus_one(
+                        f,
+                        le64toh(d->data.entry_offset),
+                        le64toh(d->data.entry_array_offset),
+                        le64toh(d->data.n_entries),
+                        realtime,
+                        test_object_realtime,
+                        direction,
+                        ret, ret_offset, NULL);
 }
 
 void journal_file_dump(JournalFile *f) {
@@ -2980,7 +3131,7 @@ void journal_file_dump(JournalFile *f) {
 
         journal_file_print_header(f);
 
-        p = le64toh(f->header->header_size);
+        p = le64toh(READ_NOW(f->header->header_size));
         while (p != 0) {
                 r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
                 if (r < 0)
@@ -3037,7 +3188,7 @@ void journal_file_dump(JournalFile *f) {
                 if (p == le64toh(f->header->tail_object_offset))
                         p = 0;
                 else
-                        p = p + ALIGN64(le64toh(o->object.size));
+                        p += ALIGN64(le64toh(o->object.size));
         }
 
         return;
@@ -3055,7 +3206,7 @@ static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
 }
 
 void journal_file_print_header(JournalFile *f) {
-        char a[33], b[33], c[33], d[33];
+        char a[SD_ID128_STRING_MAX], b[SD_ID128_STRING_MAX], c[SD_ID128_STRING_MAX], d[SD_ID128_STRING_MAX];
         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
         struct stat st;
         char bytes[FORMAT_BYTES_MAX];
@@ -3063,26 +3214,26 @@ void journal_file_print_header(JournalFile *f) {
         assert(f);
         assert(f->header);
 
-        printf("File Path: %s\n"
+        printf("File path: %s\n"
                "File ID: %s\n"
                "Machine ID: %s\n"
                "Boot ID: %s\n"
-               "Sequential Number ID: %s\n"
+               "Sequential number ID: %s\n"
                "State: %s\n"
-               "Compatible Flags:%s%s\n"
-               "Incompatible Flags:%s%s%s\n"
+               "Compatible flags:%s%s\n"
+               "Incompatible flags:%s%s%s%s%s\n"
                "Header size: %"PRIu64"\n"
                "Arena size: %"PRIu64"\n"
-               "Data Hash Table Size: %"PRIu64"\n"
-               "Field Hash Table Size: %"PRIu64"\n"
-               "Rotate Suggested: %s\n"
-               "Head Sequential Number: %"PRIu64" (%"PRIx64")\n"
-               "Tail Sequential Number: %"PRIu64" (%"PRIx64")\n"
-               "Head Realtime Timestamp: %s (%"PRIx64")\n"
-               "Tail Realtime Timestamp: %s (%"PRIx64")\n"
-               "Tail Monotonic Timestamp: %s (%"PRIx64")\n"
+               "Data hash table size: %"PRIu64"\n"
+               "Field hash table size: %"PRIu64"\n"
+               "Rotate suggested: %s\n"
+               "Head sequential number: %"PRIu64" (%"PRIx64")\n"
+               "Tail sequential number: %"PRIu64" (%"PRIx64")\n"
+               "Head realtime timestamp: %s (%"PRIx64")\n"
+               "Tail realtime timestamp: %s (%"PRIx64")\n"
+               "Tail monotonic timestamp: %s (%"PRIx64")\n"
                "Objects: %"PRIu64"\n"
-               "Entry Objects: %"PRIu64"\n",
+               "Entry objects: %"PRIu64"\n",
                f->path,
                sd_id128_to_string(f->header->file_id, a),
                sd_id128_to_string(f->header->machine_id, b),
@@ -3095,6 +3246,8 @@ void journal_file_print_header(JournalFile *f) {
                (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
                JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
                JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
+               JOURNAL_HEADER_COMPRESSED_ZSTD(f->header) ? " COMPRESSED-ZSTD" : "",
+               JOURNAL_HEADER_KEYED_HASH(f->header) ? " KEYED-HASH" : "",
                (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
                le64toh(f->header->header_size),
                le64toh(f->header->arena_size),
@@ -3110,24 +3263,32 @@ void journal_file_print_header(JournalFile *f) {
                le64toh(f->header->n_entries));
 
         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
-                printf("Data Objects: %"PRIu64"\n"
-                       "Data Hash Table Fill: %.1f%%\n",
+                printf("Data objects: %"PRIu64"\n"
+                       "Data hash table fill: %.1f%%\n",
                        le64toh(f->header->n_data),
                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
 
         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
-                printf("Field Objects: %"PRIu64"\n"
-                       "Field Hash Table Fill: %.1f%%\n",
+                printf("Field objects: %"PRIu64"\n"
+                       "Field hash table fill: %.1f%%\n",
                        le64toh(f->header->n_fields),
                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
 
         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
-                printf("Tag Objects: %"PRIu64"\n",
+                printf("Tag objects: %"PRIu64"\n",
                        le64toh(f->header->n_tags));
         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
-                printf("Entry Array Objects: %"PRIu64"\n",
+                printf("Entry array objects: %"PRIu64"\n",
                        le64toh(f->header->n_entry_arrays));
 
+        if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth))
+                printf("Deepest field hash chain: %" PRIu64"\n",
+                       f->header->field_hash_chain_depth);
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth))
+                printf("Deepest data hash chain: %" PRIu64"\n",
+                       f->header->data_hash_chain_depth);
+
         if (fstat(f->fd, &st) >= 0)
                 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (uint64_t) st.st_blocks * 512ULL));
 }
@@ -3185,7 +3346,6 @@ int journal_file_open(
         JournalFile *f;
         void *h;
         int r;
-        char bytes[FORMAT_BYTES_MAX];
 
         assert(ret);
         assert(fd >= 0 || fname);
@@ -3208,7 +3368,9 @@ int journal_file_open(
                 .prot = prot_from_flags(flags),
                 .writable = (flags & O_ACCMODE) != O_RDONLY,
 
-#if HAVE_LZ4
+#if HAVE_ZSTD
+                .compress_zstd = compress,
+#elif HAVE_LZ4
                 .compress_lz4 = compress,
 #elif HAVE_XZ
                 .compress_xz = compress,
@@ -3221,9 +3383,35 @@ int journal_file_open(
 #endif
         };
 
-        log_debug("Journal effective settings seal=%s compress=%s compress_threshold_bytes=%s",
-                  yes_no(f->seal), yes_no(JOURNAL_FILE_COMPRESS(f)),
-                  format_bytes(bytes, sizeof(bytes), f->compress_threshold_bytes));
+        /* We turn on keyed hashes by default, but provide an environment variable to turn them off, if
+         * people really want that */
+        r = getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH");
+        if (r < 0) {
+                if (r != -ENXIO)
+                        log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring.");
+                f->keyed_hash = true;
+        } else
+                f->keyed_hash = r;
+
+        if (DEBUG_LOGGING) {
+                static int last_seal = -1, last_compress = -1, last_keyed_hash = -1;
+                static uint64_t last_bytes = UINT64_MAX;
+                char bytes[FORMAT_BYTES_MAX];
+
+                if (last_seal != f->seal ||
+                    last_keyed_hash != f->keyed_hash ||
+                    last_compress != JOURNAL_FILE_COMPRESS(f) ||
+                    last_bytes != f->compress_threshold_bytes) {
+
+                        log_debug("Journal effective settings seal=%s keyed_hash=%s compress=%s compress_threshold_bytes=%s",
+                                  yes_no(f->seal), yes_no(f->keyed_hash), yes_no(JOURNAL_FILE_COMPRESS(f)),
+                                  format_bytes(bytes, sizeof bytes, f->compress_threshold_bytes));
+                        last_seal = f->seal;
+                        last_keyed_hash = f->keyed_hash;
+                        last_compress = JOURNAL_FILE_COMPRESS(f);
+                        last_bytes = f->compress_threshold_bytes;
+                }
+        }
 
         if (mmap_cache)
                 f->mmap = mmap_cache_ref(mmap_cache);
@@ -3645,7 +3833,11 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
                 if (le_hash != o->data.hash)
                         return -EBADMSG;
 
-                l = le64toh(o->object.size) - offsetof(Object, data.payload);
+                l = le64toh(READ_NOW(o->object.size));
+                if (l < offsetof(Object, data.payload))
+                        return -EBADMSG;
+
+                l -= offsetof(Object, data.payload);
                 t = (size_t) l;
 
                 /* We hit the limit on 32bit machines */
@@ -3653,7 +3845,7 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
                         return -E2BIG;
 
                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
                         size_t rsize = 0;
 
                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
@@ -3673,7 +3865,11 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
                 if (r < 0)
                         return r;
 
-                xor_hash ^= le64toh(u->data.hash);
+                if (JOURNAL_HEADER_KEYED_HASH(to->header))
+                        xor_hash ^= jenkins_hash64(data, l);
+                else
+                        xor_hash ^= le64toh(u->data.hash);
+
                 items[i].object_offset = htole64(h);
                 items[i].hash = u->data.hash;
 
@@ -3709,30 +3905,23 @@ void journal_reset_metrics(JournalMetrics *m) {
 void journal_default_metrics(JournalMetrics *m, int fd) {
         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX], e[FORMAT_BYTES_MAX];
         struct statvfs ss;
-        uint64_t fs_size;
+        uint64_t fs_size = 0;
 
         assert(m);
         assert(fd >= 0);
 
         if (fstatvfs(fd, &ss) >= 0)
                 fs_size = ss.f_frsize * ss.f_blocks;
-        else {
+        else
                 log_debug_errno(errno, "Failed to determine disk size: %m");
-                fs_size = 0;
-        }
 
         if (m->max_use == (uint64_t) -1) {
 
-                if (fs_size > 0) {
-                        m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
-
-                        if (m->max_use > DEFAULT_MAX_USE_UPPER)
-                                m->max_use = DEFAULT_MAX_USE_UPPER;
-
-                        if (m->max_use < DEFAULT_MAX_USE_LOWER)
-                                m->max_use = DEFAULT_MAX_USE_LOWER;
-                } else
-                        m->max_use = DEFAULT_MAX_USE_LOWER;
+                if (fs_size > 0)
+                        m->max_use = CLAMP(PAGE_ALIGN(fs_size / 10), /* 10% of file system size */
+                                           MAX_USE_LOWER, MAX_USE_UPPER);
+                else
+                        m->max_use = MAX_USE_LOWER;
         } else {
                 m->max_use = PAGE_ALIGN(m->max_use);
 
@@ -3740,18 +3929,21 @@ void journal_default_metrics(JournalMetrics *m, int fd) {
                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
         }
 
-        if (m->min_use == (uint64_t) -1)
-                m->min_use = DEFAULT_MIN_USE;
+        if (m->min_use == (uint64_t) -1) {
+                if (fs_size > 0)
+                        m->min_use = CLAMP(PAGE_ALIGN(fs_size / 50), /* 2% of file system size */
+                                           MIN_USE_LOW, MIN_USE_HIGH);
+                else
+                        m->min_use = MIN_USE_LOW;
+        }
 
         if (m->min_use > m->max_use)
                 m->min_use = m->max_use;
 
-        if (m->max_size == (uint64_t) -1) {
-                m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
-
-                if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
-                        m->max_size = DEFAULT_MAX_SIZE_UPPER;
-        } else
+        if (m->max_size == (uint64_t) -1)
+                m->max_size = MIN(PAGE_ALIGN(m->max_use / 8), /* 8 chunks */
+                                  MAX_SIZE_UPPER);
+        else
                 m->max_size = PAGE_ALIGN(m->max_size);
 
         if (m->max_size != 0) {
@@ -3764,25 +3956,16 @@ void journal_default_metrics(JournalMetrics *m, int fd) {
 
         if (m->min_size == (uint64_t) -1)
                 m->min_size = JOURNAL_FILE_SIZE_MIN;
-        else {
-                m->min_size = PAGE_ALIGN(m->min_size);
-
-                if (m->min_size < JOURNAL_FILE_SIZE_MIN)
-                        m->min_size = JOURNAL_FILE_SIZE_MIN;
-
-                if (m->max_size != 0 && m->min_size > m->max_size)
-                        m->max_size = m->min_size;
-        }
+        else
+                m->min_size = CLAMP(PAGE_ALIGN(m->min_size),
+                                    JOURNAL_FILE_SIZE_MIN,
+                                    m->max_size ?: UINT64_MAX);
 
         if (m->keep_free == (uint64_t) -1) {
-
-                if (fs_size > 0) {
-                        m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
-
-                        if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
-                                m->keep_free = DEFAULT_KEEP_FREE_UPPER;
-
-                } else
+                if (fs_size > 0)
+                        m->keep_free = MIN(PAGE_ALIGN(fs_size / 20), /* 5% of file system size */
+                                           KEEP_FREE_UPPER);
+                else
                         m->keep_free = DEFAULT_KEEP_FREE;
         }
 
@@ -3873,11 +4056,9 @@ bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
                 return true;
         }
 
-        /* Let's check if the hash tables grew over a certain fill
-         * level (75%, borrowing this value from Java's hash table
-         * implementation), and if so suggest a rotation. To calculate
-         * the fill level we need the n_data field, which only exists
-         * in newer versions. */
+        /* Let's check if the hash tables grew over a certain fill level (75%, borrowing this value from
+         * Java's hash table implementation), and if so suggest a rotation. To calculate the fill level we
+         * need the n_data field, which only exists in newer versions. */
 
         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
@@ -3901,6 +4082,22 @@ bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
                         return true;
                 }
 
+        /* If there are too many hash collisions somebody is most likely playing games with us. Hence, if our
+         * longest chain is longer than some threshold, let's suggest rotation. */
+        if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) &&
+            le64toh(f->header->data_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) {
+                log_debug("Data hash table of %s has deepest hash chain of length %" PRIu64 ", suggesting rotation.",
+                          f->path, le64toh(f->header->data_hash_chain_depth));
+                return true;
+        }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) &&
+            le64toh(f->header->field_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) {
+                log_debug("Field hash table of %s has deepest hash chain of length at %" PRIu64 ", suggesting rotation.",
+                          f->path, le64toh(f->header->field_hash_chain_depth));
+                return true;
+        }
+
         /* Are the data objects properly indexed by field objects? */
         if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
             JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&