#include "btrfs-util.h"
#include "chattr-util.h"
#include "compress.h"
+#include "env-util.h"
#include "fd-util.h"
+#include "format-util.h"
#include "fs-util.h"
#include "journal-authenticate.h"
#include "journal-def.h"
#include "journal-file.h"
#include "lookup3.h"
#include "memory-util.h"
-#include "parse-util.h"
#include "path-util.h"
#include "random-util.h"
#include "set.h"
#define MIN_COMPRESS_THRESHOLD (8ULL)
/* This is the minimum journal file size */
-#define JOURNAL_FILE_SIZE_MIN (512ULL*1024ULL) /* 512 KiB */
+#define JOURNAL_FILE_SIZE_MIN (512 * 1024ULL) /* 512 KiB */
/* These are the lower and upper bounds if we deduce the max_use value
* from the file system size */
-#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
-#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
+#define MAX_USE_LOWER (1 * 1024 * 1024ULL) /* 1 MiB */
+#define MAX_USE_UPPER (4 * 1024 * 1024 * 1024ULL) /* 4 GiB */
-/* This is the default minimal use limit, how much we'll use even if keep_free suggests otherwise. */
-#define DEFAULT_MIN_USE (1ULL*1024ULL*1024ULL) /* 1 MiB */
+/* Those are the lower and upper bounds for the minimal use limit,
+ * i.e. how much we'll use even if keep_free suggests otherwise. */
+#define MIN_USE_LOW (1 * 1024 * 1024ULL) /* 1 MiB */
+#define MIN_USE_HIGH (16 * 1024 * 1024ULL) /* 16 MiB */
/* This is the upper bound if we deduce max_size from max_use */
-#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
+#define MAX_SIZE_UPPER (128 * 1024 * 1024ULL) /* 128 MiB */
/* This is the upper bound if we deduce the keep_free value from the
* file system size */
-#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
+#define KEEP_FREE_UPPER (4 * 1024 * 1024 * 1024ULL) /* 4 GiB */
/* This is the keep_free value when we can't determine the system
* size */
-#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
+#define DEFAULT_KEEP_FREE (1024 * 1024ULL) /* 1 MB */
/* This is the default maximum number of journal files to keep around. */
-#define DEFAULT_N_MAX_FILES (100)
+#define DEFAULT_N_MAX_FILES 100
/* n_data was the first entry we added after the initial file format design */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
#define CHAIN_CACHE_MAX 20
/* How much to increase the journal file size at once each time we allocate something new. */
-#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL) /* 8MB */
+#define FILE_SIZE_INCREASE (8 * 1024 * 1024ULL) /* 8MB */
/* Reread fstat() of the file for detecting deletions at least this often */
#define LAST_STAT_REFRESH_USEC (5*USEC_PER_SEC)
/* The mmap context to use for the header we pick as one above the last defined typed */
#define CONTEXT_HEADER _OBJECT_TYPE_MAX
+/* Longest hash chain to rotate after */
+#define HASH_CHAIN_DEPTH_MAX 100
+
#ifdef __clang__
# pragma GCC diagnostic ignored "-Waddress-of-packed-member"
#endif
}
JournalFile* journal_file_close(JournalFile *f) {
- assert(f);
+ if (!f)
+ return NULL;
#if HAVE_GCRYPT
/* Write the final tag */
ordered_hashmap_free_free(f->chain_cache);
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
free(f->compress_buffer);
#endif
h.incompatible_flags |= htole32(
f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
- f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
+ f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4 |
+ f->compress_zstd * HEADER_INCOMPATIBLE_COMPRESSED_ZSTD |
+ f->keyed_hash * HEADER_INCOMPATIBLE_KEYED_HASH);
h.compatible_flags = htole32(
f->seal * HEADER_COMPATIBLE_SEALED);
}
static int journal_file_refresh_header(JournalFile *f) {
- sd_id128_t boot_id;
int r;
assert(f);
else if (r < 0)
return r;
- r = sd_id128_get_boot(&boot_id);
+ r = sd_id128_get_boot(&f->header->boot_id);
if (r < 0)
return r;
- f->header->boot_id = boot_id;
-
r = journal_file_set_online(f);
/* Sync the online state to disk */
f->path, type, flags & ~any);
flags = (flags & any) & ~supported;
if (flags) {
- const char* strv[3];
+ const char* strv[5];
unsigned n = 0;
_cleanup_free_ char *t = NULL;
- if (compatible && (flags & HEADER_COMPATIBLE_SEALED))
- strv[n++] = "sealed";
- if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ))
- strv[n++] = "xz-compressed";
- if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4))
- strv[n++] = "lz4-compressed";
+ if (compatible) {
+ if (flags & HEADER_COMPATIBLE_SEALED)
+ strv[n++] = "sealed";
+ } else {
+ if (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ)
+ strv[n++] = "xz-compressed";
+ if (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+ strv[n++] = "lz4-compressed";
+ if (flags & HEADER_INCOMPATIBLE_COMPRESSED_ZSTD)
+ strv[n++] = "zstd-compressed";
+ if (flags & HEADER_INCOMPATIBLE_KEYED_HASH)
+ strv[n++] = "keyed-hash";
+ }
strv[n] = NULL;
assert(n < ELEMENTSOF(strv));
if (f->header->state >= _STATE_MAX)
return -EBADMSG;
- header_size = le64toh(f->header->header_size);
+ header_size = le64toh(READ_NOW(f->header->header_size));
/* The first addition was n_data, so check that we are at least this large */
if (header_size < HEADER_SIZE_MIN)
if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
return -EBADMSG;
- arena_size = le64toh(f->header->arena_size);
+ arena_size = le64toh(READ_NOW(f->header->arena_size));
if (UINT64_MAX - header_size < arena_size || header_size + arena_size > (uint64_t) f->last_stat.st_size)
return -ENODATA;
f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
+ f->compress_zstd = JOURNAL_HEADER_COMPRESSED_ZSTD(f->header);
f->seal = JOURNAL_HEADER_SEALED(f->header);
+ f->keyed_hash = JOURNAL_HEADER_KEYED_HASH(f->header);
+
return 0;
}
-static int journal_file_fstat(JournalFile *f) {
+int journal_file_fstat(JournalFile *f) {
int r;
assert(f);
f->last_stat_usec = now(CLOCK_MONOTONIC);
- /* Refuse dealing with with files that aren't regular */
+ /* Refuse dealing with files that aren't regular */
r = stat_verify_regular(&f->last_stat);
if (r < 0)
return r;
}
static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
- uint64_t old_size, new_size;
+ uint64_t old_size, new_size, old_header_size, old_arena_size;
int r;
assert(f);
assert(f->header);
- /* We assume that this file is not sparse, and we know that
- * for sure, since we always call posix_fallocate()
- * ourselves */
+ /* We assume that this file is not sparse, and we know that for sure, since we always call
+ * posix_fallocate() ourselves */
+
+ if (size > PAGE_ALIGN_DOWN(UINT64_MAX) - offset)
+ return -EINVAL;
if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
return -EIO;
- old_size =
- le64toh(f->header->header_size) +
- le64toh(f->header->arena_size);
+ old_header_size = le64toh(READ_NOW(f->header->header_size));
+ old_arena_size = le64toh(READ_NOW(f->header->arena_size));
+ if (old_arena_size > PAGE_ALIGN_DOWN(UINT64_MAX) - old_header_size)
+ return -EBADMSG;
- new_size = PAGE_ALIGN(offset + size);
- if (new_size < le64toh(f->header->header_size))
- new_size = le64toh(f->header->header_size);
+ old_size = old_header_size + old_arena_size;
+
+ new_size = MAX(PAGE_ALIGN(offset + size), old_header_size);
if (new_size <= old_size) {
if (r != 0)
return -r;
- f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
+ f->header->arena_size = htole64(new_size - old_header_size);
return journal_file_fstat(f);
}
return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;
}
-static int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_always, uint64_t offset, uint64_t size, void **ret, size_t *ret_size) {
+static int journal_file_move_to(
+ JournalFile *f,
+ ObjectType type,
+ bool keep_always,
+ uint64_t offset,
+ uint64_t size,
+ void **ret,
+ size_t *ret_size) {
+
int r;
assert(f);
if (size <= 0)
return -EINVAL;
+ if (size > UINT64_MAX - offset)
+ return -EBADMSG;
+
/* Avoid SIGBUS on invalid accesses */
if (offset + size > (uint64_t) f->last_stat.st_size) {
/* Hmm, out of range? Let's refresh the fstat() data
switch (o->object.type) {
- case OBJECT_DATA: {
+ case OBJECT_DATA:
if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad n_entries: %" PRIu64 ": %" PRIu64,
le64toh(o->data.n_entries),
offset);
- if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0)
+ if (le64toh(o->object.size) <= offsetof(DataObject, payload))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad object size (<= %zu): %" PRIu64 ": %" PRIu64,
offsetof(DataObject, payload),
offset);
break;
- }
case OBJECT_FIELD:
- if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0)
+ if (le64toh(o->object.size) <= offsetof(FieldObject, payload))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad field size (<= %zu): %" PRIu64 ": %" PRIu64,
offsetof(FieldObject, payload),
offset);
break;
- case OBJECT_ENTRY:
- if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
+ case OBJECT_ENTRY: {
+ uint64_t sz;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(EntryObject, items) ||
+ (sz - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad entry size (<= %zu): %" PRIu64 ": %" PRIu64,
offsetof(EntryObject, items),
- le64toh(o->object.size),
+ sz,
offset);
- if ((le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
+ if ((sz - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Invalid number items in entry: %" PRIu64 ": %" PRIu64,
- (le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem),
+ (sz - offsetof(EntryObject, items)) / sizeof(EntryItem),
offset);
if (le64toh(o->entry.seqnum) <= 0)
offset);
break;
+ }
case OBJECT_DATA_HASH_TABLE:
- case OBJECT_FIELD_HASH_TABLE:
- if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
- (le64toh(o->object.size) - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0)
+ case OBJECT_FIELD_HASH_TABLE: {
+ uint64_t sz;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(HashTableObject, items) ||
+ (sz - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
+ (sz - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Invalid %s hash table size: %" PRIu64 ": %" PRIu64,
o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
- le64toh(o->object.size),
+ sz,
offset);
break;
+ }
+
+ case OBJECT_ENTRY_ARRAY: {
+ uint64_t sz;
- case OBJECT_ENTRY_ARRAY:
- if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
- (le64toh(o->object.size) - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0)
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(EntryArrayObject, items) ||
+ (sz - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
+ (sz - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Invalid object entry array size: %" PRIu64 ": %" PRIu64,
- le64toh(o->object.size),
+ sz,
offset);
if (!VALID64(le64toh(o->entry_array.next_entry_array_offset)))
offset);
break;
+ }
case OBJECT_TAG:
if (le64toh(o->object.size) != sizeof(TagObject))
return r;
o = (Object*) t;
- s = le64toh(o->object.size);
+ s = le64toh(READ_NOW(o->object.size));
if (s == 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
return r;
}
-int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset) {
+int journal_file_append_object(
+ JournalFile *f,
+ ObjectType type,
+ uint64_t size,
+ Object **ret,
+ uint64_t *ret_offset) {
+
int r;
uint64_t p;
Object *tail, *o;
assert(f->header);
assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
assert(size >= sizeof(ObjectHeader));
- assert(offset);
- assert(ret);
r = journal_file_set_online(f);
if (r < 0)
if (p == 0)
p = le64toh(f->header->header_size);
else {
+ uint64_t sz;
+
r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);
if (r < 0)
return r;
- p += ALIGN64(le64toh(tail->object.size));
+ sz = le64toh(READ_NOW(tail->object.size));
+ if (sz > UINT64_MAX - sizeof(uint64_t) + 1)
+ return -EBADMSG;
+
+ sz = ALIGN64(sz);
+ if (p > UINT64_MAX - sz)
+ return -EBADMSG;
+
+ p += sz;
}
r = journal_file_allocate(f, p, size);
return r;
o = (Object*) t;
-
- zero(o->object);
- o->object.type = type;
- o->object.size = htole64(size);
+ o->object = (ObjectHeader) {
+ .type = type,
+ .size = htole64(size),
+ };
f->header->tail_object_offset = htole64(p);
f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
- *ret = o;
- *offset = p;
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = p;
return 0;
}
if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
s = DEFAULT_DATA_HASH_TABLE_SIZE;
- log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
+ log_debug("Reserving %"PRIu64" entries in data hash table.", s / sizeof(HashItem));
r = journal_file_append_object(f,
OBJECT_DATA_HASH_TABLE,
* number should grow very slowly only */
s = DEFAULT_FIELD_HASH_TABLE_SIZE;
+ log_debug("Reserving %"PRIu64" entries in field hash table.", s / sizeof(HashItem));
+
r = journal_file_append_object(f,
OBJECT_FIELD_HASH_TABLE,
offsetof(Object, hash_table.items) + s,
if (o->object.type != OBJECT_FIELD)
return -EINVAL;
- m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+ m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem);
if (m <= 0)
return -EBADMSG;
if (o->object.type != OBJECT_DATA)
return -EINVAL;
- m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+ m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem);
if (m <= 0)
return -EBADMSG;
return 0;
}
/* Advance a hash-chain walk by one link.
 *
 * Reads the next-item offset from *next_hash_offset (volatile, hence READ_NOW: the value lives in an
 * mmap'ed file that could change under us) and stores it into *p. Returns 0 on success, -EBADMSG if
 * the chain points backwards (offsets in a valid chain are strictly increasing, so a non-increasing
 * link means a loop in a corrupted file).
 *
 * *depth is incremented for each link followed; if header_max_depth is non-NULL and the file is
 * writable, the largest depth seen so far is recorded there (used to decide when to rotate files
 * with overlong hash chains). */
static int next_hash_offset(
                JournalFile *f,
                uint64_t *p,
                le64_t *next_hash_offset,
                uint64_t *depth,
                le64_t *header_max_depth) {

        uint64_t nextp;

        nextp = le64toh(READ_NOW(*next_hash_offset));
        if (nextp > 0) {
                if (nextp <= *p) /* Refuse going in loops */
                        return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                               "Detected hash item loop in %s, refusing.", f->path);

                (*depth)++;

                /* If the depth of this hash chain is larger than all others we have seen so far, record it */
                if (header_max_depth && f->writable)
                        *header_max_depth = htole64(MAX(*depth, le64toh(*header_max_depth)));
        }

        /* A nextp of 0 terminates the walk in the caller's loop. */
        *p = nextp;
        return 0;
}
+
int journal_file_find_field_object_with_hash(
JournalFile *f,
const void *field, uint64_t size, uint64_t hash,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
- uint64_t p, osize, h, m;
+ uint64_t p, osize, h, m, depth = 0;
int r;
assert(f);
osize = offsetof(Object, field.payload) + size;
- m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+ m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem);
if (m <= 0)
return -EBADMSG;
h = hash % m;
p = le64toh(f->field_hash_table[h].head_hash_offset);
-
while (p > 0) {
Object *o;
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 1;
}
- p = le64toh(o->field.next_hash_offset);
+ r = next_hash_offset(
+ f,
+ &p,
+ &o->field.next_hash_offset,
+ &depth,
+ JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) ? &f->header->field_hash_chain_depth : NULL);
+ if (r < 0)
+ return r;
}
return 0;
}
+uint64_t journal_file_hash_data(
+ JournalFile *f,
+ const void *data,
+ size_t sz) {
+
+ assert(f);
+ assert(data || sz == 0);
+
+ /* We try to unify our codebase on siphash, hence new-styled journal files utilizing the keyed hash
+ * function use siphash. Old journal files use the Jenkins hash. */
+
+ if (JOURNAL_HEADER_KEYED_HASH(f->header))
+ return siphash24(data, sz, f->header->file_id.bytes);
+
+ return jenkins_hash64(data, sz);
+}
+
int journal_file_find_field_object(
JournalFile *f,
const void *field, uint64_t size,
- Object **ret, uint64_t *offset) {
-
- uint64_t hash;
+ Object **ret, uint64_t *ret_offset) {
assert(f);
assert(field && size > 0);
- hash = hash64(field, size);
-
- return journal_file_find_field_object_with_hash(f,
- field, size, hash,
- ret, offset);
+ return journal_file_find_field_object_with_hash(
+ f,
+ field, size,
+ journal_file_hash_data(f, field, size),
+ ret, ret_offset);
}
int journal_file_find_data_object_with_hash(
JournalFile *f,
const void *data, uint64_t size, uint64_t hash,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
- uint64_t p, osize, h, m;
+ uint64_t p, osize, h, m, depth = 0;
int r;
assert(f);
osize = offsetof(Object, data.payload) + size;
- m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+ m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem);
if (m <= 0)
return -EBADMSG;
goto next;
if (o->object.flags & OBJECT_COMPRESSION_MASK) {
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
uint64_t l;
size_t rsize = 0;
- l = le64toh(o->object.size);
+ l = le64toh(READ_NOW(o->object.size));
if (l <= offsetof(Object, data.payload))
return -EBADMSG;
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 1;
}
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 1;
}
next:
- p = le64toh(o->data.next_hash_offset);
+ r = next_hash_offset(
+ f,
+ &p,
+ &o->data.next_hash_offset,
+ &depth,
+ JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) ? &f->header->data_hash_chain_depth : NULL);
+ if (r < 0)
+ return r;
}
return 0;
int journal_file_find_data_object(
JournalFile *f,
const void *data, uint64_t size,
- Object **ret, uint64_t *offset) {
-
- uint64_t hash;
+ Object **ret, uint64_t *ret_offset) {
assert(f);
assert(data || size == 0);
- hash = hash64(data, size);
-
- return journal_file_find_data_object_with_hash(f,
- data, size, hash,
- ret, offset);
+ return journal_file_find_data_object_with_hash(
+ f,
+ data, size,
+ journal_file_hash_data(f, data, size),
+ ret, ret_offset);
}
static int journal_file_append_field(
JournalFile *f,
const void *field, uint64_t size,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
uint64_t hash, p;
uint64_t osize;
assert(f);
assert(field && size > 0);
- hash = hash64(field, size);
+ hash = journal_file_hash_data(f, field, size);
r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
if (r < 0)
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 0;
}
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 0;
}
static int journal_file_append_data(
JournalFile *f,
const void *data, uint64_t size,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
uint64_t hash, p;
uint64_t osize;
assert(f);
assert(data || size == 0);
- hash = hash64(data, size);
+ hash = journal_file_hash_data(f, data, size);
r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
if (r < 0)
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 0;
}
o->data.hash = htole64(hash);
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
if (JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) {
size_t rsize = 0;
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 0;
}
/* Number of EntryItem entries carried by an ENTRY object, or 0 if the object is not an entry or its
 * declared size is too small to even hold the fixed header (corrupt file). The size is read through
 * READ_NOW since it lives in an mmap'ed region that could change under us. */
uint64_t journal_file_entry_n_items(Object *o) {
        uint64_t size;

        assert(o);

        if (o->object.type != OBJECT_ENTRY)
                return 0;

        size = le64toh(READ_NOW(o->object.size));

        return size < offsetof(Object, entry.items) ? 0 :
                (size - offsetof(Object, entry.items)) / sizeof(EntryItem);
}
/* Number of 64-bit offset slots carried by an ENTRY_ARRAY object, or 0 if the object is not an
 * entry array or its declared size is smaller than the fixed header (corrupt file). READ_NOW guards
 * against the mmap'ed size field changing between reads. */
uint64_t journal_file_entry_array_n_items(Object *o) {
        uint64_t size;

        assert(o);

        if (o->object.type != OBJECT_ENTRY_ARRAY)
                return 0;

        size = le64toh(READ_NOW(o->object.size));

        return size < offsetof(Object, entry_array.items) ? 0 :
                (size - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
}
/* Number of HashItem buckets carried by a DATA or FIELD hash-table object, or 0 for any other
 * object type or for a declared size smaller than the fixed header (corrupt file). READ_NOW guards
 * against the mmap'ed size field changing between reads. */
uint64_t journal_file_hash_table_n_items(Object *o) {
        uint64_t size;

        assert(o);

        if (!IN_SET(o->object.type, OBJECT_DATA_HASH_TABLE, OBJECT_FIELD_HASH_TABLE))
                return 0;

        size = le64toh(READ_NOW(o->object.size));

        return size < offsetof(Object, hash_table.items) ? 0 :
                (size - offsetof(Object, hash_table.items)) / sizeof(HashItem);
}
static int link_entry_into_array(JournalFile *f,
assert(p > 0);
a = le64toh(*first);
- i = hidx = le64toh(*idx);
+ i = hidx = le64toh(READ_NOW(*idx));
while (a > 0) {
r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
le64_t *idx,
uint64_t p) {
+ uint64_t hidx;
int r;
assert(f);
assert(idx);
assert(p > 0);
- if (*idx == 0)
+ hidx = le64toh(READ_NOW(*idx));
+ if (hidx == UINT64_MAX)
+ return -EBADMSG;
+ if (hidx == 0)
*extra = htole64(p);
else {
le64_t i;
- i = htole64(le64toh(*idx) - 1);
+ i = htole64(hidx - 1);
r = link_entry_into_array(f, first, &i, p);
if (r < 0)
return r;
}
- *idx = htole64(le64toh(*idx) + 1);
+ *idx = htole64(hidx + 1);
return 0;
}
static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
uint64_t p;
int r;
+
assert(f);
assert(o);
assert(offset > 0);
p = le64toh(o->entry.items[i].object_offset);
- if (p == 0)
- return -EINVAL;
-
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (r < 0)
return r;
uint64_t xor_hash,
const EntryItem items[], unsigned n_items,
uint64_t *seqnum,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
uint64_t np;
uint64_t osize;
Object *o;
if (ret)
*ret = o;
- if (offset)
- *offset = np;
+ if (ret_offset)
+ *ret_offset = np;
return 0;
}
}
static void schedule_post_change(JournalFile *f) {
- uint64_t now;
int r;
assert(f);
if (r > 0)
return;
- r = sd_event_now(sd_event_source_get_event(f->post_change_timer), CLOCK_MONOTONIC, &now);
- if (r < 0) {
- log_debug_errno(r, "Failed to get clock's now for scheduling ftruncate: %m");
- goto fail;
- }
-
- r = sd_event_source_set_time(f->post_change_timer, now + f->post_change_timer_period);
+ r = sd_event_source_set_time_relative(f->post_change_timer, f->post_change_timer_period);
if (r < 0) {
log_debug_errno(r, "Failed to set time for scheduling ftruncate: %m");
goto fail;
const sd_id128_t *boot_id,
const struct iovec iovec[], unsigned n_iovec,
uint64_t *seqnum,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
unsigned i;
EntryItem *items;
if (r < 0)
return r;
- xor_hash ^= le64toh(o->data.hash);
+ /* When calculating the XOR hash field, we need to take special care if the "keyed-hash"
+ * journal file flag is on. We use the XOR hash field to quickly determine the identity of a
+ * specific record, and give records with otherwise identical position (i.e. match in seqno,
+ * timestamp, …) a stable ordering. But for that we can't have it that the hash of the
+ * objects in each file is different since they are keyed. Hence let's calculate the Jenkins
+ * hash here for that. This also has the benefit that cursors for old and new journal files
+ * are completely identical (they include the XOR hash after all). For classic Jenkins-hash
+ * files things are easier, we can just take the value from the stored record directly. */
+
+ if (JOURNAL_HEADER_KEYED_HASH(f->header))
+ xor_hash ^= jenkins_hash64(iovec[i].iov_base, iovec[i].iov_len);
+ else
+ xor_hash ^= le64toh(o->data.hash);
+
items[i].object_offset = htole64(p);
items[i].hash = o->data.hash;
}
* times for rotating media. */
typesafe_qsort(items, n_iovec, entry_item_cmp);
- r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, ret, offset);
+ r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, ret, ret_offset);
/* If the memory mapping triggered a SIGBUS then we return an
* IO error and ignore the error code passed down to us, since
JournalFile *f,
uint64_t first,
uint64_t i,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
Object *o;
uint64_t p = 0, a, t = 0;
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 1;
}
uint64_t extra,
uint64_t first,
uint64_t i,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
Object *o;
if (ret)
*ret = o;
- if (offset)
- *offset = extra;
+ if (ret_offset)
+ *ret_offset = extra;
return 1;
}
- return generic_array_get(f, first, i-1, ret, offset);
+ return generic_array_get(f, first, i-1, ret, ret_offset);
}
enum {
int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
direction_t direction,
Object **ret,
- uint64_t *offset,
- uint64_t *idx) {
+ uint64_t *ret_offset,
+ uint64_t *ret_idx) {
uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
bool subtract_one = false;
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
- if (idx)
- *idx = t + i + (subtract_one ? -1 : 0);
+ if (ret_idx)
+ *ret_idx = t + i + (subtract_one ? -1 : 0);
return 1;
}
int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
direction_t direction,
Object **ret,
- uint64_t *offset,
- uint64_t *idx) {
+ uint64_t *ret_offset,
+ uint64_t *ret_idx) {
int r;
bool step_back = false;
return 0;
}
- r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
+ r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, ret_offset, ret_idx);
if (r == 0 && step_back)
goto found;
- if (r > 0 && idx)
- (*idx)++;
+ if (r > 0 && ret_idx)
+ (*ret_idx)++;
return r;
if (ret)
*ret = o;
- if (offset)
- *offset = extra;
+ if (ret_offset)
+ *ret_offset = extra;
- if (idx)
- *idx = 0;
+ if (ret_idx)
+ *ret_idx = 0;
return 1;
}
}
static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
+ uint64_t sq;
Object *o;
int r;
if (r < 0)
return r;
- if (le64toh(o->entry.seqnum) == needle)
+ sq = le64toh(READ_NOW(o->entry.seqnum));
+ if (sq == needle)
return TEST_FOUND;
- else if (le64toh(o->entry.seqnum) < needle)
+ else if (sq < needle)
return TEST_LEFT;
else
return TEST_RIGHT;
uint64_t seqnum,
direction_t direction,
Object **ret,
- uint64_t *offset) {
+ uint64_t *ret_offset) {
assert(f);
assert(f->header);
- return generic_array_bisect(f,
- le64toh(f->header->entry_array_offset),
- le64toh(f->header->n_entries),
- seqnum,
- test_object_seqnum,
- direction,
- ret, offset, NULL);
+ return generic_array_bisect(
+ f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ seqnum,
+ test_object_seqnum,
+ direction,
+ ret, ret_offset, NULL);
}
static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
Object *o;
+ uint64_t rt;
int r;
assert(f);
if (r < 0)
return r;
- if (le64toh(o->entry.realtime) == needle)
+ rt = le64toh(READ_NOW(o->entry.realtime));
+ if (rt == needle)
return TEST_FOUND;
- else if (le64toh(o->entry.realtime) < needle)
+ else if (rt < needle)
return TEST_LEFT;
else
return TEST_RIGHT;
uint64_t realtime,
direction_t direction,
Object **ret,
- uint64_t *offset) {
+ uint64_t *ret_offset) {
assert(f);
assert(f->header);
- return generic_array_bisect(f,
- le64toh(f->header->entry_array_offset),
- le64toh(f->header->n_entries),
- realtime,
- test_object_realtime,
- direction,
- ret, offset, NULL);
+ return generic_array_bisect(
+ f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ realtime,
+ test_object_realtime,
+ direction,
+ ret, ret_offset, NULL);
}
static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
Object *o;
+ uint64_t m;
int r;
assert(f);
if (r < 0)
return r;
- if (le64toh(o->entry.monotonic) == needle)
+ m = le64toh(READ_NOW(o->entry.monotonic));
+ if (m == needle)
return TEST_FOUND;
- else if (le64toh(o->entry.monotonic) < needle)
+ else if (m < needle)
return TEST_LEFT;
else
return TEST_RIGHT;
uint64_t monotonic,
direction_t direction,
Object **ret,
- uint64_t *offset) {
+ uint64_t *ret_offset) {
Object *o;
int r;
if (r == 0)
return -ENOENT;
- return generic_array_bisect_plus_one(f,
- le64toh(o->data.entry_offset),
- le64toh(o->data.entry_array_offset),
- le64toh(o->data.n_entries),
- monotonic,
- test_object_monotonic,
- direction,
- ret, offset, NULL);
+ return generic_array_bisect_plus_one(
+ f,
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ le64toh(o->data.n_entries),
+ monotonic,
+ test_object_monotonic,
+ direction,
+ ret, ret_offset, NULL);
}
void journal_file_reset_location(JournalFile *f) {
JournalFile *f,
uint64_t p,
direction_t direction,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
uint64_t i, n, ofs;
int r;
assert(f);
assert(f->header);
- n = le64toh(f->header->n_entries);
+ n = le64toh(READ_NOW(f->header->n_entries));
if (n <= 0)
return 0;
"%s: entry array not properly ordered at entry %" PRIu64,
f->path, i);
- if (offset)
- *offset = ofs;
+ if (ret_offset)
+ *ret_offset = ofs;
return 1;
}
Object *o, uint64_t p,
uint64_t data_offset,
direction_t direction,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
uint64_t i, n, ofs;
Object *d;
if (r < 0)
return r;
- n = le64toh(d->data.n_entries);
+ n = le64toh(READ_NOW(d->data.n_entries));
if (n <= 0)
return n;
"%s data entry array not properly ordered at entry %" PRIu64,
f->path, i);
- if (offset)
- *offset = ofs;
+ if (ret_offset)
+ *ret_offset = ofs;
return 1;
}
uint64_t data_offset,
uint64_t p,
direction_t direction,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
int r;
Object *d;
if (r < 0)
return r;
- return generic_array_bisect_plus_one(f,
- le64toh(d->data.entry_offset),
- le64toh(d->data.entry_array_offset),
- le64toh(d->data.n_entries),
- p,
- test_object_offset,
- direction,
- ret, offset, NULL);
+ return generic_array_bisect_plus_one(
+ f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ p,
+ test_object_offset,
+ direction,
+ ret, ret_offset, NULL);
}
int journal_file_move_to_entry_by_monotonic_for_data(
sd_id128_t boot_id,
uint64_t monotonic,
direction_t direction,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
Object *o, *d;
int r;
if (p == q) {
if (ret)
*ret = qo;
- if (offset)
- *offset = q;
+ if (ret_offset)
+ *ret_offset = q;
return 1;
}
uint64_t data_offset,
uint64_t seqnum,
direction_t direction,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
Object *d;
int r;
if (r < 0)
return r;
- return generic_array_bisect_plus_one(f,
- le64toh(d->data.entry_offset),
- le64toh(d->data.entry_array_offset),
- le64toh(d->data.n_entries),
- seqnum,
- test_object_seqnum,
- direction,
- ret, offset, NULL);
+ return generic_array_bisect_plus_one(
+ f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ seqnum,
+ test_object_seqnum,
+ direction,
+ ret, ret_offset, NULL);
}
int journal_file_move_to_entry_by_realtime_for_data(
uint64_t data_offset,
uint64_t realtime,
direction_t direction,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
Object *d;
int r;
if (r < 0)
return r;
- return generic_array_bisect_plus_one(f,
- le64toh(d->data.entry_offset),
- le64toh(d->data.entry_array_offset),
- le64toh(d->data.n_entries),
- realtime,
- test_object_realtime,
- direction,
- ret, offset, NULL);
+ return generic_array_bisect_plus_one(
+ f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ realtime,
+ test_object_realtime,
+ direction,
+ ret, ret_offset, NULL);
}
void journal_file_dump(JournalFile *f) {
journal_file_print_header(f);
- p = le64toh(f->header->header_size);
+ p = le64toh(READ_NOW(f->header->header_size));
while (p != 0) {
r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
if (r < 0)
if (p == le64toh(f->header->tail_object_offset))
p = 0;
else
- p = p + ALIGN64(le64toh(o->object.size));
+ p += ALIGN64(le64toh(o->object.size));
}
return;
}
void journal_file_print_header(JournalFile *f) {
- char a[33], b[33], c[33], d[33];
+ char a[SD_ID128_STRING_MAX], b[SD_ID128_STRING_MAX], c[SD_ID128_STRING_MAX], d[SD_ID128_STRING_MAX];
char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
struct stat st;
char bytes[FORMAT_BYTES_MAX];
assert(f);
assert(f->header);
- printf("File Path: %s\n"
+ printf("File path: %s\n"
"File ID: %s\n"
"Machine ID: %s\n"
"Boot ID: %s\n"
- "Sequential Number ID: %s\n"
+ "Sequential number ID: %s\n"
"State: %s\n"
- "Compatible Flags:%s%s\n"
- "Incompatible Flags:%s%s%s\n"
+ "Compatible flags:%s%s\n"
+ "Incompatible flags:%s%s%s%s%s\n"
"Header size: %"PRIu64"\n"
"Arena size: %"PRIu64"\n"
- "Data Hash Table Size: %"PRIu64"\n"
- "Field Hash Table Size: %"PRIu64"\n"
- "Rotate Suggested: %s\n"
- "Head Sequential Number: %"PRIu64" (%"PRIx64")\n"
- "Tail Sequential Number: %"PRIu64" (%"PRIx64")\n"
- "Head Realtime Timestamp: %s (%"PRIx64")\n"
- "Tail Realtime Timestamp: %s (%"PRIx64")\n"
- "Tail Monotonic Timestamp: %s (%"PRIx64")\n"
+ "Data hash table size: %"PRIu64"\n"
+ "Field hash table size: %"PRIu64"\n"
+ "Rotate suggested: %s\n"
+ "Head sequential number: %"PRIu64" (%"PRIx64")\n"
+ "Tail sequential number: %"PRIu64" (%"PRIx64")\n"
+ "Head realtime timestamp: %s (%"PRIx64")\n"
+ "Tail realtime timestamp: %s (%"PRIx64")\n"
+ "Tail monotonic timestamp: %s (%"PRIx64")\n"
"Objects: %"PRIu64"\n"
- "Entry Objects: %"PRIu64"\n",
+ "Entry objects: %"PRIu64"\n",
f->path,
sd_id128_to_string(f->header->file_id, a),
sd_id128_to_string(f->header->machine_id, b),
(le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
+ JOURNAL_HEADER_COMPRESSED_ZSTD(f->header) ? " COMPRESSED-ZSTD" : "",
+ JOURNAL_HEADER_KEYED_HASH(f->header) ? " KEYED-HASH" : "",
(le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
le64toh(f->header->header_size),
le64toh(f->header->arena_size),
le64toh(f->header->n_entries));
if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
- printf("Data Objects: %"PRIu64"\n"
- "Data Hash Table Fill: %.1f%%\n",
+ printf("Data objects: %"PRIu64"\n"
+ "Data hash table fill: %.1f%%\n",
le64toh(f->header->n_data),
100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
- printf("Field Objects: %"PRIu64"\n"
- "Field Hash Table Fill: %.1f%%\n",
+ printf("Field objects: %"PRIu64"\n"
+ "Field hash table fill: %.1f%%\n",
le64toh(f->header->n_fields),
100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
- printf("Tag Objects: %"PRIu64"\n",
+ printf("Tag objects: %"PRIu64"\n",
le64toh(f->header->n_tags));
if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
- printf("Entry Array Objects: %"PRIu64"\n",
+ printf("Entry array objects: %"PRIu64"\n",
le64toh(f->header->n_entry_arrays));
+ if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth))
+ printf("Deepest field hash chain: %" PRIu64"\n",
+ le64toh(f->header->field_hash_chain_depth));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth))
+ printf("Deepest data hash chain: %" PRIu64"\n",
+ le64toh(f->header->data_hash_chain_depth));
+
if (fstat(f->fd, &st) >= 0)
printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (uint64_t) st.st_blocks * 512ULL));
}
JournalFile *f;
void *h;
int r;
- char bytes[FORMAT_BYTES_MAX];
assert(ret);
assert(fd >= 0 || fname);
.prot = prot_from_flags(flags),
.writable = (flags & O_ACCMODE) != O_RDONLY,
-#if HAVE_LZ4
+#if HAVE_ZSTD
+ .compress_zstd = compress,
+#elif HAVE_LZ4
.compress_lz4 = compress,
#elif HAVE_XZ
.compress_xz = compress,
#endif
};
- log_debug("Journal effective settings seal=%s compress=%s compress_threshold_bytes=%s",
- yes_no(f->seal), yes_no(JOURNAL_FILE_COMPRESS(f)),
- format_bytes(bytes, sizeof(bytes), f->compress_threshold_bytes));
+ /* We turn on keyed hashes by default, but provide an environment variable to turn them off, if
+ * people really want that */
+ r = getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH");
+ if (r < 0) {
+ if (r != -ENXIO)
+ log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring.");
+ f->keyed_hash = true;
+ } else
+ f->keyed_hash = r;
+
+ if (DEBUG_LOGGING) {
+ static int last_seal = -1, last_compress = -1, last_keyed_hash = -1;
+ static uint64_t last_bytes = UINT64_MAX;
+ char bytes[FORMAT_BYTES_MAX];
+
+ if (last_seal != f->seal ||
+ last_keyed_hash != f->keyed_hash ||
+ last_compress != JOURNAL_FILE_COMPRESS(f) ||
+ last_bytes != f->compress_threshold_bytes) {
+
+ log_debug("Journal effective settings seal=%s keyed_hash=%s compress=%s compress_threshold_bytes=%s",
+ yes_no(f->seal), yes_no(f->keyed_hash), yes_no(JOURNAL_FILE_COMPRESS(f)),
+ format_bytes(bytes, sizeof bytes, f->compress_threshold_bytes));
+ last_seal = f->seal;
+ last_keyed_hash = f->keyed_hash;
+ last_compress = JOURNAL_FILE_COMPRESS(f);
+ last_bytes = f->compress_threshold_bytes;
+ }
+ }
if (mmap_cache)
f->mmap = mmap_cache_ref(mmap_cache);
if (le_hash != o->data.hash)
return -EBADMSG;
- l = le64toh(o->object.size) - offsetof(Object, data.payload);
+ l = le64toh(READ_NOW(o->object.size));
+ if (l < offsetof(Object, data.payload))
+ return -EBADMSG;
+
+ l -= offsetof(Object, data.payload);
t = (size_t) l;
/* We hit the limit on 32bit machines */
return -E2BIG;
if (o->object.flags & OBJECT_COMPRESSION_MASK) {
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
size_t rsize = 0;
r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
if (r < 0)
return r;
- xor_hash ^= le64toh(u->data.hash);
+ if (JOURNAL_HEADER_KEYED_HASH(to->header))
+ xor_hash ^= jenkins_hash64(data, l);
+ else
+ xor_hash ^= le64toh(u->data.hash);
+
items[i].object_offset = htole64(h);
items[i].hash = u->data.hash;
void journal_default_metrics(JournalMetrics *m, int fd) {
char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX], e[FORMAT_BYTES_MAX];
struct statvfs ss;
- uint64_t fs_size;
+ uint64_t fs_size = 0;
assert(m);
assert(fd >= 0);
if (fstatvfs(fd, &ss) >= 0)
fs_size = ss.f_frsize * ss.f_blocks;
- else {
+ else
log_debug_errno(errno, "Failed to determine disk size: %m");
- fs_size = 0;
- }
if (m->max_use == (uint64_t) -1) {
- if (fs_size > 0) {
- m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
-
- if (m->max_use > DEFAULT_MAX_USE_UPPER)
- m->max_use = DEFAULT_MAX_USE_UPPER;
-
- if (m->max_use < DEFAULT_MAX_USE_LOWER)
- m->max_use = DEFAULT_MAX_USE_LOWER;
- } else
- m->max_use = DEFAULT_MAX_USE_LOWER;
+ if (fs_size > 0)
+ m->max_use = CLAMP(PAGE_ALIGN(fs_size / 10), /* 10% of file system size */
+ MAX_USE_LOWER, MAX_USE_UPPER);
+ else
+ m->max_use = MAX_USE_LOWER;
} else {
m->max_use = PAGE_ALIGN(m->max_use);
m->max_use = JOURNAL_FILE_SIZE_MIN*2;
}
- if (m->min_use == (uint64_t) -1)
- m->min_use = DEFAULT_MIN_USE;
+ if (m->min_use == (uint64_t) -1) {
+ if (fs_size > 0)
+ m->min_use = CLAMP(PAGE_ALIGN(fs_size / 50), /* 2% of file system size */
+ MIN_USE_LOW, MIN_USE_HIGH);
+ else
+ m->min_use = MIN_USE_LOW;
+ }
if (m->min_use > m->max_use)
m->min_use = m->max_use;
- if (m->max_size == (uint64_t) -1) {
- m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
-
- if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
- m->max_size = DEFAULT_MAX_SIZE_UPPER;
- } else
+ if (m->max_size == (uint64_t) -1)
+ m->max_size = MIN(PAGE_ALIGN(m->max_use / 8), /* 8 chunks */
+ MAX_SIZE_UPPER);
+ else
m->max_size = PAGE_ALIGN(m->max_size);
if (m->max_size != 0) {
if (m->min_size == (uint64_t) -1)
m->min_size = JOURNAL_FILE_SIZE_MIN;
- else {
- m->min_size = PAGE_ALIGN(m->min_size);
-
- if (m->min_size < JOURNAL_FILE_SIZE_MIN)
- m->min_size = JOURNAL_FILE_SIZE_MIN;
-
- if (m->max_size != 0 && m->min_size > m->max_size)
- m->max_size = m->min_size;
- }
+ else
+ m->min_size = CLAMP(PAGE_ALIGN(m->min_size),
+ JOURNAL_FILE_SIZE_MIN,
+ m->max_size ?: UINT64_MAX);
if (m->keep_free == (uint64_t) -1) {
-
- if (fs_size > 0) {
- m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
-
- if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
- m->keep_free = DEFAULT_KEEP_FREE_UPPER;
-
- } else
+ if (fs_size > 0)
+ m->keep_free = MIN(PAGE_ALIGN(fs_size / 20), /* 5% of file system size */
+ KEEP_FREE_UPPER);
+ else
m->keep_free = DEFAULT_KEEP_FREE;
}
return true;
}
- /* Let's check if the hash tables grew over a certain fill
- * level (75%, borrowing this value from Java's hash table
- * implementation), and if so suggest a rotation. To calculate
- * the fill level we need the n_data field, which only exists
- * in newer versions. */
+ /* Let's check if the hash tables grew over a certain fill level (75%, borrowing this value from
+ * Java's hash table implementation), and if so suggest a rotation. To calculate the fill level we
+ * need the n_data field, which only exists in newer versions. */
if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
return true;
}
+ /* If there are too many hash collisions somebody is most likely playing games with us. Hence, if our
+ * longest chain is longer than some threshold, let's suggest rotation. */
+ if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) &&
+ le64toh(f->header->data_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) {
+ log_debug("Data hash table of %s has deepest hash chain of length %" PRIu64 ", suggesting rotation.",
+ f->path, le64toh(f->header->data_hash_chain_depth));
+ return true;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) &&
+ le64toh(f->header->field_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) {
+ log_debug("Field hash table of %s has deepest hash chain of length %" PRIu64 ", suggesting rotation.",
+ f->path, le64toh(f->header->field_hash_chain_depth));
+ return true;
+ }
+
/* Are the data objects properly indexed by field objects? */
if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&