git.ipfire.org Git - thirdparty/systemd.git/commitdiff
journal: use a different hash function for each journal file
author: Lennart Poettering <lennart@poettering.net>
Fri, 29 May 2020 22:00:50 +0000 (00:00 +0200)
committer: Lennart Poettering <lennart@poettering.net>
Thu, 25 Jun 2020 13:01:45 +0000 (15:01 +0200)
This adds a new (incompatible) feature to journal files: if enabled the
hash function used for the hash tables is no longer jenkins hash with a
zero key, but siphash keyed by the file uuid that is included in the
file header anyway. This should make our hash tables more robust against
collision attacks, as long as the attacker has no read access to the
journal files. We switch from jenkins to siphash simply because it's
better known and we are standardizing the rest of our codebase on it.

This is hardening in order to make collision attacks harder for clients
that can forge log messages but have no read access to the logs. It has
no effect on clients that have read access.

src/journal/journal-def.h
src/journal/journal-file.c
src/journal/journal-file.h
src/journal/journal-internal.h
src/journal/journal-verify.c
src/journal/sd-journal.c
src/journal/test-journal-stream.c

index 54260c97b02406a598ce696fde86d7d12c78c9fd..431f46bb593bbf2834984de27917d228610f19d8 100644 (file)
@@ -147,18 +147,22 @@ enum {
 enum {
         HEADER_INCOMPATIBLE_COMPRESSED_XZ   = 1 << 0,
         HEADER_INCOMPATIBLE_COMPRESSED_LZ4  = 1 << 1,
+        HEADER_INCOMPATIBLE_KEYED_HASH      = 1 << 2,
 };
 
-#define HEADER_INCOMPATIBLE_ANY (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+#define HEADER_INCOMPATIBLE_ANY              \
+        (HEADER_INCOMPATIBLE_COMPRESSED_XZ|  \
+         HEADER_INCOMPATIBLE_COMPRESSED_LZ4| \
+         HEADER_INCOMPATIBLE_KEYED_HASH)
 
 #if HAVE_XZ && HAVE_LZ4
 #  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_ANY
 #elif HAVE_XZ
-#  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_COMPRESSED_XZ
+#  define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_KEYED_HASH)
 #elif HAVE_LZ4
-#  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_COMPRESSED_LZ4
+#  define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_LZ4|HEADER_INCOMPATIBLE_KEYED_HASH)
 #else
-#  define HEADER_INCOMPATIBLE_SUPPORTED 0
+#  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_KEYED_HASH
 #endif
 
 enum {
index c77a9436e6b7abb15ce34dd7e0479cdc6797585c..8ae966a6b2d9fac84af904a0db9522267b101387 100644 (file)
@@ -16,6 +16,7 @@
 #include "btrfs-util.h"
 #include "chattr-util.h"
 #include "compress.h"
+#include "env-util.h"
 #include "fd-util.h"
 #include "format-util.h"
 #include "fs-util.h"
@@ -419,7 +420,8 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
 
         h.incompatible_flags |= htole32(
                 f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
-                f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
+                f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4 |
+                f->keyed_hash * HEADER_INCOMPATIBLE_KEYED_HASH);
 
         h.compatible_flags = htole32(
                 f->seal * HEADER_COMPATIBLE_SEALED);
@@ -486,16 +488,21 @@ static bool warn_wrong_flags(const JournalFile *f, bool compatible) {
                                   f->path, type, flags & ~any);
                 flags = (flags & any) & ~supported;
                 if (flags) {
-                        const char* strv[3];
+                        const char* strv[4];
                         unsigned n = 0;
                         _cleanup_free_ char *t = NULL;
 
-                        if (compatible && (flags & HEADER_COMPATIBLE_SEALED))
-                                strv[n++] = "sealed";
-                        if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ))
-                                strv[n++] = "xz-compressed";
-                        if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4))
-                                strv[n++] = "lz4-compressed";
+                        if (compatible) {
+                                if (flags & HEADER_COMPATIBLE_SEALED)
+                                        strv[n++] = "sealed";
+                        } else {
+                                if (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ)
+                                        strv[n++] = "xz-compressed";
+                                if (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+                                        strv[n++] = "lz4-compressed";
+                                if (flags & HEADER_INCOMPATIBLE_KEYED_HASH)
+                                        strv[n++] = "keyed-hash";
+                        }
                         strv[n] = NULL;
                         assert(n < ELEMENTSOF(strv));
 
@@ -595,6 +602,8 @@ static int journal_file_verify_header(JournalFile *f) {
 
         f->seal = JOURNAL_HEADER_SEALED(f->header);
 
+        f->keyed_hash = JOURNAL_HEADER_KEYED_HASH(f->header);
+
         return 0;
 }
 
@@ -1334,21 +1343,35 @@ int journal_file_find_field_object_with_hash(
         return 0;
 }
 
+uint64_t journal_file_hash_data(
+                JournalFile *f,
+                const void *data,
+                size_t sz) {
+
+        assert(f);
+        assert(data || sz == 0);
+
+        /* We try to unify our codebase on siphash, hence new-styled journal files utilizing the keyed hash
+         * function use siphash. Old journal files use the Jenkins hash. */
+
+        if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                return siphash24(data, sz, f->header->file_id.bytes);
+
+        return jenkins_hash64(data, sz);
+}
+
 int journal_file_find_field_object(
                 JournalFile *f,
                 const void *field, uint64_t size,
                 Object **ret, uint64_t *ret_offset) {
 
-        uint64_t hash;
-
         assert(f);
         assert(field && size > 0);
 
-        hash = jenkins_hash64(field, size);
-
         return journal_file_find_field_object_with_hash(
                         f,
-                        field, size, hash,
+                        field, size,
+                        journal_file_hash_data(f, field, size),
                         ret, ret_offset);
 }
 
@@ -1446,16 +1469,13 @@ int journal_file_find_data_object(
                 const void *data, uint64_t size,
                 Object **ret, uint64_t *ret_offset) {
 
-        uint64_t hash;
-
         assert(f);
         assert(data || size == 0);
 
-        hash = jenkins_hash64(data, size);
-
         return journal_file_find_data_object_with_hash(
                         f,
-                        data, size, hash,
+                        data, size,
+                        journal_file_hash_data(f, data, size),
                         ret, ret_offset);
 }
 
@@ -1472,7 +1492,7 @@ static int journal_file_append_field(
         assert(f);
         assert(field && size > 0);
 
-        hash = jenkins_hash64(field, size);
+        hash = journal_file_hash_data(f, field, size);
 
         r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
         if (r < 0)
@@ -1535,7 +1555,7 @@ static int journal_file_append_data(
         assert(f);
         assert(data || size == 0);
 
-        hash = jenkins_hash64(data, size);
+        hash = journal_file_hash_data(f, data, size);
 
         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
         if (r < 0)
@@ -2028,7 +2048,20 @@ int journal_file_append_entry(
                 if (r < 0)
                         return r;
 
-                xor_hash ^= le64toh(o->data.hash);
+                /* When calculating the XOR hash field, we need to take special care if the "keyed-hash"
+                 * journal file flag is on. We use the XOR hash field to quickly determine the identity of a
+                 * specific record, and give records with otherwise identical position (i.e. match in seqno,
+                 * timestamp, …) a stable ordering. But for that we can't have it that the hash of the
+                 * objects in each file is different since they are keyed. Hence let's calculate the Jenkins
+                 * hash here for that. This also has the benefit that cursors for old and new journal files
+                 * are completely identical (they include the XOR hash after all). For classic Jenkins-hash
+                 * files things are easier, we can just take the value from the stored record directly. */
+
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        xor_hash ^= jenkins_hash64(iovec[i].iov_base, iovec[i].iov_len);
+                else
+                        xor_hash ^= le64toh(o->data.hash);
+
                 items[i].object_offset = htole64(p);
                 items[i].hash = o->data.hash;
         }
@@ -3149,7 +3182,7 @@ void journal_file_print_header(JournalFile *f) {
                "Sequential number ID: %s\n"
                "State: %s\n"
                "Compatible flags:%s%s\n"
-               "Incompatible flags:%s%s%s\n"
+               "Incompatible flags:%s%s%s%s\n"
                "Header size: %"PRIu64"\n"
                "Arena size: %"PRIu64"\n"
                "Data hash table size: %"PRIu64"\n"
@@ -3174,6 +3207,7 @@ void journal_file_print_header(JournalFile *f) {
                (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
                JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
                JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
+               JOURNAL_HEADER_KEYED_HASH(f->header) ? " KEYED-HASH" : "",
                (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
                le64toh(f->header->header_size),
                le64toh(f->header->arena_size),
@@ -3299,19 +3333,31 @@ int journal_file_open(
 #endif
         };
 
+        /* We turn on keyed hashes by default, but provide an environment variable to turn them off, if
+         * people really want that */
+        r = getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH");
+        if (r < 0) {
+                if (r != -ENXIO)
+                        log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring.");
+                f->keyed_hash = true;
+        } else
+                f->keyed_hash = r;
+
         if (DEBUG_LOGGING) {
-                static int last_seal = -1, last_compress = -1;
+                static int last_seal = -1, last_compress = -1, last_keyed_hash = -1;
                 static uint64_t last_bytes = UINT64_MAX;
                 char bytes[FORMAT_BYTES_MAX];
 
                 if (last_seal != f->seal ||
+                    last_keyed_hash != f->keyed_hash ||
                     last_compress != JOURNAL_FILE_COMPRESS(f) ||
                     last_bytes != f->compress_threshold_bytes) {
 
-                        log_debug("Journal effective settings seal=%s compress=%s compress_threshold_bytes=%s",
-                                  yes_no(f->seal), yes_no(JOURNAL_FILE_COMPRESS(f)),
+                        log_debug("Journal effective settings seal=%s keyed_hash=%s compress=%s compress_threshold_bytes=%s",
+                                  yes_no(f->seal), yes_no(f->keyed_hash), yes_no(JOURNAL_FILE_COMPRESS(f)),
                                   format_bytes(bytes, sizeof bytes, f->compress_threshold_bytes));
                         last_seal = f->seal;
+                        last_keyed_hash = f->keyed_hash;
                         last_compress = JOURNAL_FILE_COMPRESS(f);
                         last_bytes = f->compress_threshold_bytes;
                 }
@@ -3769,7 +3815,11 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
                 if (r < 0)
                         return r;
 
-                xor_hash ^= le64toh(u->data.hash);
+                if (JOURNAL_HEADER_KEYED_HASH(to->header))
+                        xor_hash ^= jenkins_hash64(data, l);
+                else
+                        xor_hash ^= le64toh(u->data.hash);
+
                 items[i].object_offset = htole64(h);
                 items[i].hash = u->data.hash;
 
index 121e9153a6ba062407ed228c52c3e0fce0049acb..732c2f31cd263dc28dae7406668843bf03175244 100644 (file)
@@ -71,6 +71,7 @@ typedef struct JournalFile {
         bool defrag_on_close:1;
         bool close_fd:1;
         bool archive:1;
+        bool keyed_hash:1;
 
         direction_t last_direction;
         LocationType location_type;
@@ -195,6 +196,9 @@ static inline bool VALID_EPOCH(uint64_t u) {
 #define JOURNAL_HEADER_COMPRESSED_LZ4(h) \
         FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
 
+#define JOURNAL_HEADER_KEYED_HASH(h) \
+        FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_KEYED_HASH)
+
 int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret);
 
 uint64_t journal_file_entry_n_items(Object *o) _pure_;
@@ -262,3 +266,5 @@ static inline bool JOURNAL_FILE_COMPRESS(JournalFile *f) {
         assert(f);
         return f->compress_xz || f->compress_lz4;
 }
+
+uint64_t journal_file_hash_data(JournalFile *f, const void *data, size_t sz);
index 028f0d9055d470480e075c7087506302bb2f16c0..a649acf634e74d4409da33af209cce716f5a7eaa 100644 (file)
@@ -32,7 +32,7 @@ struct Match {
         /* For concrete matches */
         char *data;
         size_t size;
-        uint64_t hash;
+        uint64_t hash; /* old-style jenkins hash. New-style siphash is different per file, hence won't be cached here */
 
         /* For terms */
         LIST_HEAD(Match, matches);
index c70ab7aa24459ea2550feb79edf9ea29122401cf..fe9997bc14518c430dc2e691a8eb160da2d3ea50 100644 (file)
@@ -163,9 +163,9 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
                                 return r;
                         }
 
-                        h2 = jenkins_hash64(b, b_size);
+                        h2 = journal_file_hash_data(f, b, b_size);
                 } else
-                        h2 = jenkins_hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
+                        h2 = journal_file_hash_data(f, o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
 
                 if (h1 != h2) {
                         error(offset, "Invalid hash (%08"PRIx64" vs. %08"PRIx64, h1, h2);
index 5ddca5f93a8b8e370bfe52c96c3213e2626a5ecb..515bb82621f02d9585c737978bb68e0b9cec31de 100644 (file)
@@ -279,6 +279,8 @@ _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size)
         assert(j->level1->type == MATCH_OR_TERM);
         assert(j->level2->type == MATCH_AND_TERM);
 
+        /* Old-style Jenkins (unkeyed) hashing only here. We do not cover new-style siphash (keyed) hashing
+         * here, since it's different for each file, and thus can't be pre-calculated in the Match object. */
         hash = jenkins_hash64(data, size);
 
         LIST_FOREACH(matches, l3, j->level2->matches) {
@@ -501,9 +503,16 @@ static int next_for_match(
         assert(f);
 
         if (m->type == MATCH_DISCRETE) {
-                uint64_t dp;
+                uint64_t dp, hash;
 
-                r = journal_file_find_data_object_with_hash(f, m->data, m->size, m->hash, NULL, &dp);
+                /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise
+                 * we can use what we pre-calculated. */
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        hash = journal_file_hash_data(f, m->data, m->size);
+                else
+                        hash = m->hash;
+
+                r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp);
                 if (r <= 0)
                         return r;
 
@@ -590,9 +599,14 @@ static int find_location_for_match(
         assert(f);
 
         if (m->type == MATCH_DISCRETE) {
-                uint64_t dp;
+                uint64_t dp, hash;
+
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        hash = journal_file_hash_data(f, m->data, m->size);
+                else
+                        hash = m->hash;
 
-                r = journal_file_find_data_object_with_hash(f, m->data, m->size, m->hash, NULL, &dp);
+                r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp);
                 if (r <= 0)
                         return r;
 
index 6d97bc5ce8aa6f656c434efdc903d8a4f7bf14ff..50aab11c6a8b61182f4a3914477e7b7909aa79bb 100644 (file)
@@ -58,7 +58,7 @@ static void verify_contents(sd_journal *j, unsigned skip) {
                 assert_se(i == N_ENTRIES);
 }
 
-int main(int argc, char *argv[]) {
+static void run_test(void) {
         JournalFile *one, *two, *three;
         char t[] = "/var/tmp/journal-stream-XXXXXX";
         unsigned i;
@@ -68,12 +68,6 @@ int main(int argc, char *argv[]) {
         size_t l;
         dual_timestamp previous_ts = DUAL_TIMESTAMP_NULL;
 
-        /* journal_file_open requires a valid machine id */
-        if (access("/etc/machine-id", F_OK) != 0)
-                return log_tests_skipped("/etc/machine-id not found");
-
-        test_setup_logging(LOG_DEBUG);
-
         assert_se(mkdtemp(t));
         assert_se(chdir(t) >= 0);
         (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
@@ -177,6 +171,22 @@ int main(int argc, char *argv[]) {
                 printf("%.*s\n", (int) l, (const char*) data);
 
         assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+
+        /* journal_file_open requires a valid machine id */
+        if (access("/etc/machine-id", F_OK) != 0)
+                return log_tests_skipped("/etc/machine-id not found");
+
+        test_setup_logging(LOG_DEBUG);
+
+        /* Run this test twice. Once with old hashing and once with new hashing */
+        assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "1", 1) >= 0);
+        run_test();
+
+        assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "0", 1) >= 0);
+        run_test();
 
         return 0;
 }