]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
journal: Store offsets to tail entry array objects in chain
author: Daan De Meyer <daan.j.demeyer@gmail.com>
Fri, 21 Jan 2022 18:29:41 +0000 (18:29 +0000)
committer: Daan De Meyer <daan.j.demeyer@gmail.com>
Fri, 7 Oct 2022 10:28:09 +0000 (12:28 +0200)
Previously, we'd iterate an entry array chain from start to end every
time we added an entry offset to it. To speed up this operation, we cache
the last entry array in the chain and how many items it contains.
This allows the addition of an entry to the chain to be done in
constant time instead of linear time as we don't have to iterate
the entire chain anymore every time we add an entry.

docs/JOURNAL_FILE_FORMAT.md
src/libsystemd/sd-journal/journal-authenticate.c
src/libsystemd/sd-journal/journal-def.h
src/libsystemd/sd-journal/journal-file.c
src/libsystemd/sd-journal/journal-file.h
src/libsystemd/sd-journal/journal-verify.c

index 5f7f97c1b80ad0339ab895a840c0d94ccf9b5dc9..2d0debd858c0120a5ecfc688e512e81a286dd01e 100644 (file)
@@ -177,6 +177,9 @@ _packed_ struct Header {
         /* Added in 246 */
         le64_t data_hash_chain_depth;
         le64_t field_hash_chain_depth;
+        /* Added in 252 */
+        le32_t tail_entry_array_offset;
+        le32_t tail_entry_array_n_entries;
 };
 ```
 
@@ -231,6 +234,8 @@ became too frequent.
 Similar, **field_hash_chain_depth** is a counter of the deepest chain in the
 field hash table, minus one.
 
+**tail_entry_array_offset** and **tail_entry_array_n_entries** allow immediate
+access to the last entry array in the global entry array chain.
 
 ## Extensibility
 
@@ -397,7 +402,16 @@ _packed_ struct DataObject {
         le64_t entry_offset; /* the first array entry we store inline */
         le64_t entry_array_offset;
         le64_t n_entries;
-        uint8_t payload[];
+        union {
+                struct {
+                        uint8_t payload[];
+                } regular;
+                struct {
+                        le32_t tail_entry_array_offset;
+                        le32_t tail_entry_array_n_entries;
+                        uint8_t payload[];
+                } compact;
+        };
 };
 ```
 
@@ -430,6 +444,9 @@ OBJECT_COMPRESSED_XZ/OBJECT_COMPRESSED_LZ4/OBJECT_COMPRESSED_ZSTD is set in the
 `ObjectHeader`, in which case the payload is compressed with the indicated
 compression algorithm.
 
+If the `HEADER_INCOMPATIBLE_COMPACT` flag is set, two extra fields are stored to
+allow immediate access to the tail entry array in the DATA object's entry array
+chain.
 
 ## Field Objects
 
index 3965f3f589c310f31ed977d556481119825de2b5..1cb894338979365d104d0b0775fc1bfa79760eb3 100644 (file)
@@ -248,7 +248,7 @@ int journal_file_hmac_put_object(JournalFile *f, ObjectType type, Object *o, uin
         case OBJECT_DATA:
                 /* All but hash and payload are mutable */
                 gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
-                gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
+                gcry_md_write(f->hmac, journal_file_data_payload_field(f, o), le64toh(o->object.size) - journal_file_data_payload_offset(f));
                 break;
 
         case OBJECT_FIELD:
index f04a2298c43aa0765cbf0c1df43a508e622877d8..8f994b01787252ea857aac973698ac2ba5dc0c45 100644 (file)
@@ -65,8 +65,17 @@ struct ObjectHeader {
         le64_t entry_offset; /* the first array entry we store inline */ \
         le64_t entry_array_offset;                                      \
         le64_t n_entries;                                               \
-        uint8_t payload[];                                              \
-        }
+        union {                                                         \
+                struct {                                                \
+                        uint8_t payload[0];                             \
+                } regular;                                              \
+                struct {                                                \
+                        le32_t tail_entry_array_offset;                 \
+                        le32_t tail_entry_array_n_entries;              \
+                        uint8_t payload[0];                             \
+                } compact;                                              \
+        };                                                              \
+}
 
 struct DataObject DataObject__contents;
 struct DataObject__packed DataObject__contents _packed_;
@@ -222,12 +231,15 @@ enum {
         /* Added in 246 */                              \
         le64_t data_hash_chain_depth;                   \
         le64_t field_hash_chain_depth;                  \
+        /* Added in 252 */                              \
+        le32_t tail_entry_array_offset;                 \
+        le32_t tail_entry_array_n_entries;              \
         }
 
 struct Header struct_Header__contents;
 struct Header__packed struct_Header__contents _packed_;
 assert_cc(sizeof(struct Header) == sizeof(struct Header__packed));
-assert_cc(sizeof(struct Header) == 256);
+assert_cc(sizeof(struct Header) == 264);
 
 #define FSS_HEADER_SIGNATURE                                            \
         ((const char[]) { 'K', 'S', 'H', 'H', 'R', 'H', 'L', 'P' })
index 67bd2305adb0b5bc10d65c9cdf31511ce0a72fba..7dbbd4889c4d03f8cf9caa29bbebd17e29ec291d 100644 (file)
@@ -662,7 +662,7 @@ static int journal_file_move_to(
         return mmap_cache_fd_get(f->cache_fd, type_to_context(type), keep_always, offset, size, &f->last_stat, ret);
 }
 
-static uint64_t minimum_header_size(Object *o) {
+static uint64_t minimum_header_size(JournalFile *f, Object *o) {
 
         static const uint64_t table[] = {
                 [OBJECT_DATA]             = sizeof(DataObject),
@@ -674,15 +674,22 @@ static uint64_t minimum_header_size(Object *o) {
                 [OBJECT_TAG]              = sizeof(TagObject),
         };
 
+        assert(f);
+        assert(o);
+
+        if (o->object.type == OBJECT_DATA)
+                return journal_file_data_payload_offset(f);
+
         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
                 return sizeof(ObjectHeader);
 
         return table[o->object.type];
 }
 
-static int check_object_header(Object *o, ObjectType type, uint64_t offset) {
+static int check_object_header(JournalFile *f, Object *o, ObjectType type, uint64_t offset) {
         uint64_t s;
 
+        assert(f);
         assert(o);
 
         s = le64toh(READ_NOW(o->object.size));
@@ -706,7 +713,7 @@ static int check_object_header(Object *o, ObjectType type, uint64_t offset) {
                                        "Attempt to move to object of unexpected type: %" PRIu64,
                                        offset);
 
-        if (s < minimum_header_size(o))
+        if (s < minimum_header_size(f, o))
                 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                        "Attempt to move to truncated object: %" PRIu64,
                                        offset);
@@ -728,10 +735,10 @@ static int check_object(JournalFile *f, Object *o, uint64_t offset) {
                                                le64toh(o->data.n_entries),
                                                offset);
 
-                if (le64toh(o->object.size) <= offsetof(Object, data.payload))
+                if (le64toh(o->object.size) <= journal_file_data_payload_offset(f))
                         return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                                "Bad object size (<= %zu): %" PRIu64 ": %" PRIu64,
-                                               offsetof(Object, data.payload),
+                                               journal_file_data_payload_offset(f),
                                                le64toh(o->object.size),
                                                offset);
 
@@ -883,7 +890,7 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset
         if (r < 0)
                 return r;
 
-        r = check_object_header(o, type, offset);
+        r = check_object_header(f, o, type, offset);
         if (r < 0)
                 return r;
 
@@ -891,7 +898,7 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset
         if (r < 0)
                 return r;
 
-        r = check_object_header(o, type, offset);
+        r = check_object_header(f, o, type, offset);
         if (r < 0)
                 return r;
 
@@ -935,11 +942,11 @@ int journal_file_read_object_header(JournalFile *f, ObjectType type, uint64_t of
                                        "Failed to read short object at offset: %" PRIu64,
                                        offset);
 
-        r = check_object_header(&o, type, offset);
+        r = check_object_header(f, &o, type, offset);
         if (r < 0)
                 return r;
 
-        if ((size_t) n < minimum_header_size(&o))
+        if ((size_t) n < minimum_header_size(f, &o))
                 return log_debug_errno(SYNTHETIC_ERRNO(EIO),
                                        "Short read while reading object: %" PRIu64,
                                        offset);
@@ -1541,15 +1548,35 @@ static int journal_file_append_field(
         return 0;
 }
 
+static Compression maybe_compress_payload(JournalFile *f, uint8_t *dst, const uint8_t *src, uint64_t size, size_t *rsize) {
+        Compression compression = COMPRESSION_NONE; /* result when compression is compiled out, not attempted, or fails */
+        /* Tries to compress the size bytes at src into dst. Returns the algorithm reported by compress_blob(), with *rsize as the compressed length (per the debug log below), or COMPRESSION_NONE if the payload was not compressed. */
+#if HAVE_COMPRESSION
+        if (JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) { /* only payloads at or above the file's threshold are attempted */
+                compression = compress_blob(src, size, dst, size - 1, rsize); /* output limit is size - 1; presumably so a result must be smaller than the input (confirm against compress_blob()) */
+                if (compression > 0) {
+                        log_debug("Compressed data object %"PRIu64" -> %zu using %s",
+                                  size, *rsize, compression_to_string(compression));
+                } else
+                        /* Compression didn't work, we don't really care why, let's continue without compression */
+                        compression = COMPRESSION_NONE;
+        }
+#endif
+
+        return compression;
+}
+
 static int journal_file_append_data(
                 JournalFile *f,
                 const void *data, uint64_t size,
                 Object **ret, uint64_t *ret_offset) {
 
-        uint64_t hash, p, fp, osize;
+        uint64_t hash, p, osize;
         Object *o, *fo;
-        int r, compression = 0;
+        size_t rsize = 0;
+        Compression c;
         const void *eq;
+        int r;
 
         assert(f);
 
@@ -1568,32 +1595,20 @@ static int journal_file_append_data(
         if (!eq)
                 return -EINVAL;
 
-        osize = offsetof(Object, data.payload) + size;
+        osize = journal_file_data_payload_offset(f) + size;
         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
         if (r < 0)
                 return r;
 
         o->data.hash = htole64(hash);
 
-#if HAVE_COMPRESSION
-        if (JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) {
-                size_t rsize = 0;
-
-                compression = compress_blob(data, size, o->data.payload, size - 1, &rsize);
-                if (compression > COMPRESSION_NONE) {
-                        o->object.size = htole64(offsetof(Object, data.payload) + rsize);
-                        o->object.flags |= COMPRESSION_TO_OBJECT_FLAG(compression);
-
-                        log_debug("Compressed data object %"PRIu64" -> %zu using %s",
-                                  size, rsize, compression_to_string(compression));
-                } else
-                        /* Compression didn't work, we don't really care why, let's continue without compression */
-                        compression = COMPRESSION_NONE;
-        }
-#endif
+        c = maybe_compress_payload(f, journal_file_data_payload_field(f, o), data, size, &rsize);
 
-        if (compression == 0)
-                memcpy_safe(o->data.payload, data, size);
+        if (c != COMPRESSION_NONE) {
+                o->object.size = htole64(journal_file_data_payload_offset(f) + rsize);
+                o->object.flags |= COMPRESSION_TO_OBJECT_FLAG(c);
+        } else
+                memcpy_safe(journal_file_data_payload_field(f, o), data, size);
 
         r = journal_file_link_data(f, o, p, hash);
         if (r < 0)
@@ -1611,7 +1626,7 @@ static int journal_file_append_data(
 #endif
 
         /* Create field object ... */
-        r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
+        r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, NULL);
         if (r < 0)
                 return r;
 
@@ -1715,17 +1730,17 @@ int journal_file_data_payload(
         }
 
         size = le64toh(READ_NOW(o->object.size));
-        if (size < offsetof(Object, data.payload))
+        if (size < journal_file_data_payload_offset(f))
                 return -EBADMSG;
 
-        size -= offsetof(Object, data.payload);
+        size -= journal_file_data_payload_offset(f);
 
         c = COMPRESSION_FROM_OBJECT(o);
         if (c < 0)
                 return -EPROTONOSUPPORT;
 
-        return maybe_decompress_payload(f, o->data.payload, size, c, field, field_length, data_threshold,
-                                        ret_data, ret_size);
+        return maybe_decompress_payload(f, journal_file_data_payload_field(f, o), size, c, field,
+                                        field_length, data_threshold, ret_data, ret_size);
 }
 
 uint64_t journal_file_entry_n_items(JournalFile *f, Object *o) {
@@ -1788,6 +1803,8 @@ static void write_entry_array_item(JournalFile *f, Object *o, uint64_t i, uint64
 static int link_entry_into_array(JournalFile *f,
                                  le64_t *first,
                                  le64_t *idx,
+                                 le32_t *tail,
+                                 le32_t *tidx,
                                  uint64_t p) {
         int r;
         uint64_t n = 0, ap = 0, q, i, a, hidx;
@@ -1799,8 +1816,9 @@ static int link_entry_into_array(JournalFile *f,
         assert(idx);
         assert(p > 0);
 
-        a = le64toh(*first);
-        i = hidx = le64toh(READ_NOW(*idx));
+        a = tail ? le32toh(*tail) : le64toh(*first);
+        hidx = le64toh(READ_NOW(*idx));
+        i = tidx ? le32toh(READ_NOW(*tidx)) : hidx;
         while (a > 0) {
 
                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
@@ -1811,6 +1829,8 @@ static int link_entry_into_array(JournalFile *f,
                 if (i < n) {
                         write_entry_array_item(f, o, i, p);
                         *idx = htole64(hidx + 1);
+                        if (tidx)
+                                *tidx = htole32(le32toh(*tidx) + 1);
                         return 0;
                 }
 
@@ -1851,10 +1871,15 @@ static int link_entry_into_array(JournalFile *f,
                 o->entry_array.next_entry_array_offset = htole64(q);
         }
 
+        if (tail)
+                *tail = htole32(q);
+
         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
 
         *idx = htole64(hidx + 1);
+        if (tidx)
+                *tidx = htole32(1);
 
         return 0;
 }
@@ -1863,6 +1888,8 @@ static int link_entry_into_array_plus_one(JournalFile *f,
                                           le64_t *extra,
                                           le64_t *first,
                                           le64_t *idx,
+                                          le32_t *tail,
+                                          le32_t *tidx,
                                           uint64_t p) {
 
         uint64_t hidx;
@@ -1883,7 +1910,7 @@ static int link_entry_into_array_plus_one(JournalFile *f,
                 le64_t i;
 
                 i = htole64(hidx - 1);
-                r = link_entry_into_array(f, first, &i, p);
+                r = link_entry_into_array(f, first, &i, tail, tidx, p);
                 if (r < 0)
                         return r;
         }
@@ -1907,6 +1934,8 @@ static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offs
                                               &o->data.entry_offset,
                                               &o->data.entry_array_offset,
                                               &o->data.n_entries,
+                                              JOURNAL_HEADER_COMPACT(f->header) ? &o->data.compact.tail_entry_array_offset : NULL,
+                                              JOURNAL_HEADER_COMPACT(f->header) ? &o->data.compact.tail_entry_array_n_entries : NULL,
                                               offset);
 }
 
@@ -1933,6 +1962,8 @@ static int journal_file_link_entry(
         r = link_entry_into_array(f,
                                   &f->header->entry_array_offset,
                                   &f->header->n_entries,
+                                  JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_offset) ? &f->header->tail_entry_array_offset : NULL,
+                                  JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_n_entries) ? &f->header->tail_entry_array_n_entries : NULL,
                                   offset);
         if (r < 0)
                 return r;
index 79769537937c7753db01419bbaee655ffba0aefe..e5b9765471e61d1814c416cbc1f6e679ee3acd79 100644 (file)
@@ -223,6 +223,16 @@ int journal_file_data_payload(
                 void **ret_data,
                 size_t *ret_size);
 
+static inline size_t journal_file_data_payload_offset(JournalFile *f) { /* Byte offset of a DATA object's payload within Object, for the layout used by file f. */
+        return JOURNAL_HEADER_COMPACT(f->header)
+                        ? offsetof(Object, data.compact.payload) /* compact layout: payload comes after the two 32-bit tail_entry_array_* fields */
+                        : offsetof(Object, data.regular.payload); /* regular layout: payload comes directly after n_entries */
+}
+
+static inline uint8_t* journal_file_data_payload_field(JournalFile *f, Object *o) { /* Pointer to the payload member of DATA object o that matches file f's layout. */
+        return JOURNAL_HEADER_COMPACT(f->header) ? o->data.compact.payload : o->data.regular.payload; /* same compact/regular choice as journal_file_data_payload_offset() */
+}
+
 uint64_t journal_file_entry_array_n_items(JournalFile *f, Object *o) _pure_;
 
 static inline uint64_t journal_file_entry_array_item(JournalFile *f, Object *o, size_t i) {
index 37d2a656b2123fb597160c25fd1d9bd7849a9fd5..8b2c468a0b7178f0fd636d48daf80f224fcb12ca 100644 (file)
@@ -170,16 +170,16 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
                         return -EBADMSG;
                 }
 
-                if (le64toh(o->object.size) - offsetof(Object, data.payload) <= 0) {
+                if (le64toh(o->object.size) - journal_file_data_payload_offset(f) <= 0) {
                         error(offset, "Bad object size (<= %zu): %"PRIu64,
-                              offsetof(Object, data.payload),
+                              journal_file_data_payload_offset(f),
                               le64toh(o->object.size));
                         return -EBADMSG;
                 }
 
                 h1 = le64toh(o->data.hash);
-                r = hash_payload(f, o, offset, o->data.payload,
-                                 le64toh(o->object.size) - offsetof(Object, data.payload),
+                r = hash_payload(f, o, offset, journal_file_data_payload_field(f, o),
+                                 le64toh(o->object.size) - journal_file_data_payload_offset(f),
                                  &h2);
                 if (r < 0)
                         return r;