#include "btrfs-util.h"
#include "chattr-util.h"
#include "compress.h"
+#include "env-util.h"
#include "fd-util.h"
+#include "format-util.h"
#include "fs-util.h"
#include "journal-authenticate.h"
#include "journal-def.h"
#include "journal-file.h"
#include "lookup3.h"
#include "memory-util.h"
-#include "parse-util.h"
#include "path-util.h"
#include "random-util.h"
#include "set.h"
#define MIN_COMPRESS_THRESHOLD (8ULL)
/* This is the minimum journal file size */
-#define JOURNAL_FILE_SIZE_MIN (512ULL*1024ULL) /* 512 KiB */
+#define JOURNAL_FILE_SIZE_MIN (512 * 1024ULL) /* 512 KiB */
/* These are the lower and upper bounds if we deduce the max_use value
* from the file system size */
-#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
-#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
+#define MAX_USE_LOWER (1 * 1024 * 1024ULL) /* 1 MiB */
+#define MAX_USE_UPPER (4 * 1024 * 1024 * 1024ULL) /* 4 GiB */
-/* This is the default minimal use limit, how much we'll use even if keep_free suggests otherwise. */
-#define DEFAULT_MIN_USE (1ULL*1024ULL*1024ULL) /* 1 MiB */
+/* Those are the lower and upper bounds for the minimal use limit,
+ * i.e. how much we'll use even if keep_free suggests otherwise. */
+#define MIN_USE_LOW (1 * 1024 * 1024ULL) /* 1 MiB */
+#define MIN_USE_HIGH (16 * 1024 * 1024ULL) /* 16 MiB */
/* This is the upper bound if we deduce max_size from max_use */
-#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
+#define MAX_SIZE_UPPER (128 * 1024 * 1024ULL) /* 128 MiB */
/* This is the upper bound if we deduce the keep_free value from the
* file system size */
-#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
+#define KEEP_FREE_UPPER (4 * 1024 * 1024 * 1024ULL) /* 4 GiB */
/* This is the keep_free value when we can't determine the system
* size */
-#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
+#define DEFAULT_KEEP_FREE (1024 * 1024ULL) /* 1 MB */
/* This is the default maximum number of journal files to keep around. */
-#define DEFAULT_N_MAX_FILES (100)
+#define DEFAULT_N_MAX_FILES 100
/* n_data was the first entry we added after the initial file format design */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
#define CHAIN_CACHE_MAX 20
/* How much to increase the journal file size at once each time we allocate something new. */
-#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL) /* 8MB */
+#define FILE_SIZE_INCREASE (8 * 1024 * 1024ULL) /* 8MB */
/* Reread fstat() of the file for detecting deletions at least this often */
#define LAST_STAT_REFRESH_USEC (5*USEC_PER_SEC)
/* The mmap context to use for the header we pick as one above the last defined typed */
#define CONTEXT_HEADER _OBJECT_TYPE_MAX
+/* Longest hash chain to rotate after */
+#define HASH_CHAIN_DEPTH_MAX 100
+
#ifdef __clang__
# pragma GCC diagnostic ignored "-Waddress-of-packed-member"
#endif
}
JournalFile* journal_file_close(JournalFile *f) {
- assert(f);
+ if (!f)
+ return NULL;
#if HAVE_GCRYPT
/* Write the final tag */
ordered_hashmap_free_free(f->chain_cache);
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
free(f->compress_buffer);
#endif
h.incompatible_flags |= htole32(
f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
- f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
+ f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4 |
+ f->compress_zstd * HEADER_INCOMPATIBLE_COMPRESSED_ZSTD |
+ f->keyed_hash * HEADER_INCOMPATIBLE_KEYED_HASH);
h.compatible_flags = htole32(
f->seal * HEADER_COMPATIBLE_SEALED);
}
static int journal_file_refresh_header(JournalFile *f) {
- sd_id128_t boot_id;
int r;
assert(f);
else if (r < 0)
return r;
- r = sd_id128_get_boot(&boot_id);
+ r = sd_id128_get_boot(&f->header->boot_id);
if (r < 0)
return r;
- f->header->boot_id = boot_id;
-
r = journal_file_set_online(f);
/* Sync the online state to disk */
f->path, type, flags & ~any);
flags = (flags & any) & ~supported;
if (flags) {
- const char* strv[3];
+ const char* strv[5];
unsigned n = 0;
_cleanup_free_ char *t = NULL;
- if (compatible && (flags & HEADER_COMPATIBLE_SEALED))
- strv[n++] = "sealed";
- if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ))
- strv[n++] = "xz-compressed";
- if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4))
- strv[n++] = "lz4-compressed";
+ if (compatible) {
+ if (flags & HEADER_COMPATIBLE_SEALED)
+ strv[n++] = "sealed";
+ } else {
+ if (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ)
+ strv[n++] = "xz-compressed";
+ if (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+ strv[n++] = "lz4-compressed";
+ if (flags & HEADER_INCOMPATIBLE_COMPRESSED_ZSTD)
+ strv[n++] = "zstd-compressed";
+ if (flags & HEADER_INCOMPATIBLE_KEYED_HASH)
+ strv[n++] = "keyed-hash";
+ }
strv[n] = NULL;
assert(n < ELEMENTSOF(strv));
if (f->header->state >= _STATE_MAX)
return -EBADMSG;
- header_size = le64toh(f->header->header_size);
+ header_size = le64toh(READ_NOW(f->header->header_size));
/* The first addition was n_data, so check that we are at least this large */
if (header_size < HEADER_SIZE_MIN)
if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
return -EBADMSG;
- arena_size = le64toh(f->header->arena_size);
+ arena_size = le64toh(READ_NOW(f->header->arena_size));
if (UINT64_MAX - header_size < arena_size || header_size + arena_size > (uint64_t) f->last_stat.st_size)
return -ENODATA;
f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
+ f->compress_zstd = JOURNAL_HEADER_COMPRESSED_ZSTD(f->header);
f->seal = JOURNAL_HEADER_SEALED(f->header);
+ f->keyed_hash = JOURNAL_HEADER_KEYED_HASH(f->header);
+
return 0;
}
-static int journal_file_fstat(JournalFile *f) {
+int journal_file_fstat(JournalFile *f) {
int r;
assert(f);
f->last_stat_usec = now(CLOCK_MONOTONIC);
- /* Refuse dealing with with files that aren't regular */
+ /* Refuse dealing with files that aren't regular */
r = stat_verify_regular(&f->last_stat);
if (r < 0)
return r;
}
static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
- uint64_t old_size, new_size;
+ uint64_t old_size, new_size, old_header_size, old_arena_size;
int r;
assert(f);
assert(f->header);
- /* We assume that this file is not sparse, and we know that
- * for sure, since we always call posix_fallocate()
- * ourselves */
+ /* We assume that this file is not sparse, and we know that for sure, since we always call
+ * posix_fallocate() ourselves */
+
+ if (size > PAGE_ALIGN_DOWN(UINT64_MAX) - offset)
+ return -EINVAL;
if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
return -EIO;
- old_size =
- le64toh(f->header->header_size) +
- le64toh(f->header->arena_size);
+ old_header_size = le64toh(READ_NOW(f->header->header_size));
+ old_arena_size = le64toh(READ_NOW(f->header->arena_size));
+ if (old_arena_size > PAGE_ALIGN_DOWN(UINT64_MAX) - old_header_size)
+ return -EBADMSG;
- new_size = PAGE_ALIGN(offset + size);
- if (new_size < le64toh(f->header->header_size))
- new_size = le64toh(f->header->header_size);
+ old_size = old_header_size + old_arena_size;
+
+ new_size = MAX(PAGE_ALIGN(offset + size), old_header_size);
if (new_size <= old_size) {
if (r != 0)
return -r;
- f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
+ f->header->arena_size = htole64(new_size - old_header_size);
return journal_file_fstat(f);
}
return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;
}
-static int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_always, uint64_t offset, uint64_t size, void **ret, size_t *ret_size) {
+static int journal_file_move_to(
+ JournalFile *f,
+ ObjectType type,
+ bool keep_always,
+ uint64_t offset,
+ uint64_t size,
+ void **ret,
+ size_t *ret_size) {
+
int r;
assert(f);
if (size <= 0)
return -EINVAL;
+ if (size > UINT64_MAX - offset)
+ return -EBADMSG;
+
/* Avoid SIGBUS on invalid accesses */
if (offset + size > (uint64_t) f->last_stat.st_size) {
/* Hmm, out of range? Let's refresh the fstat() data
switch (o->object.type) {
- case OBJECT_DATA: {
+ case OBJECT_DATA:
if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad n_entries: %" PRIu64 ": %" PRIu64,
le64toh(o->data.n_entries),
offset);
- if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0)
+ if (le64toh(o->object.size) <= offsetof(DataObject, payload))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad object size (<= %zu): %" PRIu64 ": %" PRIu64,
offsetof(DataObject, payload),
offset);
break;
- }
case OBJECT_FIELD:
- if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0)
+ if (le64toh(o->object.size) <= offsetof(FieldObject, payload))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad field size (<= %zu): %" PRIu64 ": %" PRIu64,
offsetof(FieldObject, payload),
offset);
break;
- case OBJECT_ENTRY:
- if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
+ case OBJECT_ENTRY: {
+ uint64_t sz;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(EntryObject, items) ||
+ (sz - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad entry size (<= %zu): %" PRIu64 ": %" PRIu64,
offsetof(EntryObject, items),
- le64toh(o->object.size),
+ sz,
offset);
- if ((le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
+ if ((sz - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Invalid number items in entry: %" PRIu64 ": %" PRIu64,
- (le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem),
+ (sz - offsetof(EntryObject, items)) / sizeof(EntryItem),
offset);
if (le64toh(o->entry.seqnum) <= 0)
offset);
break;
+ }
case OBJECT_DATA_HASH_TABLE:
- case OBJECT_FIELD_HASH_TABLE:
- if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
- (le64toh(o->object.size) - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0)
+ case OBJECT_FIELD_HASH_TABLE: {
+ uint64_t sz;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(HashTableObject, items) ||
+ (sz - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
+ (sz - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Invalid %s hash table size: %" PRIu64 ": %" PRIu64,
o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
- le64toh(o->object.size),
+ sz,
offset);
break;
+ }
+
+ case OBJECT_ENTRY_ARRAY: {
+ uint64_t sz;
- case OBJECT_ENTRY_ARRAY:
- if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
- (le64toh(o->object.size) - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0)
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(EntryArrayObject, items) ||
+ (sz - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
+ (sz - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Invalid object entry array size: %" PRIu64 ": %" PRIu64,
- le64toh(o->object.size),
+ sz,
offset);
if (!VALID64(le64toh(o->entry_array.next_entry_array_offset)))
offset);
break;
+ }
case OBJECT_TAG:
if (le64toh(o->object.size) != sizeof(TagObject))
return r;
o = (Object*) t;
- s = le64toh(o->object.size);
+ s = le64toh(READ_NOW(o->object.size));
if (s == 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
return r;
}
-int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset) {
+int journal_file_append_object(
+ JournalFile *f,
+ ObjectType type,
+ uint64_t size,
+ Object **ret,
+ uint64_t *ret_offset) {
+
int r;
uint64_t p;
Object *tail, *o;
assert(f->header);
assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
assert(size >= sizeof(ObjectHeader));
- assert(offset);
- assert(ret);
r = journal_file_set_online(f);
if (r < 0)
if (p == 0)
p = le64toh(f->header->header_size);
else {
+ uint64_t sz;
+
r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);
if (r < 0)
return r;
- p += ALIGN64(le64toh(tail->object.size));
+ sz = le64toh(READ_NOW(tail->object.size));
+ if (sz > UINT64_MAX - sizeof(uint64_t) + 1)
+ return -EBADMSG;
+
+ sz = ALIGN64(sz);
+ if (p > UINT64_MAX - sz)
+ return -EBADMSG;
+
+ p += sz;
}
r = journal_file_allocate(f, p, size);
return r;
o = (Object*) t;
-
- zero(o->object);
- o->object.type = type;
- o->object.size = htole64(size);
+ o->object = (ObjectHeader) {
+ .type = type,
+ .size = htole64(size),
+ };
f->header->tail_object_offset = htole64(p);
f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
- *ret = o;
- *offset = p;
+ if (ret)
+ *ret = o;
+
+ if (ret_offset)
+ *ret_offset = p;
return 0;
}
if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
s = DEFAULT_DATA_HASH_TABLE_SIZE;
- log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
+ log_debug("Reserving %"PRIu64" entries in data hash table.", s / sizeof(HashItem));
r = journal_file_append_object(f,
OBJECT_DATA_HASH_TABLE,
* number should grow very slowly only */
s = DEFAULT_FIELD_HASH_TABLE_SIZE;
+ log_debug("Reserving %"PRIu64" entries in field hash table.", s / sizeof(HashItem));
+
r = journal_file_append_object(f,
OBJECT_FIELD_HASH_TABLE,
offsetof(Object, hash_table.items) + s,
if (o->object.type != OBJECT_FIELD)
return -EINVAL;
- m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+ m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem);
if (m <= 0)
return -EBADMSG;
if (o->object.type != OBJECT_DATA)
return -EINVAL;
- m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+ m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem);
if (m <= 0)
return -EBADMSG;
return 0;
}
/* Advance a hash-chain walk by one link.
 *
 * Reads the next-item offset from *next_hash_offset (volatile, hence READ_NOW: the value lives in an
 * mmap'ed file that could change under us) and stores it into *p. Returns 0 on success, -EBADMSG if
 * the chain points backwards (offsets in a valid chain are strictly increasing, so a non-increasing
 * link means a loop in a corrupted file).
 *
 * *depth is incremented for each link followed; if header_max_depth is non-NULL and the file is
 * writable, the largest depth seen so far is recorded there (used to decide when to rotate files
 * with overlong hash chains). */
static int next_hash_offset(
                JournalFile *f,
                uint64_t *p,
                le64_t *next_hash_offset,
                uint64_t *depth,
                le64_t *header_max_depth) {

        uint64_t nextp;

        nextp = le64toh(READ_NOW(*next_hash_offset));
        if (nextp > 0) {
                if (nextp <= *p) /* Refuse going in loops */
                        return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                               "Detected hash item loop in %s, refusing.", f->path);

                (*depth)++;

                /* If the depth of this hash chain is larger than all others we have seen so far, record it */
                if (header_max_depth && f->writable)
                        *header_max_depth = htole64(MAX(*depth, le64toh(*header_max_depth)));
        }

        /* A nextp of 0 terminates the walk in the caller's loop. */
        *p = nextp;
        return 0;
}
+
int journal_file_find_field_object_with_hash(
JournalFile *f,
const void *field, uint64_t size, uint64_t hash,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
- uint64_t p, osize, h, m;
+ uint64_t p, osize, h, m, depth = 0;
int r;
assert(f);
osize = offsetof(Object, field.payload) + size;
- m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+ m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem);
if (m <= 0)
return -EBADMSG;
h = hash % m;
p = le64toh(f->field_hash_table[h].head_hash_offset);
-
while (p > 0) {
Object *o;
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 1;
}
- p = le64toh(o->field.next_hash_offset);
+ r = next_hash_offset(
+ f,
+ &p,
+ &o->field.next_hash_offset,
+ &depth,
+ JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) ? &f->header->field_hash_chain_depth : NULL);
+ if (r < 0)
+ return r;
}
return 0;
}
+uint64_t journal_file_hash_data(
+ JournalFile *f,
+ const void *data,
+ size_t sz) {
+
+ assert(f);
+ assert(data || sz == 0);
+
+ /* We try to unify our codebase on siphash, hence new-styled journal files utilizing the keyed hash
+ * function use siphash. Old journal files use the Jenkins hash. */
+
+ if (JOURNAL_HEADER_KEYED_HASH(f->header))
+ return siphash24(data, sz, f->header->file_id.bytes);
+
+ return jenkins_hash64(data, sz);
+}
+
int journal_file_find_field_object(
JournalFile *f,
const void *field, uint64_t size,
- Object **ret, uint64_t *offset) {
-
- uint64_t hash;
+ Object **ret, uint64_t *ret_offset) {
assert(f);
assert(field && size > 0);
- hash = hash64(field, size);
-
- return journal_file_find_field_object_with_hash(f,
- field, size, hash,
- ret, offset);
+ return journal_file_find_field_object_with_hash(
+ f,
+ field, size,
+ journal_file_hash_data(f, field, size),
+ ret, ret_offset);
}
int journal_file_find_data_object_with_hash(
JournalFile *f,
const void *data, uint64_t size, uint64_t hash,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
- uint64_t p, osize, h, m;
+ uint64_t p, osize, h, m, depth = 0;
int r;
assert(f);
osize = offsetof(Object, data.payload) + size;
- m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+ m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem);
if (m <= 0)
return -EBADMSG;
goto next;
if (o->object.flags & OBJECT_COMPRESSION_MASK) {
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
uint64_t l;
size_t rsize = 0;
- l = le64toh(o->object.size);
+ l = le64toh(READ_NOW(o->object.size));
if (l <= offsetof(Object, data.payload))
return -EBADMSG;
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 1;
}
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 1;
}
next:
- p = le64toh(o->data.next_hash_offset);
+ r = next_hash_offset(
+ f,
+ &p,
+ &o->data.next_hash_offset,
+ &depth,
+ JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) ? &f->header->data_hash_chain_depth : NULL);
+ if (r < 0)
+ return r;
}
return 0;
int journal_file_find_data_object(
JournalFile *f,
const void *data, uint64_t size,
- Object **ret, uint64_t *offset) {
-
- uint64_t hash;
+ Object **ret, uint64_t *ret_offset) {
assert(f);
assert(data || size == 0);
- hash = hash64(data, size);
-
- return journal_file_find_data_object_with_hash(f,
- data, size, hash,
- ret, offset);
+ return journal_file_find_data_object_with_hash(
+ f,
+ data, size,
+ journal_file_hash_data(f, data, size),
+ ret, ret_offset);
}
static int journal_file_append_field(
JournalFile *f,
const void *field, uint64_t size,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
uint64_t hash, p;
uint64_t osize;
assert(f);
assert(field && size > 0);
- hash = hash64(field, size);
+ hash = journal_file_hash_data(f, field, size);
r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
if (r < 0)
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 0;
}
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 0;
}
static int journal_file_append_data(
JournalFile *f,
const void *data, uint64_t size,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
uint64_t hash, p;
uint64_t osize;
assert(f);
assert(data || size == 0);
- hash = hash64(data, size);
+ hash = journal_file_hash_data(f, data, size);
r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
if (r < 0)
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 0;
}
o->data.hash = htole64(hash);
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
if (JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) {
size_t rsize = 0;
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 0;
}
/* Number of EntryItem entries carried by an ENTRY object, or 0 if the object is not an entry or its
 * declared size is too small to even hold the fixed header (corrupt file). The size is read through
 * READ_NOW since it lives in an mmap'ed region that could change under us. */
uint64_t journal_file_entry_n_items(Object *o) {
        uint64_t size;

        assert(o);

        if (o->object.type != OBJECT_ENTRY)
                return 0;

        size = le64toh(READ_NOW(o->object.size));

        return size < offsetof(Object, entry.items) ? 0 :
                (size - offsetof(Object, entry.items)) / sizeof(EntryItem);
}
/* Number of 64-bit offset slots carried by an ENTRY_ARRAY object, or 0 if the object is not an
 * entry array or its declared size is smaller than the fixed header (corrupt file). READ_NOW guards
 * against the mmap'ed size field changing between reads. */
uint64_t journal_file_entry_array_n_items(Object *o) {
        uint64_t size;

        assert(o);

        if (o->object.type != OBJECT_ENTRY_ARRAY)
                return 0;

        size = le64toh(READ_NOW(o->object.size));

        return size < offsetof(Object, entry_array.items) ? 0 :
                (size - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
}
/* Number of HashItem buckets carried by a DATA or FIELD hash-table object, or 0 for any other
 * object type or for a declared size smaller than the fixed header (corrupt file). READ_NOW guards
 * against the mmap'ed size field changing between reads. */
uint64_t journal_file_hash_table_n_items(Object *o) {
        uint64_t size;

        assert(o);

        if (!IN_SET(o->object.type, OBJECT_DATA_HASH_TABLE, OBJECT_FIELD_HASH_TABLE))
                return 0;

        size = le64toh(READ_NOW(o->object.size));

        return size < offsetof(Object, hash_table.items) ? 0 :
                (size - offsetof(Object, hash_table.items)) / sizeof(HashItem);
}
static int link_entry_into_array(JournalFile *f,
assert(p > 0);
a = le64toh(*first);
- i = hidx = le64toh(*idx);
+ i = hidx = le64toh(READ_NOW(*idx));
while (a > 0) {
r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
le64_t *idx,
uint64_t p) {
+ uint64_t hidx;
int r;
assert(f);
assert(idx);
assert(p > 0);
- if (*idx == 0)
+ hidx = le64toh(READ_NOW(*idx));
+ if (hidx == UINT64_MAX)
+ return -EBADMSG;
+ if (hidx == 0)
*extra = htole64(p);
else {
le64_t i;
- i = htole64(le64toh(*idx) - 1);
+ i = htole64(hidx - 1);
r = link_entry_into_array(f, first, &i, p);
if (r < 0)
return r;
}
- *idx = htole64(le64toh(*idx) + 1);
+ *idx = htole64(hidx + 1);
return 0;
}
static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
uint64_t p;
int r;
+
assert(f);
assert(o);
assert(offset > 0);
p = le64toh(o->entry.items[i].object_offset);
- if (p == 0)
- return -EINVAL;
-
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (r < 0)
return r;
uint64_t xor_hash,
const EntryItem items[], unsigned n_items,
uint64_t *seqnum,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
uint64_t np;
uint64_t osize;
Object *o;
if (ret)
*ret = o;
- if (offset)
- *offset = np;
+ if (ret_offset)
+ *ret_offset = np;
return 0;
}
}
static void schedule_post_change(JournalFile *f) {
- uint64_t now;
int r;
assert(f);
if (r > 0)
return;
- r = sd_event_now(sd_event_source_get_event(f->post_change_timer), CLOCK_MONOTONIC, &now);
- if (r < 0) {
- log_debug_errno(r, "Failed to get clock's now for scheduling ftruncate: %m");
- goto fail;
- }
-
- r = sd_event_source_set_time(f->post_change_timer, now + f->post_change_timer_period);
+ r = sd_event_source_set_time_relative(f->post_change_timer, f->post_change_timer_period);
if (r < 0) {
log_debug_errno(r, "Failed to set time for scheduling ftruncate: %m");
goto fail;
const sd_id128_t *boot_id,
const struct iovec iovec[], unsigned n_iovec,
uint64_t *seqnum,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
unsigned i;
EntryItem *items;
if (r < 0)
return r;
- xor_hash ^= le64toh(o->data.hash);
+ /* When calculating the XOR hash field, we need to take special care if the "keyed-hash"
+ * journal file flag is on. We use the XOR hash field to quickly determine the identity of a
+ * specific record, and give records with otherwise identical position (i.e. match in seqno,
+ * timestamp, …) a stable ordering. But for that we can't have it that the hash of the
+ * objects in each file is different since they are keyed. Hence let's calculate the Jenkins
+ * hash here for that. This also has the benefit that cursors for old and new journal files
+ * are completely identical (they include the XOR hash after all). For classic Jenkins-hash
+ * files things are easier, we can just take the value from the stored record directly. */
+
+ if (JOURNAL_HEADER_KEYED_HASH(f->header))
+ xor_hash ^= jenkins_hash64(iovec[i].iov_base, iovec[i].iov_len);
+ else
+ xor_hash ^= le64toh(o->data.hash);
+
items[i].object_offset = htole64(p);
items[i].hash = o->data.hash;
}
* times for rotating media. */
typesafe_qsort(items, n_iovec, entry_item_cmp);
- r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, ret, offset);
+ r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, ret, ret_offset);
/* If the memory mapping triggered a SIGBUS then we return an
* IO error and ignore the error code passed down to us, since
JournalFile *f,
uint64_t first,
uint64_t i,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
Object *o;
uint64_t p = 0, a, t = 0;
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
return 1;
}
uint64_t extra,
uint64_t first,
uint64_t i,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
Object *o;
if (ret)
*ret = o;
- if (offset)
- *offset = extra;
+ if (ret_offset)
+ *ret_offset = extra;
return 1;
}
- return generic_array_get(f, first, i-1, ret, offset);
+ return generic_array_get(f, first, i-1, ret, ret_offset);
}
enum {
int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
direction_t direction,
Object **ret,
- uint64_t *offset,
- uint64_t *idx) {
+ uint64_t *ret_offset,
+ uint64_t *ret_idx) {
uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
bool subtract_one = false;
if (ret)
*ret = o;
- if (offset)
- *offset = p;
+ if (ret_offset)
+ *ret_offset = p;
- if (idx)
- *idx = t + i + (subtract_one ? -1 : 0);
+ if (ret_idx)
+ *ret_idx = t + i + (subtract_one ? -1 : 0);
return 1;
}
int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
direction_t direction,
Object **ret,
- uint64_t *offset,
- uint64_t *idx) {
+ uint64_t *ret_offset,
+ uint64_t *ret_idx) {
int r;
bool step_back = false;
return 0;
}
- r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
+ r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, ret_offset, ret_idx);
if (r == 0 && step_back)
goto found;
- if (r > 0 && idx)
- (*idx)++;
+ if (r > 0 && ret_idx)
+ (*ret_idx)++;
return r;
if (ret)
*ret = o;
- if (offset)
- *offset = extra;
+ if (ret_offset)
+ *ret_offset = extra;
- if (idx)
- *idx = 0;
+ if (ret_idx)
+ *ret_idx = 0;
return 1;
}
}
static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
+ uint64_t sq;
Object *o;
int r;
if (r < 0)
return r;
- if (le64toh(o->entry.seqnum) == needle)
+ sq = le64toh(READ_NOW(o->entry.seqnum));
+ if (sq == needle)
return TEST_FOUND;
- else if (le64toh(o->entry.seqnum) < needle)
+ else if (sq < needle)
return TEST_LEFT;
else
return TEST_RIGHT;
uint64_t seqnum,
direction_t direction,
Object **ret,
- uint64_t *offset) {
+ uint64_t *ret_offset) {
assert(f);
assert(f->header);
- return generic_array_bisect(f,
- le64toh(f->header->entry_array_offset),
- le64toh(f->header->n_entries),
- seqnum,
- test_object_seqnum,
- direction,
- ret, offset, NULL);
+ return generic_array_bisect(
+ f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ seqnum,
+ test_object_seqnum,
+ direction,
+ ret, ret_offset, NULL);
}
static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
Object *o;
+ uint64_t rt;
int r;
assert(f);
if (r < 0)
return r;
- if (le64toh(o->entry.realtime) == needle)
+ rt = le64toh(READ_NOW(o->entry.realtime));
+ if (rt == needle)
return TEST_FOUND;
- else if (le64toh(o->entry.realtime) < needle)
+ else if (rt < needle)
return TEST_LEFT;
else
return TEST_RIGHT;
uint64_t realtime,
direction_t direction,
Object **ret,
- uint64_t *offset) {
+ uint64_t *ret_offset) {
assert(f);
assert(f->header);
- return generic_array_bisect(f,
- le64toh(f->header->entry_array_offset),
- le64toh(f->header->n_entries),
- realtime,
- test_object_realtime,
- direction,
- ret, offset, NULL);
+ return generic_array_bisect(
+ f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ realtime,
+ test_object_realtime,
+ direction,
+ ret, ret_offset, NULL);
}
static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
Object *o;
+ uint64_t m;
int r;
assert(f);
if (r < 0)
return r;
- if (le64toh(o->entry.monotonic) == needle)
+ m = le64toh(READ_NOW(o->entry.monotonic));
+ if (m == needle)
return TEST_FOUND;
- else if (le64toh(o->entry.monotonic) < needle)
+ else if (m < needle)
return TEST_LEFT;
else
return TEST_RIGHT;
uint64_t monotonic,
direction_t direction,
Object **ret,
- uint64_t *offset) {
+ uint64_t *ret_offset) {
Object *o;
int r;
if (r == 0)
return -ENOENT;
- return generic_array_bisect_plus_one(f,
- le64toh(o->data.entry_offset),
- le64toh(o->data.entry_array_offset),
- le64toh(o->data.n_entries),
- monotonic,
- test_object_monotonic,
- direction,
- ret, offset, NULL);
+ return generic_array_bisect_plus_one(
+ f,
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ le64toh(o->data.n_entries),
+ monotonic,
+ test_object_monotonic,
+ direction,
+ ret, ret_offset, NULL);
}
void journal_file_reset_location(JournalFile *f) {
JournalFile *f,
uint64_t p,
direction_t direction,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
uint64_t i, n, ofs;
int r;
assert(f);
assert(f->header);
- n = le64toh(f->header->n_entries);
+ n = le64toh(READ_NOW(f->header->n_entries));
if (n <= 0)
return 0;
"%s: entry array not properly ordered at entry %" PRIu64,
f->path, i);
- if (offset)
- *offset = ofs;
+ if (ret_offset)
+ *ret_offset = ofs;
return 1;
}
Object *o, uint64_t p,
uint64_t data_offset,
direction_t direction,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
uint64_t i, n, ofs;
Object *d;
if (r < 0)
return r;
- n = le64toh(d->data.n_entries);
+ n = le64toh(READ_NOW(d->data.n_entries));
if (n <= 0)
return n;
"%s data entry array not properly ordered at entry %" PRIu64,
f->path, i);
- if (offset)
- *offset = ofs;
+ if (ret_offset)
+ *ret_offset = ofs;
return 1;
}
uint64_t data_offset,
uint64_t p,
direction_t direction,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
int r;
Object *d;
if (r < 0)
return r;
- return generic_array_bisect_plus_one(f,
- le64toh(d->data.entry_offset),
- le64toh(d->data.entry_array_offset),
- le64toh(d->data.n_entries),
- p,
- test_object_offset,
- direction,
- ret, offset, NULL);
+ return generic_array_bisect_plus_one(
+ f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ p,
+ test_object_offset,
+ direction,
+ ret, ret_offset, NULL);
}
int journal_file_move_to_entry_by_monotonic_for_data(
sd_id128_t boot_id,
uint64_t monotonic,
direction_t direction,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
Object *o, *d;
int r;
if (p == q) {
if (ret)
*ret = qo;
- if (offset)
- *offset = q;
+ if (ret_offset)
+ *ret_offset = q;
return 1;
}
uint64_t data_offset,
uint64_t seqnum,
direction_t direction,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
Object *d;
int r;
if (r < 0)
return r;
- return generic_array_bisect_plus_one(f,
- le64toh(d->data.entry_offset),
- le64toh(d->data.entry_array_offset),
- le64toh(d->data.n_entries),
- seqnum,
- test_object_seqnum,
- direction,
- ret, offset, NULL);
+ return generic_array_bisect_plus_one(
+ f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ seqnum,
+ test_object_seqnum,
+ direction,
+ ret, ret_offset, NULL);
}
int journal_file_move_to_entry_by_realtime_for_data(
uint64_t data_offset,
uint64_t realtime,
direction_t direction,
- Object **ret, uint64_t *offset) {
+ Object **ret, uint64_t *ret_offset) {
Object *d;
int r;
if (r < 0)
return r;
- return generic_array_bisect_plus_one(f,
- le64toh(d->data.entry_offset),
- le64toh(d->data.entry_array_offset),
- le64toh(d->data.n_entries),
- realtime,
- test_object_realtime,
- direction,
- ret, offset, NULL);
+ return generic_array_bisect_plus_one(
+ f,
+ le64toh(d->data.entry_offset),
+ le64toh(d->data.entry_array_offset),
+ le64toh(d->data.n_entries),
+ realtime,
+ test_object_realtime,
+ direction,
+ ret, ret_offset, NULL);
}
void journal_file_dump(JournalFile *f) {
journal_file_print_header(f);
- p = le64toh(f->header->header_size);
+ p = le64toh(READ_NOW(f->header->header_size));
while (p != 0) {
r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
if (r < 0)
if (p == le64toh(f->header->tail_object_offset))
p = 0;
else
- p = p + ALIGN64(le64toh(o->object.size));
+ p += ALIGN64(le64toh(o->object.size));
}
return;
}
void journal_file_print_header(JournalFile *f) {
- char a[33], b[33], c[33], d[33];
+ char a[SD_ID128_STRING_MAX], b[SD_ID128_STRING_MAX], c[SD_ID128_STRING_MAX], d[SD_ID128_STRING_MAX];
char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
struct stat st;
char bytes[FORMAT_BYTES_MAX];
assert(f);
assert(f->header);
- printf("File Path: %s\n"
+ printf("File path: %s\n"
"File ID: %s\n"
"Machine ID: %s\n"
"Boot ID: %s\n"
- "Sequential Number ID: %s\n"
+ "Sequential number ID: %s\n"
"State: %s\n"
- "Compatible Flags:%s%s\n"
- "Incompatible Flags:%s%s%s\n"
+ "Compatible flags:%s%s\n"
+ "Incompatible flags:%s%s%s%s%s\n"
"Header size: %"PRIu64"\n"
"Arena size: %"PRIu64"\n"
- "Data Hash Table Size: %"PRIu64"\n"
- "Field Hash Table Size: %"PRIu64"\n"
- "Rotate Suggested: %s\n"
- "Head Sequential Number: %"PRIu64" (%"PRIx64")\n"
- "Tail Sequential Number: %"PRIu64" (%"PRIx64")\n"
- "Head Realtime Timestamp: %s (%"PRIx64")\n"
- "Tail Realtime Timestamp: %s (%"PRIx64")\n"
- "Tail Monotonic Timestamp: %s (%"PRIx64")\n"
+ "Data hash table size: %"PRIu64"\n"
+ "Field hash table size: %"PRIu64"\n"
+ "Rotate suggested: %s\n"
+ "Head sequential number: %"PRIu64" (%"PRIx64")\n"
+ "Tail sequential number: %"PRIu64" (%"PRIx64")\n"
+ "Head realtime timestamp: %s (%"PRIx64")\n"
+ "Tail realtime timestamp: %s (%"PRIx64")\n"
+ "Tail monotonic timestamp: %s (%"PRIx64")\n"
"Objects: %"PRIu64"\n"
- "Entry Objects: %"PRIu64"\n",
+ "Entry objects: %"PRIu64"\n",
f->path,
sd_id128_to_string(f->header->file_id, a),
sd_id128_to_string(f->header->machine_id, b),
(le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
+ JOURNAL_HEADER_COMPRESSED_ZSTD(f->header) ? " COMPRESSED-ZSTD" : "",
+ JOURNAL_HEADER_KEYED_HASH(f->header) ? " KEYED-HASH" : "",
(le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
le64toh(f->header->header_size),
le64toh(f->header->arena_size),
le64toh(f->header->n_entries));
if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
- printf("Data Objects: %"PRIu64"\n"
- "Data Hash Table Fill: %.1f%%\n",
+ printf("Data objects: %"PRIu64"\n"
+ "Data hash table fill: %.1f%%\n",
le64toh(f->header->n_data),
100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
- printf("Field Objects: %"PRIu64"\n"
- "Field Hash Table Fill: %.1f%%\n",
+ printf("Field objects: %"PRIu64"\n"
+ "Field hash table fill: %.1f%%\n",
le64toh(f->header->n_fields),
100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
- printf("Tag Objects: %"PRIu64"\n",
+ printf("Tag objects: %"PRIu64"\n",
le64toh(f->header->n_tags));
if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
- printf("Entry Array Objects: %"PRIu64"\n",
+ printf("Entry array objects: %"PRIu64"\n",
le64toh(f->header->n_entry_arrays));
+ if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth))
+ printf("Deepest field hash chain: %" PRIu64"\n",
+ le64toh(f->header->field_hash_chain_depth));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth))
+ printf("Deepest data hash chain: %" PRIu64"\n",
+ le64toh(f->header->data_hash_chain_depth));
+
if (fstat(f->fd, &st) >= 0)
printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (uint64_t) st.st_blocks * 512ULL));
}
JournalFile *f;
void *h;
int r;
- char bytes[FORMAT_BYTES_MAX];
assert(ret);
assert(fd >= 0 || fname);
.prot = prot_from_flags(flags),
.writable = (flags & O_ACCMODE) != O_RDONLY,
-#if HAVE_LZ4
+#if HAVE_ZSTD
+ .compress_zstd = compress,
+#elif HAVE_LZ4
.compress_lz4 = compress,
#elif HAVE_XZ
.compress_xz = compress,
#endif
};
- log_debug("Journal effective settings seal=%s compress=%s compress_threshold_bytes=%s",
- yes_no(f->seal), yes_no(JOURNAL_FILE_COMPRESS(f)),
- format_bytes(bytes, sizeof(bytes), f->compress_threshold_bytes));
+ /* We turn on keyed hashes by default, but provide an environment variable to turn them off, if
+ * people really want that */
+ r = getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH");
+ if (r < 0) {
+ if (r != -ENXIO)
+ log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring.");
+ f->keyed_hash = true;
+ } else
+ f->keyed_hash = r;
+
+ if (DEBUG_LOGGING) {
+ static int last_seal = -1, last_compress = -1, last_keyed_hash = -1;
+ static uint64_t last_bytes = UINT64_MAX;
+ char bytes[FORMAT_BYTES_MAX];
+
+ if (last_seal != f->seal ||
+ last_keyed_hash != f->keyed_hash ||
+ last_compress != JOURNAL_FILE_COMPRESS(f) ||
+ last_bytes != f->compress_threshold_bytes) {
+
+ log_debug("Journal effective settings seal=%s keyed_hash=%s compress=%s compress_threshold_bytes=%s",
+ yes_no(f->seal), yes_no(f->keyed_hash), yes_no(JOURNAL_FILE_COMPRESS(f)),
+ format_bytes(bytes, sizeof bytes, f->compress_threshold_bytes));
+ last_seal = f->seal;
+ last_keyed_hash = f->keyed_hash;
+ last_compress = JOURNAL_FILE_COMPRESS(f);
+ last_bytes = f->compress_threshold_bytes;
+ }
+ }
if (mmap_cache)
f->mmap = mmap_cache_ref(mmap_cache);
if (le_hash != o->data.hash)
return -EBADMSG;
- l = le64toh(o->object.size) - offsetof(Object, data.payload);
+ l = le64toh(READ_NOW(o->object.size));
+ if (l < offsetof(Object, data.payload))
+ return -EBADMSG;
+
+ l -= offsetof(Object, data.payload);
t = (size_t) l;
/* We hit the limit on 32bit machines */
return -E2BIG;
if (o->object.flags & OBJECT_COMPRESSION_MASK) {
-#if HAVE_XZ || HAVE_LZ4
+#if HAVE_COMPRESSION
size_t rsize = 0;
r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
if (r < 0)
return r;
- xor_hash ^= le64toh(u->data.hash);
+ if (JOURNAL_HEADER_KEYED_HASH(to->header))
+ xor_hash ^= jenkins_hash64(data, l);
+ else
+ xor_hash ^= le64toh(u->data.hash);
+
items[i].object_offset = htole64(h);
items[i].hash = u->data.hash;
void journal_default_metrics(JournalMetrics *m, int fd) {
char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX], e[FORMAT_BYTES_MAX];
struct statvfs ss;
- uint64_t fs_size;
+ uint64_t fs_size = 0;
assert(m);
assert(fd >= 0);
if (fstatvfs(fd, &ss) >= 0)
fs_size = ss.f_frsize * ss.f_blocks;
- else {
+ else
log_debug_errno(errno, "Failed to determine disk size: %m");
- fs_size = 0;
- }
if (m->max_use == (uint64_t) -1) {
- if (fs_size > 0) {
- m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
-
- if (m->max_use > DEFAULT_MAX_USE_UPPER)
- m->max_use = DEFAULT_MAX_USE_UPPER;
-
- if (m->max_use < DEFAULT_MAX_USE_LOWER)
- m->max_use = DEFAULT_MAX_USE_LOWER;
- } else
- m->max_use = DEFAULT_MAX_USE_LOWER;
+ if (fs_size > 0)
+ m->max_use = CLAMP(PAGE_ALIGN(fs_size / 10), /* 10% of file system size */
+ MAX_USE_LOWER, MAX_USE_UPPER);
+ else
+ m->max_use = MAX_USE_LOWER;
} else {
m->max_use = PAGE_ALIGN(m->max_use);
m->max_use = JOURNAL_FILE_SIZE_MIN*2;
}
- if (m->min_use == (uint64_t) -1)
- m->min_use = DEFAULT_MIN_USE;
+ if (m->min_use == (uint64_t) -1) {
+ if (fs_size > 0)
+ m->min_use = CLAMP(PAGE_ALIGN(fs_size / 50), /* 2% of file system size */
+ MIN_USE_LOW, MIN_USE_HIGH);
+ else
+ m->min_use = MIN_USE_LOW;
+ }
if (m->min_use > m->max_use)
m->min_use = m->max_use;
- if (m->max_size == (uint64_t) -1) {
- m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
-
- if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
- m->max_size = DEFAULT_MAX_SIZE_UPPER;
- } else
+ if (m->max_size == (uint64_t) -1)
+ m->max_size = MIN(PAGE_ALIGN(m->max_use / 8), /* 8 chunks */
+ MAX_SIZE_UPPER);
+ else
m->max_size = PAGE_ALIGN(m->max_size);
if (m->max_size != 0) {
if (m->min_size == (uint64_t) -1)
m->min_size = JOURNAL_FILE_SIZE_MIN;
- else {
- m->min_size = PAGE_ALIGN(m->min_size);
-
- if (m->min_size < JOURNAL_FILE_SIZE_MIN)
- m->min_size = JOURNAL_FILE_SIZE_MIN;
-
- if (m->max_size != 0 && m->min_size > m->max_size)
- m->max_size = m->min_size;
- }
+ else
+ m->min_size = CLAMP(PAGE_ALIGN(m->min_size),
+ JOURNAL_FILE_SIZE_MIN,
+ m->max_size ?: UINT64_MAX);
if (m->keep_free == (uint64_t) -1) {
-
- if (fs_size > 0) {
- m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
-
- if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
- m->keep_free = DEFAULT_KEEP_FREE_UPPER;
-
- } else
+ if (fs_size > 0)
+ m->keep_free = MIN(PAGE_ALIGN(fs_size / 20), /* 5% of file system size */
+ KEEP_FREE_UPPER);
+ else
m->keep_free = DEFAULT_KEEP_FREE;
}
return true;
}
- /* Let's check if the hash tables grew over a certain fill
- * level (75%, borrowing this value from Java's hash table
- * implementation), and if so suggest a rotation. To calculate
- * the fill level we need the n_data field, which only exists
- * in newer versions. */
+ /* Let's check if the hash tables grew over a certain fill level (75%, borrowing this value from
+ * Java's hash table implementation), and if so suggest a rotation. To calculate the fill level we
+ * need the n_data field, which only exists in newer versions. */
if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
return true;
}
+ /* If there are too many hash collisions somebody is most likely playing games with us. Hence, if our
+ * longest chain is longer than some threshold, let's suggest rotation. */
+ if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) &&
+ le64toh(f->header->data_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) {
+ log_debug("Data hash table of %s has deepest hash chain of length %" PRIu64 ", suggesting rotation.",
+ f->path, le64toh(f->header->data_hash_chain_depth));
+ return true;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) &&
+ le64toh(f->header->field_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) {
+ log_debug("Field hash table of %s has deepest hash chain of length %" PRIu64 ", suggesting rotation.",
+ f->path, le64toh(f->header->field_hash_chain_depth));
+ return true;
+ }
+
/* Are the data objects properly indexed by field objects? */
if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&