#define DEFAULT_COMPRESS_THRESHOLD (512ULL)
#define MIN_COMPRESS_THRESHOLD (8ULL)
+#define U64_KB UINT64_C(1024)
+#define U64_MB (UINT64_C(1024) * U64_KB)
+#define U64_GB (UINT64_C(1024) * U64_MB)
+
/* This is the minimum journal file size */
-#define JOURNAL_FILE_SIZE_MIN (512 * 1024ULL) /* 512 KiB */
-#define JOURNAL_COMPACT_SIZE_MAX UINT32_MAX /* 4 GiB */
+#define JOURNAL_FILE_SIZE_MIN (512 * U64_KB) /* 512 KiB */
+#define JOURNAL_COMPACT_SIZE_MAX ((uint64_t) UINT32_MAX) /* 4 GiB */
-/* These are the lower and upper bounds if we deduce the max_use value
- * from the file system size */
-#define MAX_USE_LOWER (1 * 1024 * 1024ULL) /* 1 MiB */
-#define MAX_USE_UPPER (4 * 1024 * 1024 * 1024ULL) /* 4 GiB */
+/* These are the lower and upper bounds if we deduce the max_use value from the file system size */
+#define MAX_USE_LOWER (1 * U64_MB) /* 1 MiB */
+#define MAX_USE_UPPER (4 * U64_GB) /* 4 GiB */
/* Those are the lower and upper bounds for the minimal use limit,
* i.e. how much we'll use even if keep_free suggests otherwise. */
-#define MIN_USE_LOW (1 * 1024 * 1024ULL) /* 1 MiB */
-#define MIN_USE_HIGH (16 * 1024 * 1024ULL) /* 16 MiB */
+#define MIN_USE_LOW (1 * U64_MB) /* 1 MiB */
+#define MIN_USE_HIGH (16 * U64_MB) /* 16 MiB */
/* This is the upper bound if we deduce max_size from max_use */
-#define MAX_SIZE_UPPER (128 * 1024 * 1024ULL) /* 128 MiB */
+#define MAX_SIZE_UPPER (128 * U64_MB) /* 128 MiB */
-/* This is the upper bound if we deduce the keep_free value from the
- * file system size */
-#define KEEP_FREE_UPPER (4 * 1024 * 1024 * 1024ULL) /* 4 GiB */
+/* This is the upper bound if we deduce the keep_free value from the file system size */
+#define KEEP_FREE_UPPER (4 * U64_GB) /* 4 GiB */
-/* This is the keep_free value when we can't determine the system
- * size */
-#define DEFAULT_KEEP_FREE (1024 * 1024ULL) /* 1 MB */
+/* This is the keep_free value when we can't determine the file system size */
+#define DEFAULT_KEEP_FREE (1 * U64_MB) /* 1 MB */
/* This is the default maximum number of journal files to keep around. */
#define DEFAULT_N_MAX_FILES 100
#define CHAIN_CACHE_MAX 20
/* How much to increase the journal file size at once each time we allocate something new. */
-#define FILE_SIZE_INCREASE (8 * 1024 * 1024ULL) /* 8MB */
+#define FILE_SIZE_INCREASE (8 * U64_MB) /* 8MB */
/* Reread fstat() of the file for detecting deletions at least this often */
#define LAST_STAT_REFRESH_USEC (5*USEC_PER_SEC)
return false;
}
+/* Checks an object offset read from the file header. Zero is accepted unconditionally; any other
+ * value must satisfy VALID64() and lie within [header_size, tail_object_offset]. */
+static bool offset_is_valid(uint64_t offset, uint64_t header_size, uint64_t tail_object_offset) {
+        /* A zero offset is always considered valid. */
+        if (offset == 0)
+                return true;
+
+        /* Non-zero offsets must pass VALID64(), must not point into the header, and must not
+         * point beyond the tail object. */
+        return VALID64(offset) &&
+                offset >= header_size &&
+                offset <= tail_object_offset;
+}
+
+/* Checks the offset/size pair describing a hash table in the file header. 'offset' refers to the
+ * items of the hash table object, hence the object itself starts offsetof(Object, hash_table.items)
+ * bytes earlier. Returns true if the pair is acceptable, false otherwise. */
+static bool hash_table_is_valid(uint64_t offset, uint64_t size, uint64_t header_size, uint64_t arena_size, uint64_t tail_object_offset) {
+        const uint64_t items_offset = offsetof(Object, hash_table.items);
+        const uint64_t file_end = header_size + arena_size;
+        uint64_t object_offset;
+
+        /* Both zero: accepted as is. */
+        if (offset == 0 && size == 0)
+                return true;
+
+        /* Exactly one of them zero: inconsistent. */
+        if (offset == 0 || size == 0)
+                return false;
+
+        /* The items cannot start before (or at) the end of the object's own header fields. */
+        if (offset <= items_offset)
+                return false;
+
+        /* Step back from the items to the beginning of the hash table object. */
+        object_offset = offset - items_offset;
+        if (!offset_is_valid(object_offset, header_size, tail_object_offset))
+                return false;
+
+        assert(object_offset <= file_end);
+
+        /* The table must fit between the object offset and the end of the arena. */
+        return size <= file_end - object_offset;
+}
+
static int journal_file_verify_header(JournalFile *f) {
uint64_t arena_size, header_size;
if (journal_file_writable(f) && header_size != sizeof(Header))
return -EPROTONOSUPPORT;
+ /* Don't write to journal files without the new boot ID update behavior guarantee. */
+ if (journal_file_writable(f) && !JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header))
+ return -EPROTONOSUPPORT;
+
if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
return -EBADMSG;
if (UINT64_MAX - header_size < arena_size || header_size + arena_size > (uint64_t) f->last_stat.st_size)
return -ENODATA;
- if (le64toh(f->header->tail_object_offset) > header_size + arena_size)
+ uint64_t tail_object_offset = le64toh(f->header->tail_object_offset);
+ if (!offset_is_valid(tail_object_offset, header_size, UINT64_MAX))
+ return -ENODATA;
+ if (header_size + arena_size < tail_object_offset)
+ return -ENODATA;
+ if (header_size + arena_size - tail_object_offset < sizeof(ObjectHeader))
+ return -ENODATA;
+
+ if (!hash_table_is_valid(le64toh(f->header->data_hash_table_offset),
+ le64toh(f->header->data_hash_table_size),
+ header_size, arena_size, tail_object_offset))
return -ENODATA;
- if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
- !VALID64(le64toh(f->header->field_hash_table_offset)) ||
- !VALID64(le64toh(f->header->tail_object_offset)) ||
- !VALID64(le64toh(f->header->entry_array_offset)))
+ if (!hash_table_is_valid(le64toh(f->header->field_hash_table_offset),
+ le64toh(f->header->field_hash_table_size),
+ header_size, arena_size, tail_object_offset))
return -ENODATA;
- if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_offset) &&
- le64toh(f->header->tail_entry_offset) != 0 &&
- !VALID64(le64toh(f->header->tail_entry_offset)))
+ uint64_t entry_array_offset = le64toh(f->header->entry_array_offset);
+ if (!offset_is_valid(entry_array_offset, header_size, tail_object_offset))
+ return -ENODATA;
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_offset)) {
+ uint32_t offset = le32toh(f->header->tail_entry_array_offset);
+ uint32_t n = le32toh(f->header->tail_entry_array_n_entries);
+
+ if (!offset_is_valid(offset, header_size, tail_object_offset))
+ return -ENODATA;
+ if (entry_array_offset > offset)
+ return -ENODATA;
+ if (entry_array_offset == 0 && offset != 0)
+ return -ENODATA;
+ if ((offset == 0) != (n == 0))
+ return -ENODATA;
+ assert(offset <= header_size + arena_size);
+ if ((uint64_t) n * journal_file_entry_array_item_size(f) > header_size + arena_size - offset)
+ return -ENODATA;
+ }
+
+ /* Validate the tail entry fields, if the header contains them. */
+ if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_offset)) {
+         uint64_t offset = le64toh(f->header->tail_entry_offset);
+
+         if (!offset_is_valid(offset, header_size, tail_object_offset))
+                 return -ENODATA;
+
+         if (offset > 0) {
+                 /* When there is an entry object, then these fields must be filled. */
+                 if (sd_id128_is_null(f->header->tail_entry_boot_id))
+                         return -ENODATA;
+                 if (!VALID_REALTIME(le64toh(f->header->head_entry_realtime)))
+                         return -ENODATA;
+                 if (!VALID_REALTIME(le64toh(f->header->tail_entry_realtime)))
+                         return -ENODATA;
+                 /* The monotonic timestamp lives in its own header field; check that field
+                  * rather than validating the realtime timestamp a second time. */
+                 if (!VALID_MONOTONIC(le64toh(f->header->tail_entry_monotonic)))
+                         return -ENODATA;
+         } else {
+                 /* Otherwise, the fields must be zero. */
+                 if (JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) &&
+                     !sd_id128_is_null(f->header->tail_entry_boot_id))
+                         return -ENODATA;
+                 if (f->header->head_entry_realtime != 0)
+                         return -ENODATA;
+                 if (f->header->tail_entry_realtime != 0)
+                         return -ENODATA;
+                 /* Also require the monotonic timestamp to be unset, not just the realtime ones. */
+                 if (f->header->tail_entry_monotonic != 0)
+                         return -ENODATA;
+         }
+ }
+
+ /* Verify number of objects */
+ uint64_t n_objects = le64toh(f->header->n_objects);
+ if (n_objects > arena_size / sizeof(ObjectHeader))
+ return -ENODATA;
+
+ uint64_t n_entries = le64toh(f->header->n_entries);
+ if (n_entries > n_objects)
+ return -ENODATA;
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
+ le64toh(f->header->n_data) > n_objects)
+ return -ENODATA;
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
+ le64toh(f->header->n_fields) > n_objects)
+ return -ENODATA;
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) &&
+ le64toh(f->header->n_tags) > n_objects)
+ return -ENODATA;
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays) &&
+ le64toh(f->header->n_entry_arrays) > n_objects)
+ return -ENODATA;
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_n_entries) &&
+ le32toh(f->header->tail_entry_array_n_entries) > n_entries)
return -ENODATA;
if (journal_file_writable(f)) {
int r;
r = sd_id128_get_machine(&machine_id);
- if (r < 0) {
- if (!ERRNO_IS_MACHINE_ID_UNSET(r)) /* handle graceful if machine ID is not initialized yet */
- return r;
-
+ if (ERRNO_IS_NEG_MACHINE_ID_UNSET(r)) /* Gracefully handle the machine ID not being initialized yet */
machine_id = SD_ID128_NULL;
- }
+ else if (r < 0)
+ return r;
if (!sd_id128_equal(machine_id, f->header->machine_id))
return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
/* We assume that this file is not sparse, and we know that for sure, since we always call
* posix_fallocate() ourselves */
- if (size > PAGE_ALIGN_DOWN(UINT64_MAX) - offset)
+ if (size > PAGE_ALIGN_DOWN_U64(UINT64_MAX) - offset)
return -EINVAL;
if (mmap_cache_fd_got_sigbus(f->cache_fd))
old_header_size = le64toh(READ_NOW(f->header->header_size));
old_arena_size = le64toh(READ_NOW(f->header->arena_size));
- if (old_arena_size > PAGE_ALIGN_DOWN(UINT64_MAX) - old_header_size)
+ if (old_arena_size > PAGE_ALIGN_DOWN_U64(UINT64_MAX) - old_header_size)
return -EBADMSG;
old_size = old_header_size + old_arena_size;
- new_size = MAX(PAGE_ALIGN(offset + size), old_header_size);
+ new_size = MAX(PAGE_ALIGN_U64(offset + size), old_header_size);
if (new_size <= old_size) {
if (fstatvfs(f->fd, &svfs) >= 0) {
uint64_t available;
- available = LESS_BY((uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize, f->metrics.keep_free);
+ available = LESS_BY(u64_multiply_safe(svfs.f_bfree, svfs.f_bsize), f->metrics.keep_free);
if (new_size - old_size > available)
return -E2BIG;
}
/* Increase by larger blocks at once */
- new_size = DIV_ROUND_UP(new_size, FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
+ new_size = ROUND_UP(new_size, FILE_SIZE_INCREASE);
if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
new_size = f->metrics.max_size;
assert(f);
assert(ret);
- /* This function may clear, overwrite, or alter previously cached entries. After this function has
- * been called, all objects except for one obtained by this function are invalidated and must be
- * re-read before use. */
+ /* This function may clear, overwrite, or alter previously cached entries with the same type. After
+ * this function has been called, all previously read objects with the same type may be invalidated,
+ * hence must be re-read before use. */
if (size <= 0)
return -EINVAL;
le64toh(o->entry.monotonic),
offset);
+ if (sd_id128_is_null(o->entry.boot_id))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object entry with an empty boot ID: %" PRIu64,
+ offset);
+
break;
}
assert(f);
- /* Even if this function fails, it may clear, overwrite, or alter previously cached entries. After
- * this function has been called, all objects except for one obtained by this function are
- * invalidated and must be re-read before use.. */
+ /* Even if this function fails, it may clear, overwrite, or alter previously cached entries with the
+ * same type. After this function has been called, all previously read objects with the same type may
+ * be invalidated, hence must be re-read before use. */
/* Objects may only be located at multiple of 64 bit */
if (!VALID64(offset))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
- "Attempt to move to %s object at non-64bit boundary: %" PRIu64,
+ "Attempt to move to %s object at non-64-bit boundary: %" PRIu64,
journal_object_type_to_string(type),
offset);
/* Objects may only be located at multiple of 64 bit */
if (!VALID64(offset))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
- "Attempt to read %s object at non-64bit boundary: %" PRIu64,
+ "Attempt to read %s object at non-64-bit boundary: %" PRIu64,
journal_object_type_to_string(type), offset);
/* Object may not be located in the file header */
assert(f);
- /* We can't read objects larger than 4G on a 32bit machine */
+ /* We can't read objects larger than 4G on a 32-bit machine */
if ((uint64_t) (size_t) size != size)
return -E2BIG;
assert(f);
assert(f->header);
assert(ts);
+ assert(boot_id);
+ assert(!sd_id128_is_null(*boot_id));
assert(items || n_items == 0);
if (f->strict_order) {
"timestamp %" PRIu64 ", refusing entry.",
ts->realtime, le64toh(f->header->tail_entry_realtime));
- if (!sd_id128_is_null(f->header->tail_entry_boot_id) && boot_id) {
-
- if (!sd_id128_equal(f->header->tail_entry_boot_id, *boot_id))
- return log_debug_errno(SYNTHETIC_ERRNO(EREMOTE),
- "Boot ID to write is different from previous boot id, refusing entry.");
-
- if (ts->monotonic < le64toh(f->header->tail_entry_monotonic))
- return log_debug_errno(SYNTHETIC_ERRNO(ENOTNAM),
- "Monotonic timestamp %" PRIu64 " smaller than previous monotonic "
- "timestamp %" PRIu64 ", refusing entry.",
- ts->monotonic, le64toh(f->header->tail_entry_monotonic));
- }
+ if (sd_id128_equal(*boot_id, f->header->tail_entry_boot_id) &&
+ ts->monotonic < le64toh(f->header->tail_entry_monotonic))
+ return log_debug_errno(
+ SYNTHETIC_ERRNO(ENOTNAM),
+ "Monotonic timestamp %" PRIu64
+ " smaller than previous monotonic timestamp %" PRIu64
+ " while having the same boot ID, refusing entry.",
+ ts->monotonic,
+ le64toh(f->header->tail_entry_monotonic));
}
if (seqnum_id) {
o->entry.realtime = htole64(ts->realtime);
o->entry.monotonic = htole64(ts->monotonic);
o->entry.xor_hash = htole64(xor_hash);
- if (boot_id)
- f->header->tail_entry_boot_id = *boot_id;
- o->entry.boot_id = f->header->tail_entry_boot_id;
+ o->entry.boot_id = f->header->tail_entry_boot_id = *boot_id;
for (size_t i = 0; i < n_items; i++)
write_entry_item(f, o, i, &items[i]);
ts = &_ts;
}
- if (!boot_id) {
+ if (boot_id) {
+ if (sd_id128_is_null(*boot_id))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Empty boot ID, refusing entry.");
+ } else {
r = sd_id128_get_boot(&_boot_id);
if (r < 0)
return r;
}
r = sd_id128_get_machine(&_machine_id);
- if (r < 0) {
- if (!ERRNO_IS_MACHINE_ID_UNSET(r))
- return r;
-
- /* If the machine ID is not initialized yet, handle gracefully */
+ if (ERRNO_IS_NEG_MACHINE_ID_UNSET(r))
+ /* Gracefully handle the machine ID not being initialized yet */
machine_id = NULL;
- } else
+ else if (r < 0)
+ return r;
+ else
machine_id = &_machine_id;
#if HAVE_GCRYPT
}
typedef struct ChainCacheItem {
- uint64_t first; /* the array at the beginning of the chain */
- uint64_t array; /* the cached array */
- uint64_t begin; /* the first item in the cached array */
- uint64_t total; /* the total number of items in all arrays before this one in the chain */
- uint64_t last_index; /* the last index we looked at, to optimize locality when bisecting */
+ uint64_t first; /* The offset of the entry array object at the beginning of the chain,
+ * i.e., le64toh(f->header->entry_array_offset), or le64toh(o->data.entry_offset). */
+ uint64_t array; /* The offset of the cached entry array object. */
+ uint64_t begin; /* The offset of the first item in the cached array. */
+ uint64_t total; /* The total number of items in all arrays before the cached one in the chain. */
+ uint64_t last_index; /* The last index we looked at in the cached array, to optimize locality when bisecting. */
} ChainCacheItem;
static void chain_cache_put(
static int bump_entry_array(
JournalFile *f,
- Object *o,
- uint64_t offset,
- uint64_t first,
+ Object *o, /* the current entry array object. */
+ uint64_t offset, /* the offset of the entry array object. */
+ uint64_t first, /* The offset of the first entry array object in the chain. */
direction_t direction,
uint64_t *ret) {
- uint64_t p, q = 0;
int r;
assert(f);
- assert(offset);
assert(ret);
if (direction == DIRECTION_DOWN) {
assert(o);
+ assert(o->object.type == OBJECT_ENTRY_ARRAY);
+
*ret = le64toh(o->entry_array.next_entry_array_offset);
- return 0;
- }
+ } else {
- /* Entry array chains are a singly linked list, so to find the previous array in the chain, we have
- * to start iterating from the top. */
+ /* Entry array chains are a singly linked list, so to find the previous array in the chain, we have
+ * to start iterating from the top. */
- p = first;
+ assert(offset > 0);
- while (p > 0 && p != offset) {
- r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, p, &o);
- if (r < 0)
- return r;
+ uint64_t p = first, q = 0;
+ while (p > 0 && p != offset) {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, p, &o);
+ if (r < 0)
+ return r;
- q = p;
- p = le64toh(o->entry_array.next_entry_array_offset);
- }
+ q = p;
+ p = le64toh(o->entry_array.next_entry_array_offset);
+ }
- /* If we can't find the previous entry array in the entry array chain, we're likely dealing with a
- * corrupted journal file. */
- if (p == 0)
- return -EBADMSG;
+ /* If we can't find the previous entry array in the entry array chain, we're likely dealing with a
+ * corrupted journal file. */
+ if (p == 0)
+ return -EBADMSG;
- *ret = q;
+ *ret = q;
+ }
- return 0;
+ return *ret > 0;
}
static int generic_array_get(
JournalFile *f,
- uint64_t first,
- uint64_t i,
+ uint64_t first, /* The offset of the first entry array object in the chain. */
+ uint64_t i, /* The index of the target object counted from the beginning of the entry array chain. */
direction_t direction,
- Object **ret_object,
- uint64_t *ret_offset) {
+ Object **ret_object, /* The found object. */
+ uint64_t *ret_offset) { /* The offset of the found object. */
uint64_t a, t = 0, k;
ChainCacheItem *ci;
- Object *o;
+ Object *o = NULL;
int r;
assert(f);
/* If there's corruption and we're going upwards, move back to the previous entry
* array and start iterating entries from there. */
- r = bump_entry_array(f, NULL, a, first, DIRECTION_UP, &a);
- if (r < 0)
- return r;
-
i = UINT64_MAX;
-
break;
}
if (r < 0)
return r;
k = journal_file_entry_array_n_items(f, o);
+ if (k == 0)
+ return 0;
+
if (i < k)
break;
+ /* The index is larger than the number of elements in the array. Let's move to the next array. */
i -= k;
t += k;
a = le64toh(o->entry_array.next_entry_array_offset);
* direction). */
while (a > 0) {
- /* In the first iteration of the while loop, we reuse i, k and o from the previous while
- * loop. */
if (i == UINT64_MAX) {
+ r = bump_entry_array(f, o, a, first, direction, &a);
+ if (r <= 0)
+ return r;
+
r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
if (r < 0)
return r;
if (k == 0)
break;
- i = direction == DIRECTION_DOWN ? 0 : k - 1;
+ if (direction == DIRECTION_DOWN)
+ i = 0;
+ else {
+ /* We moved to the previous array. The total must be decreased. */
+ if (t < k)
+ return -EBADMSG; /* chain cache is broken ? */
+
+ i = k - 1;
+ t -= k;
+ }
}
do {
* disk properly, let's see if the next one might work for us instead. */
log_debug_errno(r, "Entry item %" PRIu64 " is bad, skipping over it.", i);
- r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
- if (r < 0)
- return r;
-
} while (bump_array_index(&i, direction, k) > 0);
- r = bump_entry_array(f, o, a, first, direction, &a);
- if (r < 0)
- return r;
+ /* All entries tried in the above do-while loop are broken. Let's move to the next (or previous) array. */
+
+ if (direction == DIRECTION_DOWN)
+ /* We are going to the next array, the total must be incremented. */
+ t += k;
- t += k;
i = UINT64_MAX;
}
return 0;
}
-static int generic_array_get_plus_one(
- JournalFile *f,
- uint64_t extra,
- uint64_t first,
- uint64_t i,
- direction_t direction,
- Object **ret_object,
- uint64_t *ret_offset) {
-
- int r;
-
- assert(f);
-
- /* FIXME: fix return value assignment on success. */
-
- if (i == 0) {
- r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object);
- if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG))
- return generic_array_get(f, first, 0, direction, ret_object, ret_offset);
- if (r < 0)
- return r;
-
- if (ret_offset)
- *ret_offset = extra;
-
- return 1;
- }
-
- return generic_array_get(f, first, i - 1, direction, ret_object, ret_offset);
-}
-
enum {
TEST_FOUND,
TEST_LEFT,
int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
direction_t direction,
Object **ret_object,
- uint64_t *ret_offset,
- uint64_t *ret_idx) {
+ uint64_t *ret_offset) {
int r;
- bool step_back = false;
assert(f);
assert(test_object);
if (n <= 0)
return 0;
- /* This bisects the array in object 'first', but first checks
- * an extra */
+ /* This bisects the array in object 'first', but first checks an extra. */
r = test_object(f, extra, needle);
if (r < 0)
return r;
- if (r == TEST_FOUND)
- r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
-
- /* if we are looking with DIRECTION_UP then we need to first
- see if in the actual array there is a matching entry, and
- return the last one of that. But if there isn't any we need
- to return this one. Hence remember this, and return it
- below. */
- if (r == TEST_LEFT)
- step_back = direction == DIRECTION_UP;
+ if (direction == DIRECTION_DOWN) {
+ /* If we are going downwards, then we need to return the first object that passes the test.
+ * When there is no object that passes the test, we need to return the first object that
+ * test_object() returns TEST_RIGHT for. */
+ if (IN_SET(r,
+ TEST_FOUND, /* The 'extra' object passes the test. Hence, this is the first
+ * object that passes the test. */
+ TEST_RIGHT)) /* The 'extra' object is the first object that test_object() returns
+ * TEST_RIGHT for, and no object exists even in the chained arrays
+ * that passes the test. */
+ goto use_extra; /* The 'extra' object is exactly the one we are looking for. It is
+ * not necessary to bisect the chained arrays. */
+
+ /* Otherwise, the 'extra' object is not the one we are looking for. Search in the arrays. */
- if (r == TEST_RIGHT) {
- if (direction == DIRECTION_DOWN)
- goto found;
- else
- return 0;
+ } else {
+ /* If we are going upwards, then we need to return the last object that passes the test.
+ * When there is no object that passes the test, we need to return the last object that
+ * test_object() returns TEST_LEFT for. */
+ if (r == TEST_RIGHT)
+ return 0; /* Not only the 'extra' object, but also all objects in the chained arrays
+ * will never get TEST_FOUND or TEST_LEFT. The object we are looking for
+ * does not exist. */
+
+ /* Even if the 'extra' object passes the test, there may be multiple objects in the arrays
+ * that also pass the test. Hence, we need to bisect the arrays for finding the last matching
+ * object. */
}
- r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret_object, ret_offset, ret_idx);
-
- if (r == 0 && step_back)
- goto found;
+ r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret_object, ret_offset, NULL);
+ if (r != 0)
+ return r; /* When > 0, the found object is the first (or last, when DIRECTION_UP) object.
+ * Hence, return the found object now. */
- if (r > 0 && ret_idx)
- (*ret_idx)++;
-
- return r;
+ /* No matching object found in the chained arrays.
+ * DIRECTION_DOWN : the 'extra' object does not match the condition either. There is no matching object.
+ * DIRECTION_UP : the 'extra' object matches the condition. So, return it. */
+ if (direction == DIRECTION_DOWN)
+ return 0;
-found:
+use_extra:
if (ret_object) {
r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object);
if (r < 0)
if (ret_offset)
*ret_offset = extra;
- if (ret_idx)
- *ret_idx = 0;
-
return 1;
}
-_pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
+static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
assert(f);
assert(p > 0);
assert(f);
r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
- if (r < 0)
+ if (r <= 0)
return r;
- if (r == 0)
- return -ENOENT;
return generic_array_bisect_plus_one(
f,
monotonic,
test_object_monotonic,
direction,
- ret_object, ret_offset, NULL);
+ ret_object, ret_offset);
}
void journal_file_reset_location(JournalFile *f) {
f->current_monotonic = 0;
zero(f->current_boot_id);
f->current_xor_hash = 0;
+
+ /* Also reset the previous reading direction. Otherwise, next_beyond_location() may wrongly assume
+ * that we already hit EOF. See issue #29216. */
+ f->last_direction = _DIRECTION_INVALID;
}
void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset) {
Object **ret_object,
uint64_t *ret_offset) {
- uint64_t i, n, ofs;
+ uint64_t i, n, q;
+ Object *o;
int r;
assert(f);
if (n <= 0)
return 0;
+ /* When the input offset 'p' is zero, return the first (or last on DIRECTION_UP) entry. */
if (p == 0)
- i = direction == DIRECTION_DOWN ? 0 : n - 1;
- else {
- r = generic_array_bisect(f,
+ return generic_array_get(f,
le64toh(f->header->entry_array_offset),
- le64toh(f->header->n_entries),
- p,
- test_object_offset,
- DIRECTION_DOWN,
- NULL, NULL,
- &i);
- if (r <= 0)
- return r;
+ direction == DIRECTION_DOWN ? 0 : n - 1,
+ direction,
+ ret_object, ret_offset);
+
+ /* Otherwise, first find the nearest entry object. */
+ r = generic_array_bisect(f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ p,
+ test_object_offset,
+ direction,
+ ret_object ? &o : NULL, &q, &i);
+ if (r <= 0)
+ return r;
- r = bump_array_index(&i, direction, n);
- if (r <= 0)
- return r;
- }
+ assert(direction == DIRECTION_DOWN ? p <= q : q <= p);
+
+ /* If the input offset 'p' points to an entry object, generic_array_bisect() should provide
+ * the same offset, and the index needs to be shifted. Otherwise, use the found object as is,
+ * as it is the nearest entry object from the input offset 'p'. */
+
+ if (p != q)
+ goto found;
+
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
/* And jump to it */
- r = generic_array_get(f, le64toh(f->header->entry_array_offset), i, direction, ret_object, &ofs);
+ r = generic_array_get(f, le64toh(f->header->entry_array_offset), i, direction, ret_object ? &o : NULL, &q);
if (r <= 0)
return r;
/* Ensure our array is properly ordered. */
- if (p > 0 && !check_properly_ordered(ofs, p, direction))
+ if (!check_properly_ordered(q, p, direction))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
- "%s: entry array not properly ordered at entry %" PRIu64,
+ "%s: entry array not properly ordered at entry index %" PRIu64,
f->path, i);
-
+found:
+ if (ret_object)
+ *ret_object = o;
if (ret_offset)
- *ret_offset = ofs;
+ *ret_offset = q;
return 1;
}
-int journal_file_next_entry_for_data(
+int journal_file_move_to_entry_for_data(
JournalFile *f,
Object *d,
direction_t direction,
Object **ret_object,
uint64_t *ret_offset) {
- uint64_t i, n, ofs;
- int r;
+ uint64_t extra, first, n;
+ int r = 0;
assert(f);
assert(d);
assert(d->object.type == OBJECT_DATA);
+ assert(IN_SET(direction, DIRECTION_DOWN, DIRECTION_UP));
/* FIXME: fix return value assignment. */
- n = le64toh(READ_NOW(d->data.n_entries));
+ /* This returns the first (when the direction is down, otherwise the last) entry linked to the
+ * specified data object. */
+
+ n = le64toh(d->data.n_entries);
if (n <= 0)
- return n;
+ return 0;
+ n--; /* n_entries is the number of entries linked to the data object, including the 'extra' entry. */
- i = direction == DIRECTION_DOWN ? 0 : n - 1;
+ extra = le64toh(d->data.entry_offset);
+ first = le64toh(d->data.entry_array_offset);
- r = generic_array_get_plus_one(f,
- le64toh(d->data.entry_offset),
- le64toh(d->data.entry_array_offset),
- i,
- direction,
- ret_object, &ofs);
- if (r <= 0)
- return r;
+ if (direction == DIRECTION_DOWN && extra > 0) {
+ /* When we are going downwards, first try to read the extra entry. */
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object);
+ if (r >= 0)
+ goto use_extra;
+ if (!IN_SET(r, -EADDRNOTAVAIL, -EBADMSG))
+ return r;
+ }
+
+ if (n > 0) {
+ /* DIRECTION_DOWN : The extra entry is broken, falling back to the entries in the array.
+ * DIRECTION_UP : Try to find a valid entry in the array from the tail. */
+ r = generic_array_get(f,
+ first,
+ direction == DIRECTION_DOWN ? 0 : n - 1,
+ direction,
+ ret_object, ret_offset);
+ if (!IN_SET(r, 0, -EADDRNOTAVAIL, -EBADMSG))
+ return r; /* found or critical error. */
+ }
+
+ if (direction == DIRECTION_UP && extra > 0) {
+ /* No valid entry exists in the chained array, falling back to the extra entry. */
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object);
+ if (r >= 0)
+ goto use_extra;
+ }
+ return r;
+
+use_extra:
if (ret_offset)
- *ret_offset = ofs;
+ *ret_offset = extra;
return 1;
}
p,
test_object_offset,
direction,
- ret, ret_offset, NULL);
+ ret, ret_offset);
}
int journal_file_move_to_entry_by_monotonic_for_data(
Object **ret_object,
uint64_t *ret_offset) {
- uint64_t b, z, entry_offset, entry_array_offset, n_entries;
- Object *o;
+ uint64_t z, entry_offset, entry_array_offset, n_entries;
+ Object *o, *entry;
int r;
assert(f);
n_entries = le64toh(READ_NOW(d->data.n_entries));
/* First, seek by time */
- r = find_data_object_by_boot_id(f, boot_id, &o, &b);
- if (r < 0)
+ r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
+ if (r <= 0)
return r;
- if (r == 0)
- return -ENOENT;
r = generic_array_bisect_plus_one(f,
le64toh(o->data.entry_offset),
monotonic,
test_object_monotonic,
direction,
- NULL, &z, NULL);
+ NULL, &z);
if (r <= 0)
return r;
- /* And now, continue seeking until we find an entry that
- * exists in both bisection arrays */
-
- r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
- if (r < 0)
- return r;
-
+ /* And now, continue seeking until we find an entry that exists in both bisection arrays. */
for (;;) {
- uint64_t p, q;
+ uint64_t p;
+
+ /* The journal entry found by the above bisect_plus_one() may not have the specified data,
+ * that is, it may not be linked in the data object. So, we need to check that. */
r = generic_array_bisect_plus_one(f,
entry_offset,
z,
test_object_offset,
direction,
- NULL, &p, NULL);
+ ret_object ? &entry : NULL, &p);
if (r <= 0)
return r;
+ if (p == z)
+ break; /* The journal entry has the specified data. Yay! */
+
+ /* If the entry does not have the data, then move to the next (or previous, depends on the
+ * 'direction') entry linked to the data object. But, the next entry may be in another boot.
+ * So, we need to check that the entry has the matching boot ID. */
r = generic_array_bisect_plus_one(f,
le64toh(o->data.entry_offset),
p,
test_object_offset,
direction,
- NULL, &q, NULL);
-
+ ret_object ? &entry : NULL, &z);
if (r <= 0)
return r;
+ if (p == z)
+ break; /* The journal entry has the specified boot ID. Yay! */
- if (p == q) {
- if (ret_object) {
- r = journal_file_move_to_object(f, OBJECT_ENTRY, q, ret_object);
- if (r < 0)
- return r;
- }
-
- if (ret_offset)
- *ret_offset = q;
-
- return 1;
- }
-
- z = q;
+ /* If not, let's try the next entry... */
}
+
+ if (ret_object)
+ *ret_object = entry;
+ if (ret_offset)
+ *ret_offset = z;
+ return 1;
}
int journal_file_move_to_entry_by_seqnum_for_data(
seqnum,
test_object_seqnum,
direction,
- ret_object, ret_offset, NULL);
+ ret_object, ret_offset);
}
int journal_file_move_to_entry_by_realtime_for_data(
realtime,
test_object_realtime,
direction,
- ret, ret_offset, NULL);
+ ret, ret_offset);
}
void journal_file_dump(JournalFile *f) {
r = fd_is_fs_type(f->fd, BTRFS_SUPER_MAGIC);
if (r < 0)
return log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to determine if journal is on btrfs: %m");
- if (!r)
+ if (r == 0)
return 0;
r = read_attr_fd(f->fd, &attrs);
assert(fd >= 0);
if (fstatvfs(fd, &ss) >= 0)
- fs_size = ss.f_frsize * ss.f_blocks;
+ fs_size = u64_multiply_safe(ss.f_frsize, ss.f_blocks);
else
log_debug_errno(errno, "Failed to determine disk size: %m");
if (m->max_use == UINT64_MAX) {
if (fs_size > 0)
- m->max_use = CLAMP(PAGE_ALIGN(fs_size / 10), /* 10% of file system size */
+ m->max_use = CLAMP(PAGE_ALIGN_U64(fs_size / 10), /* 10% of file system size */
MAX_USE_LOWER, MAX_USE_UPPER);
else
m->max_use = MAX_USE_LOWER;
} else {
- m->max_use = PAGE_ALIGN(m->max_use);
+ m->max_use = PAGE_ALIGN_U64(m->max_use);
if (m->max_use != 0 && m->max_use < JOURNAL_FILE_SIZE_MIN*2)
m->max_use = JOURNAL_FILE_SIZE_MIN*2;
if (m->min_use == UINT64_MAX) {
if (fs_size > 0)
- m->min_use = CLAMP(PAGE_ALIGN(fs_size / 50), /* 2% of file system size */
+ m->min_use = CLAMP(PAGE_ALIGN_U64(fs_size / 50), /* 2% of file system size */
MIN_USE_LOW, MIN_USE_HIGH);
else
m->min_use = MIN_USE_LOW;
m->min_use = m->max_use;
if (m->max_size == UINT64_MAX)
- m->max_size = MIN(PAGE_ALIGN(m->max_use / 8), /* 8 chunks */
+ m->max_size = MIN(PAGE_ALIGN_U64(m->max_use / 8), /* 8 chunks */
MAX_SIZE_UPPER);
else
- m->max_size = PAGE_ALIGN(m->max_size);
+ m->max_size = PAGE_ALIGN_U64(m->max_size);
if (compact && m->max_size > JOURNAL_COMPACT_SIZE_MAX)
m->max_size = JOURNAL_COMPACT_SIZE_MAX;
if (m->min_size == UINT64_MAX)
m->min_size = JOURNAL_FILE_SIZE_MIN;
else
- m->min_size = CLAMP(PAGE_ALIGN(m->min_size),
+ m->min_size = CLAMP(PAGE_ALIGN_U64(m->min_size),
JOURNAL_FILE_SIZE_MIN,
m->max_size ?: UINT64_MAX);
if (m->keep_free == UINT64_MAX) {
if (fs_size > 0)
- m->keep_free = MIN(PAGE_ALIGN(fs_size / 20), /* 5% of file system size */
+ m->keep_free = MIN(PAGE_ALIGN_U64(fs_size / 20), /* 5% of file system size */
KEEP_FREE_UPPER);
else
m->keep_free = DEFAULT_KEEP_FREE;
MAX(MIN_COMPRESS_THRESHOLD, compress_threshold_bytes),
.strict_order = FLAGS_SET(file_flags, JOURNAL_STRICT_ORDER),
.newest_boot_id_prioq_idx = PRIOQ_IDX_NULL,
+ .last_direction = _DIRECTION_INVALID,
};
if (fname) {
newly_created = f->last_stat.st_size == 0 && journal_file_writable(f);
}
- f->cache_fd = mmap_cache_add_fd(mmap_cache, f->fd, mmap_prot_from_open_flags(open_flags));
- if (!f->cache_fd) {
- r = -ENOMEM;
+ r = mmap_cache_add_fd(mmap_cache, f->fd, mmap_prot_from_open_flags(open_flags), &f->cache_fd);
+ if (r < 0)
goto fail;
- }
if (newly_created) {
(void) journal_file_warn_btrfs(f);
r = journal_file_data_payload(from, NULL, q, NULL, 0, 0, &data, &l);
if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {
log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", i);
- goto next;
+ continue;
}
if (r < 0)
return r;
.object_offset = h,
.hash = le64toh(u->data.hash),
};
-
- next:
- /* The above journal_file_data_payload() may clear or overwrite cached object. Hence, we need
- * to re-read the object from the cache. */
- r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
- if (r < 0)
- return r;
}
if (m == 0)
if (r < 0)
return r;
- r = generic_array_get_plus_one(f,
- le64toh(o->data.entry_offset),
- le64toh(o->data.entry_array_offset),
- le64toh(o->data.n_entries) - 1,
- DIRECTION_UP,
- &o, NULL);
+ r = journal_file_move_to_entry_for_data(f, o, DIRECTION_UP, &o, NULL);
if (r <= 0)
return r;