]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
test: extend test-send to send some weirder data
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2011 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
fb0951b0
LP
30#ifdef HAVE_XATTR
31#include <attr/xattr.h>
32#endif
33
cec736d2
LP
34#include "journal-def.h"
35#include "journal-file.h"
0284adc6 36#include "journal-authenticate.h"
cec736d2 37#include "lookup3.h"
807e17f0 38#include "compress.h"
7560fffc 39#include "fsprg.h"
cec736d2 40
4a92baf3
LP
/* Sizes in bytes of the hash tables: the data table is never made smaller
 * than this, the field table always has exactly this size */
#define DEFAULT_DATA_HASH_TABLE_SIZE    (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE   (333ULL*sizeof(HashItem))

/* Data payloads of at least this many bytes are candidates for compression */
#define COMPRESSION_SIZE_THRESHOLD      (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN           (64ULL*1024ULL)                  /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER           (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER           (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER          (128ULL*1024ULL*1024ULL)         /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER         (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the keep_free value when we can't determine the file system
 * size */
#define DEFAULT_KEEP_FREE               (1024ULL*1024ULL)                /* 1 MiB */

/* n_data was the first field added to the header after the initial file
 * format design, so a valid header reaches at least up to it */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 67
cec736d2 68void journal_file_close(JournalFile *f) {
de190aef 69 assert(f);
cec736d2 70
feb12d3e 71#ifdef HAVE_GCRYPT
b0af6f41 72 /* Write the final tag */
c586dbf1 73 if (f->seal && f->writable)
b0af6f41 74 journal_file_append_tag(f);
feb12d3e 75#endif
b0af6f41 76
7560fffc 77 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
78 if (f->mmap && f->fd >= 0)
79 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc
LP
80
81 if (f->writable && f->fd >= 0)
82 fdatasync(f->fd);
83
d384c7a8 84 if (f->header) {
cd96b3b8
LP
85 /* Mark the file offline. Don't override the archived state if it already is set */
86 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 87 f->header->state = STATE_OFFLINE;
cec736d2 88
d384c7a8
MS
89 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
90 }
cec736d2 91
0ac38b70
LP
92 if (f->fd >= 0)
93 close_nointr_nofail(f->fd);
94
cec736d2 95 free(f->path);
807e17f0 96
16e9f408
LP
97 if (f->mmap)
98 mmap_cache_unref(f->mmap);
99
807e17f0
LP
100#ifdef HAVE_XZ
101 free(f->compress_buffer);
102#endif
103
7560fffc 104#ifdef HAVE_GCRYPT
baed47c3
LP
105 if (f->fss_file)
106 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
107 else if (f->fsprg_state)
108 free(f->fsprg_state);
109
110 free(f->fsprg_seed);
7560fffc
LP
111
112 if (f->hmac)
113 gcry_md_close(f->hmac);
114#endif
115
cec736d2
LP
116 free(f);
117}
118
0ac38b70 119static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
120 Header h;
121 ssize_t k;
122 int r;
123
124 assert(f);
125
126 zero(h);
7560fffc 127 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 128 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 129
7560fffc
LP
130 h.incompatible_flags =
131 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
132
133 h.compatible_flags =
baed47c3 134 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 135
cec736d2
LP
136 r = sd_id128_randomize(&h.file_id);
137 if (r < 0)
138 return r;
139
0ac38b70
LP
140 if (template) {
141 h.seqnum_id = template->header->seqnum_id;
beec0085 142 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
143 } else
144 h.seqnum_id = h.file_id;
cec736d2
LP
145
146 k = pwrite(f->fd, &h, sizeof(h), 0);
147 if (k < 0)
148 return -errno;
149
150 if (k != sizeof(h))
151 return -EIO;
152
153 return 0;
154}
155
156static int journal_file_refresh_header(JournalFile *f) {
157 int r;
de190aef 158 sd_id128_t boot_id;
cec736d2
LP
159
160 assert(f);
161
162 r = sd_id128_get_machine(&f->header->machine_id);
163 if (r < 0)
164 return r;
165
de190aef 166 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
167 if (r < 0)
168 return r;
169
de190aef
LP
170 if (sd_id128_equal(boot_id, f->header->boot_id))
171 f->tail_entry_monotonic_valid = true;
172
173 f->header->boot_id = boot_id;
174
175 f->header->state = STATE_ONLINE;
b788cc23 176
7560fffc
LP
177 /* Sync the online state to disk */
178 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
179 fdatasync(f->fd);
b788cc23 180
cec736d2
LP
181 return 0;
182}
183
184static int journal_file_verify_header(JournalFile *f) {
185 assert(f);
186
7560fffc 187 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
188 return -EBADMSG;
189
7560fffc
LP
190 /* In both read and write mode we refuse to open files with
191 * incompatible flags we don't know */
807e17f0 192#ifdef HAVE_XZ
7560fffc 193 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
194 return -EPROTONOSUPPORT;
195#else
cec736d2
LP
196 if (f->header->incompatible_flags != 0)
197 return -EPROTONOSUPPORT;
807e17f0 198#endif
cec736d2 199
7560fffc
LP
200 /* When open for writing we refuse to open files with
201 * compatible flags, too */
202 if (f->writable) {
203#ifdef HAVE_GCRYPT
baed47c3 204 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
205 return -EPROTONOSUPPORT;
206#else
207 if (f->header->compatible_flags != 0)
208 return -EPROTONOSUPPORT;
209#endif
210 }
211
db11ac1a
LP
212 if (f->header->state >= _STATE_MAX)
213 return -EBADMSG;
214
dca6219e
LP
215 /* The first addition was n_data, so check that we are at least this large */
216 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
217 return -EBADMSG;
218
8088cbd3 219 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
220 return -EBADMSG;
221
db11ac1a
LP
222 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
223 return -ENODATA;
224
225 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
226 return -ENODATA;
227
7762e02b
LP
228 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
229 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
230 !VALID64(le64toh(f->header->tail_object_offset)) ||
231 !VALID64(le64toh(f->header->entry_array_offset)))
232 return -ENODATA;
233
234 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
235 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
236 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
237 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
cec736d2
LP
238 return -ENODATA;
239
240 if (f->writable) {
ccdbaf91 241 uint8_t state;
cec736d2
LP
242 sd_id128_t machine_id;
243 int r;
244
245 r = sd_id128_get_machine(&machine_id);
246 if (r < 0)
247 return r;
248
249 if (!sd_id128_equal(machine_id, f->header->machine_id))
250 return -EHOSTDOWN;
251
de190aef 252 state = f->header->state;
cec736d2 253
71fa6f00
LP
254 if (state == STATE_ONLINE) {
255 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
256 return -EBUSY;
257 } else if (state == STATE_ARCHIVED)
cec736d2 258 return -ESHUTDOWN;
71fa6f00
LP
259 else if (state != STATE_OFFLINE) {
260 log_debug("Journal file %s has unknown state %u.", f->path, state);
261 return -EBUSY;
262 }
cec736d2
LP
263 }
264
8088cbd3 265 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
c586dbf1 266
f1889c91 267 f->seal = JOURNAL_HEADER_SEALED(f->header);
7560fffc 268
cec736d2
LP
269 return 0;
270}
271
272static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 273 uint64_t old_size, new_size;
fec2aa2f 274 int r;
cec736d2
LP
275
276 assert(f);
277
cec736d2 278 /* We assume that this file is not sparse, and we know that
38ac38b2 279 * for sure, since we always call posix_fallocate()
cec736d2
LP
280 * ourselves */
281
282 old_size =
23b0b2b2 283 le64toh(f->header->header_size) +
cec736d2
LP
284 le64toh(f->header->arena_size);
285
bc85bfee 286 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
287 if (new_size < le64toh(f->header->header_size))
288 new_size = le64toh(f->header->header_size);
bc85bfee
LP
289
290 if (new_size <= old_size)
cec736d2
LP
291 return 0;
292
bc85bfee
LP
293 if (f->metrics.max_size > 0 &&
294 new_size > f->metrics.max_size)
295 return -E2BIG;
cec736d2 296
bc85bfee
LP
297 if (new_size > f->metrics.min_size &&
298 f->metrics.keep_free > 0) {
cec736d2
LP
299 struct statvfs svfs;
300
301 if (fstatvfs(f->fd, &svfs) >= 0) {
302 uint64_t available;
303
304 available = svfs.f_bfree * svfs.f_bsize;
305
bc85bfee
LP
306 if (available >= f->metrics.keep_free)
307 available -= f->metrics.keep_free;
cec736d2
LP
308 else
309 available = 0;
310
311 if (new_size - old_size > available)
312 return -E2BIG;
313 }
314 }
315
bc85bfee
LP
316 /* Note that the glibc fallocate() fallback is very
317 inefficient, hence we try to minimize the allocation area
318 as we can. */
fec2aa2f
GV
319 r = posix_fallocate(f->fd, old_size, new_size - old_size);
320 if (r != 0)
321 return -r;
cec736d2
LP
322
323 if (fstat(f->fd, &f->last_stat) < 0)
324 return -errno;
325
23b0b2b2 326 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
327
328 return 0;
329}
330
fcde2389 331static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
cec736d2 332 assert(f);
cec736d2
LP
333 assert(ret);
334
7762e02b
LP
335 if (size <= 0)
336 return -EINVAL;
337
2a59ea54 338 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
339 if (offset + size > (uint64_t) f->last_stat.st_size) {
340 /* Hmm, out of range? Let's refresh the fstat() data
341 * first, before we trust that check. */
342
343 if (fstat(f->fd, &f->last_stat) < 0 ||
344 offset + size > (uint64_t) f->last_stat.st_size)
345 return -EADDRNOTAVAIL;
346 }
347
fcde2389 348 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
cec736d2
LP
349}
350
16e9f408
LP
351static uint64_t minimum_header_size(Object *o) {
352
353 static uint64_t table[] = {
354 [OBJECT_DATA] = sizeof(DataObject),
355 [OBJECT_FIELD] = sizeof(FieldObject),
356 [OBJECT_ENTRY] = sizeof(EntryObject),
357 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
358 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
359 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
360 [OBJECT_TAG] = sizeof(TagObject),
361 };
362
363 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
364 return sizeof(ObjectHeader);
365
366 return table[o->object.type];
367}
368
de190aef 369int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
370 int r;
371 void *t;
372 Object *o;
373 uint64_t s;
16e9f408 374 unsigned context;
cec736d2
LP
375
376 assert(f);
377 assert(ret);
378
db11ac1a
LP
379 /* Objects may only be located at multiple of 64 bit */
380 if (!VALID64(offset))
381 return -EFAULT;
382
16e9f408
LP
383 /* One context for each type, plus one catch-all for the rest */
384 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
385
fcde2389 386 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
387 if (r < 0)
388 return r;
389
390 o = (Object*) t;
391 s = le64toh(o->object.size);
392
393 if (s < sizeof(ObjectHeader))
394 return -EBADMSG;
395
16e9f408
LP
396 if (o->object.type <= OBJECT_UNUSED)
397 return -EBADMSG;
398
399 if (s < minimum_header_size(o))
400 return -EBADMSG;
401
3c1668da 402 if (type > 0 && o->object.type != type)
cec736d2
LP
403 return -EBADMSG;
404
405 if (s > sizeof(ObjectHeader)) {
fcde2389 406 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
cec736d2
LP
407 if (r < 0)
408 return r;
409
410 o = (Object*) t;
411 }
412
cec736d2
LP
413 *ret = o;
414 return 0;
415}
416
d98cc1f2 417static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
418 uint64_t r;
419
420 assert(f);
421
beec0085 422 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
423
424 if (seqnum) {
de190aef 425 /* If an external seqnum counter was passed, we update
c2373f84
LP
426 * both the local and the external one, and set it to
427 * the maximum of both */
428
429 if (*seqnum + 1 > r)
430 r = *seqnum + 1;
431
432 *seqnum = r;
433 }
434
beec0085 435 f->header->tail_entry_seqnum = htole64(r);
cec736d2 436
beec0085
LP
437 if (f->header->head_entry_seqnum == 0)
438 f->header->head_entry_seqnum = htole64(r);
de190aef 439
cec736d2
LP
440 return r;
441}
442
0284adc6 443int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
444 int r;
445 uint64_t p;
446 Object *tail, *o;
447 void *t;
448
449 assert(f);
16e9f408 450 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
451 assert(size >= sizeof(ObjectHeader));
452 assert(offset);
453 assert(ret);
454
455 p = le64toh(f->header->tail_object_offset);
cec736d2 456 if (p == 0)
23b0b2b2 457 p = le64toh(f->header->header_size);
cec736d2 458 else {
de190aef 459 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
460 if (r < 0)
461 return r;
462
463 p += ALIGN64(le64toh(tail->object.size));
464 }
465
466 r = journal_file_allocate(f, p, size);
467 if (r < 0)
468 return r;
469
fcde2389 470 r = journal_file_move_to(f, type, false, p, size, &t);
cec736d2
LP
471 if (r < 0)
472 return r;
473
474 o = (Object*) t;
475
476 zero(o->object);
de190aef 477 o->object.type = type;
cec736d2
LP
478 o->object.size = htole64(size);
479
480 f->header->tail_object_offset = htole64(p);
cec736d2
LP
481 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
482
483 *ret = o;
484 *offset = p;
485
486 return 0;
487}
488
de190aef 489static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
490 uint64_t s, p;
491 Object *o;
492 int r;
493
494 assert(f);
495
dfabe643 496 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
497 journal file and we want to make sure we never get beyond
498 75% fill level. Calculate the hash table size for the
499 maximum file size based on these metrics. */
500
dfabe643 501 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
502 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
503 s = DEFAULT_DATA_HASH_TABLE_SIZE;
504
2b43f939 505 log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 506
de190aef
LP
507 r = journal_file_append_object(f,
508 OBJECT_DATA_HASH_TABLE,
509 offsetof(Object, hash_table.items) + s,
510 &o, &p);
cec736d2
LP
511 if (r < 0)
512 return r;
513
de190aef 514 memset(o->hash_table.items, 0, s);
cec736d2 515
de190aef
LP
516 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
517 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
518
519 return 0;
520}
521
de190aef 522static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
523 uint64_t s, p;
524 Object *o;
525 int r;
526
527 assert(f);
528
3c1668da
LP
529 /* We use a fixed size hash table for the fields as this
530 * number should grow very slowly only */
531
de190aef
LP
532 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
533 r = journal_file_append_object(f,
534 OBJECT_FIELD_HASH_TABLE,
535 offsetof(Object, hash_table.items) + s,
536 &o, &p);
cec736d2
LP
537 if (r < 0)
538 return r;
539
de190aef 540 memset(o->hash_table.items, 0, s);
cec736d2 541
de190aef
LP
542 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
543 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
544
545 return 0;
546}
547
de190aef 548static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
549 uint64_t s, p;
550 void *t;
551 int r;
552
553 assert(f);
554
de190aef
LP
555 p = le64toh(f->header->data_hash_table_offset);
556 s = le64toh(f->header->data_hash_table_size);
cec736d2 557
de190aef 558 r = journal_file_move_to(f,
16e9f408 559 OBJECT_DATA_HASH_TABLE,
fcde2389 560 true,
de190aef
LP
561 p, s,
562 &t);
cec736d2
LP
563 if (r < 0)
564 return r;
565
de190aef 566 f->data_hash_table = t;
cec736d2
LP
567 return 0;
568}
569
de190aef 570static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
571 uint64_t s, p;
572 void *t;
573 int r;
574
575 assert(f);
576
de190aef
LP
577 p = le64toh(f->header->field_hash_table_offset);
578 s = le64toh(f->header->field_hash_table_size);
cec736d2 579
de190aef 580 r = journal_file_move_to(f,
16e9f408 581 OBJECT_FIELD_HASH_TABLE,
fcde2389 582 true,
de190aef
LP
583 p, s,
584 &t);
cec736d2
LP
585 if (r < 0)
586 return r;
587
de190aef 588 f->field_hash_table = t;
cec736d2
LP
589 return 0;
590}
591
3c1668da
LP
592static int journal_file_link_field(
593 JournalFile *f,
594 Object *o,
595 uint64_t offset,
596 uint64_t hash) {
597
598 uint64_t p, h;
599 int r;
600
601 assert(f);
602 assert(o);
603 assert(offset > 0);
604
605 if (o->object.type != OBJECT_FIELD)
606 return -EINVAL;
607
608 /* This might alter the window we are looking at */
609
610 o->field.next_hash_offset = o->field.head_data_offset = 0;
611
612 h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
613 p = le64toh(f->field_hash_table[h].tail_hash_offset);
614 if (p == 0)
615 f->field_hash_table[h].head_hash_offset = htole64(offset);
616 else {
617 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
618 if (r < 0)
619 return r;
620
621 o->field.next_hash_offset = htole64(offset);
622 }
623
624 f->field_hash_table[h].tail_hash_offset = htole64(offset);
625
626 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
627 f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
628
629 return 0;
630}
631
632static int journal_file_link_data(
633 JournalFile *f,
634 Object *o,
635 uint64_t offset,
636 uint64_t hash) {
637
de190aef 638 uint64_t p, h;
cec736d2
LP
639 int r;
640
641 assert(f);
642 assert(o);
643 assert(offset > 0);
b588975f
LP
644
645 if (o->object.type != OBJECT_DATA)
646 return -EINVAL;
cec736d2 647
48496df6
LP
648 /* This might alter the window we are looking at */
649
de190aef
LP
650 o->data.next_hash_offset = o->data.next_field_offset = 0;
651 o->data.entry_offset = o->data.entry_array_offset = 0;
652 o->data.n_entries = 0;
cec736d2 653
de190aef 654 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 655 p = le64toh(f->data_hash_table[h].tail_hash_offset);
3c1668da 656 if (p == 0)
cec736d2 657 /* Only entry in the hash table is easy */
de190aef 658 f->data_hash_table[h].head_hash_offset = htole64(offset);
3c1668da 659 else {
48496df6
LP
660 /* Move back to the previous data object, to patch in
661 * pointer */
cec736d2 662
de190aef 663 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
664 if (r < 0)
665 return r;
666
de190aef 667 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
668 }
669
de190aef 670 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 671
dca6219e
LP
672 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
673 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
674
cec736d2
LP
675 return 0;
676}
677
3c1668da
LP
678int journal_file_find_field_object_with_hash(
679 JournalFile *f,
680 const void *field, uint64_t size, uint64_t hash,
681 Object **ret, uint64_t *offset) {
682
683 uint64_t p, osize, h;
684 int r;
685
686 assert(f);
687 assert(field && size > 0);
688
689 osize = offsetof(Object, field.payload) + size;
690
691 if (f->header->field_hash_table_size == 0)
692 return -EBADMSG;
693
694 h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
695 p = le64toh(f->field_hash_table[h].head_hash_offset);
696
697 while (p > 0) {
698 Object *o;
699
700 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
701 if (r < 0)
702 return r;
703
704 if (le64toh(o->field.hash) == hash &&
705 le64toh(o->object.size) == osize &&
706 memcmp(o->field.payload, field, size) == 0) {
707
708 if (ret)
709 *ret = o;
710 if (offset)
711 *offset = p;
712
713 return 1;
714 }
715
716 p = le64toh(o->field.next_hash_offset);
717 }
718
719 return 0;
720}
721
722int journal_file_find_field_object(
723 JournalFile *f,
724 const void *field, uint64_t size,
725 Object **ret, uint64_t *offset) {
726
727 uint64_t hash;
728
729 assert(f);
730 assert(field && size > 0);
731
732 hash = hash64(field, size);
733
734 return journal_file_find_field_object_with_hash(f,
735 field, size, hash,
736 ret, offset);
737}
738
de190aef
LP
739int journal_file_find_data_object_with_hash(
740 JournalFile *f,
741 const void *data, uint64_t size, uint64_t hash,
742 Object **ret, uint64_t *offset) {
48496df6 743
de190aef 744 uint64_t p, osize, h;
cec736d2
LP
745 int r;
746
747 assert(f);
748 assert(data || size == 0);
749
750 osize = offsetof(Object, data.payload) + size;
751
bc85bfee
LP
752 if (f->header->data_hash_table_size == 0)
753 return -EBADMSG;
754
de190aef
LP
755 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
756 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 757
de190aef
LP
758 while (p > 0) {
759 Object *o;
cec736d2 760
de190aef 761 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
762 if (r < 0)
763 return r;
764
807e17f0 765 if (le64toh(o->data.hash) != hash)
85a131e8 766 goto next;
807e17f0
LP
767
768 if (o->object.flags & OBJECT_COMPRESSED) {
769#ifdef HAVE_XZ
b785c858 770 uint64_t l, rsize;
cec736d2 771
807e17f0
LP
772 l = le64toh(o->object.size);
773 if (l <= offsetof(Object, data.payload))
cec736d2
LP
774 return -EBADMSG;
775
807e17f0
LP
776 l -= offsetof(Object, data.payload);
777
778 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
779 return -EBADMSG;
780
b785c858 781 if (rsize == size &&
807e17f0
LP
782 memcmp(f->compress_buffer, data, size) == 0) {
783
784 if (ret)
785 *ret = o;
786
787 if (offset)
788 *offset = p;
789
790 return 1;
791 }
792#else
793 return -EPROTONOSUPPORT;
794#endif
795
796 } else if (le64toh(o->object.size) == osize &&
797 memcmp(o->data.payload, data, size) == 0) {
798
cec736d2
LP
799 if (ret)
800 *ret = o;
801
802 if (offset)
803 *offset = p;
804
de190aef 805 return 1;
cec736d2
LP
806 }
807
85a131e8 808 next:
cec736d2
LP
809 p = le64toh(o->data.next_hash_offset);
810 }
811
de190aef
LP
812 return 0;
813}
814
815int journal_file_find_data_object(
816 JournalFile *f,
817 const void *data, uint64_t size,
818 Object **ret, uint64_t *offset) {
819
820 uint64_t hash;
821
822 assert(f);
823 assert(data || size == 0);
824
825 hash = hash64(data, size);
826
827 return journal_file_find_data_object_with_hash(f,
828 data, size, hash,
829 ret, offset);
830}
831
3c1668da
LP
832static int journal_file_append_field(
833 JournalFile *f,
834 const void *field, uint64_t size,
835 Object **ret, uint64_t *offset) {
836
837 uint64_t hash, p;
838 uint64_t osize;
839 Object *o;
840 int r;
841
842 assert(f);
843 assert(field && size > 0);
844
845 hash = hash64(field, size);
846
847 r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
848 if (r < 0)
849 return r;
850 else if (r > 0) {
851
852 if (ret)
853 *ret = o;
854
855 if (offset)
856 *offset = p;
857
858 return 0;
859 }
860
861 osize = offsetof(Object, field.payload) + size;
862 r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
863
864 o->field.hash = htole64(hash);
865 memcpy(o->field.payload, field, size);
866
867 r = journal_file_link_field(f, o, p, hash);
868 if (r < 0)
869 return r;
870
871 /* The linking might have altered the window, so let's
872 * refresh our pointer */
873 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
874 if (r < 0)
875 return r;
876
877#ifdef HAVE_GCRYPT
878 r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
879 if (r < 0)
880 return r;
881#endif
882
883 if (ret)
884 *ret = o;
885
886 if (offset)
887 *offset = p;
888
889 return 0;
890}
891
48496df6
LP
892static int journal_file_append_data(
893 JournalFile *f,
894 const void *data, uint64_t size,
895 Object **ret, uint64_t *offset) {
896
de190aef
LP
897 uint64_t hash, p;
898 uint64_t osize;
899 Object *o;
900 int r;
807e17f0 901 bool compressed = false;
3c1668da 902 const void *eq;
de190aef
LP
903
904 assert(f);
905 assert(data || size == 0);
906
907 hash = hash64(data, size);
908
909 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
910 if (r < 0)
911 return r;
912 else if (r > 0) {
913
914 if (ret)
915 *ret = o;
916
917 if (offset)
918 *offset = p;
919
920 return 0;
921 }
922
923 osize = offsetof(Object, data.payload) + size;
924 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
925 if (r < 0)
926 return r;
927
cec736d2 928 o->data.hash = htole64(hash);
807e17f0
LP
929
930#ifdef HAVE_XZ
931 if (f->compress &&
932 size >= COMPRESSION_SIZE_THRESHOLD) {
933 uint64_t rsize;
934
935 compressed = compress_blob(data, size, o->data.payload, &rsize);
936
937 if (compressed) {
938 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
939 o->object.flags |= OBJECT_COMPRESSED;
940
807e17f0
LP
941 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
942 }
943 }
944#endif
945
64825d3c 946 if (!compressed && size > 0)
807e17f0 947 memcpy(o->data.payload, data, size);
cec736d2 948
de190aef 949 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
950 if (r < 0)
951 return r;
952
48496df6
LP
953 /* The linking might have altered the window, so let's
954 * refresh our pointer */
955 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
956 if (r < 0)
957 return r;
958
3c1668da
LP
959 eq = memchr(data, '=', size);
960 if (eq && eq > data) {
961 uint64_t fp;
962 Object *fo;
963
964 /* Create field object ... */
965 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
966 if (r < 0)
967 return r;
968
969 /* ... and link it in. */
970 o->data.next_field_offset = fo->field.head_data_offset;
971 fo->field.head_data_offset = le64toh(p);
972 }
973
5996c7c2
LP
974#ifdef HAVE_GCRYPT
975 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
976 if (r < 0)
977 return r;
978#endif
979
cec736d2
LP
980 if (ret)
981 *ret = o;
982
983 if (offset)
de190aef 984 *offset = p;
cec736d2
LP
985
986 return 0;
987}
988
989uint64_t journal_file_entry_n_items(Object *o) {
990 assert(o);
b588975f
LP
991
992 if (o->object.type != OBJECT_ENTRY)
993 return 0;
cec736d2
LP
994
995 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
996}
997
0284adc6 998uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 999 assert(o);
b588975f
LP
1000
1001 if (o->object.type != OBJECT_ENTRY_ARRAY)
1002 return 0;
de190aef
LP
1003
1004 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
1005}
1006
fb9a24b6
LP
1007uint64_t journal_file_hash_table_n_items(Object *o) {
1008 assert(o);
b588975f
LP
1009
1010 if (o->object.type != OBJECT_DATA_HASH_TABLE &&
1011 o->object.type != OBJECT_FIELD_HASH_TABLE)
1012 return 0;
fb9a24b6
LP
1013
1014 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
1015}
1016
de190aef 1017static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
1018 le64_t *first,
1019 le64_t *idx,
de190aef 1020 uint64_t p) {
cec736d2 1021 int r;
de190aef
LP
1022 uint64_t n = 0, ap = 0, q, i, a, hidx;
1023 Object *o;
1024
cec736d2 1025 assert(f);
de190aef
LP
1026 assert(first);
1027 assert(idx);
1028 assert(p > 0);
cec736d2 1029
de190aef
LP
1030 a = le64toh(*first);
1031 i = hidx = le64toh(*idx);
1032 while (a > 0) {
1033
1034 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1035 if (r < 0)
1036 return r;
cec736d2 1037
de190aef
LP
1038 n = journal_file_entry_array_n_items(o);
1039 if (i < n) {
1040 o->entry_array.items[i] = htole64(p);
1041 *idx = htole64(hidx + 1);
1042 return 0;
1043 }
cec736d2 1044
de190aef
LP
1045 i -= n;
1046 ap = a;
1047 a = le64toh(o->entry_array.next_entry_array_offset);
1048 }
1049
1050 if (hidx > n)
1051 n = (hidx+1) * 2;
1052 else
1053 n = n * 2;
1054
1055 if (n < 4)
1056 n = 4;
1057
1058 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
1059 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
1060 &o, &q);
cec736d2
LP
1061 if (r < 0)
1062 return r;
1063
feb12d3e 1064#ifdef HAVE_GCRYPT
5996c7c2 1065 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
b0af6f41
LP
1066 if (r < 0)
1067 return r;
feb12d3e 1068#endif
b0af6f41 1069
de190aef 1070 o->entry_array.items[i] = htole64(p);
cec736d2 1071
de190aef 1072 if (ap == 0)
7be3aa17 1073 *first = htole64(q);
cec736d2 1074 else {
de190aef 1075 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
1076 if (r < 0)
1077 return r;
1078
de190aef
LP
1079 o->entry_array.next_entry_array_offset = htole64(q);
1080 }
cec736d2 1081
2dee23eb
LP
1082 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1083 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
1084
de190aef
LP
1085 *idx = htole64(hidx + 1);
1086
1087 return 0;
1088}
cec736d2 1089
de190aef 1090static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
1091 le64_t *extra,
1092 le64_t *first,
1093 le64_t *idx,
de190aef
LP
1094 uint64_t p) {
1095
1096 int r;
1097
1098 assert(f);
1099 assert(extra);
1100 assert(first);
1101 assert(idx);
1102 assert(p > 0);
1103
1104 if (*idx == 0)
1105 *extra = htole64(p);
1106 else {
4fd052ae 1107 le64_t i;
de190aef 1108
7be3aa17 1109 i = htole64(le64toh(*idx) - 1);
de190aef
LP
1110 r = link_entry_into_array(f, first, &i, p);
1111 if (r < 0)
1112 return r;
cec736d2
LP
1113 }
1114
de190aef
LP
1115 *idx = htole64(le64toh(*idx) + 1);
1116 return 0;
1117}
1118
1119static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
1120 uint64_t p;
1121 int r;
1122 assert(f);
1123 assert(o);
1124 assert(offset > 0);
1125
1126 p = le64toh(o->entry.items[i].object_offset);
1127 if (p == 0)
1128 return -EINVAL;
1129
1130 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
1131 if (r < 0)
1132 return r;
1133
de190aef
LP
1134 return link_entry_into_array_plus_one(f,
1135 &o->data.entry_offset,
1136 &o->data.entry_array_offset,
1137 &o->data.n_entries,
1138 offset);
cec736d2
LP
1139}
1140
1141static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 1142 uint64_t n, i;
cec736d2
LP
1143 int r;
1144
1145 assert(f);
1146 assert(o);
1147 assert(offset > 0);
b588975f
LP
1148
1149 if (o->object.type != OBJECT_ENTRY)
1150 return -EINVAL;
cec736d2 1151
b788cc23
LP
1152 __sync_synchronize();
1153
cec736d2 1154 /* Link up the entry itself */
de190aef
LP
1155 r = link_entry_into_array(f,
1156 &f->header->entry_array_offset,
1157 &f->header->n_entries,
1158 offset);
1159 if (r < 0)
1160 return r;
cec736d2 1161
aaf53376 1162 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 1163
de190aef 1164 if (f->header->head_entry_realtime == 0)
0ac38b70 1165 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 1166
0ac38b70 1167 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
1168 f->header->tail_entry_monotonic = o->entry.monotonic;
1169
1170 f->tail_entry_monotonic_valid = true;
cec736d2
LP
1171
1172 /* Link up the items */
1173 n = journal_file_entry_n_items(o);
1174 for (i = 0; i < n; i++) {
1175 r = journal_file_link_entry_item(f, o, offset, i);
1176 if (r < 0)
1177 return r;
1178 }
1179
cec736d2
LP
1180 return 0;
1181}
1182
1183static int journal_file_append_entry_internal(
1184 JournalFile *f,
1185 const dual_timestamp *ts,
1186 uint64_t xor_hash,
1187 const EntryItem items[], unsigned n_items,
de190aef 1188 uint64_t *seqnum,
cec736d2
LP
1189 Object **ret, uint64_t *offset) {
1190 uint64_t np;
1191 uint64_t osize;
1192 Object *o;
1193 int r;
1194
1195 assert(f);
1196 assert(items || n_items == 0);
de190aef 1197 assert(ts);
cec736d2
LP
1198
1199 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1200
de190aef 1201 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
1202 if (r < 0)
1203 return r;
1204
d98cc1f2 1205 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 1206 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
1207 o->entry.realtime = htole64(ts->realtime);
1208 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
1209 o->entry.xor_hash = htole64(xor_hash);
1210 o->entry.boot_id = f->header->boot_id;
1211
feb12d3e 1212#ifdef HAVE_GCRYPT
5996c7c2 1213 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
b0af6f41
LP
1214 if (r < 0)
1215 return r;
feb12d3e 1216#endif
b0af6f41 1217
cec736d2
LP
1218 r = journal_file_link_entry(f, o, np);
1219 if (r < 0)
1220 return r;
1221
1222 if (ret)
1223 *ret = o;
1224
1225 if (offset)
1226 *offset = np;
1227
1228 return 0;
1229}
1230
cf244689 1231void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1232 assert(f);
1233
1234 /* inotify() does not receive IN_MODIFY events from file
1235 * accesses done via mmap(). After each access we hence
1236 * trigger IN_MODIFY by truncating the journal file to its
1237 * current size which triggers IN_MODIFY. */
1238
bc85bfee
LP
1239 __sync_synchronize();
1240
50f20cfd 1241 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
c5315881 1242 log_error("Failed to truncate file to its own size: %m");
50f20cfd
LP
1243}
1244
1f2da9ec
LP
1245static int entry_item_cmp(const void *_a, const void *_b) {
1246 const EntryItem *a = _a, *b = _b;
1247
1248 if (le64toh(a->object_offset) < le64toh(b->object_offset))
1249 return -1;
1250 if (le64toh(a->object_offset) > le64toh(b->object_offset))
1251 return 1;
1252 return 0;
1253}
1254
de190aef 1255int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1256 unsigned i;
1257 EntryItem *items;
1258 int r;
1259 uint64_t xor_hash = 0;
de190aef 1260 struct dual_timestamp _ts;
cec736d2
LP
1261
1262 assert(f);
1263 assert(iovec || n_iovec == 0);
1264
de190aef
LP
1265 if (!f->writable)
1266 return -EPERM;
1267
1268 if (!ts) {
1269 dual_timestamp_get(&_ts);
1270 ts = &_ts;
1271 }
1272
1273 if (f->tail_entry_monotonic_valid &&
1274 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1275 return -EINVAL;
1276
feb12d3e 1277#ifdef HAVE_GCRYPT
7560fffc
LP
1278 r = journal_file_maybe_append_tag(f, ts->realtime);
1279 if (r < 0)
1280 return r;
feb12d3e 1281#endif
7560fffc 1282
64825d3c
LP
1283 /* alloca() can't take 0, hence let's allocate at least one */
1284 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1285
1286 for (i = 0; i < n_iovec; i++) {
1287 uint64_t p;
1288 Object *o;
1289
1290 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1291 if (r < 0)
cf244689 1292 return r;
cec736d2
LP
1293
1294 xor_hash ^= le64toh(o->data.hash);
1295 items[i].object_offset = htole64(p);
de7b95cd 1296 items[i].hash = o->data.hash;
cec736d2
LP
1297 }
1298
1f2da9ec
LP
1299 /* Order by the position on disk, in order to improve seek
1300 * times for rotating media. */
1301 qsort(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1302
de190aef 1303 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1304
50f20cfd
LP
1305 journal_file_post_change(f);
1306
cec736d2
LP
1307 return r;
1308}
1309
de190aef
LP
1310static int generic_array_get(JournalFile *f,
1311 uint64_t first,
1312 uint64_t i,
1313 Object **ret, uint64_t *offset) {
1314
cec736d2 1315 Object *o;
6c8a39b8 1316 uint64_t p = 0, a;
cec736d2
LP
1317 int r;
1318
1319 assert(f);
1320
de190aef
LP
1321 a = first;
1322 while (a > 0) {
1323 uint64_t n;
cec736d2 1324
de190aef
LP
1325 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1326 if (r < 0)
1327 return r;
cec736d2 1328
de190aef
LP
1329 n = journal_file_entry_array_n_items(o);
1330 if (i < n) {
1331 p = le64toh(o->entry_array.items[i]);
1332 break;
cec736d2
LP
1333 }
1334
de190aef
LP
1335 i -= n;
1336 a = le64toh(o->entry_array.next_entry_array_offset);
1337 }
1338
1339 if (a <= 0 || p <= 0)
1340 return 0;
1341
1342 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1343 if (r < 0)
1344 return r;
1345
1346 if (ret)
1347 *ret = o;
1348
1349 if (offset)
1350 *offset = p;
1351
1352 return 1;
1353}
1354
1355static int generic_array_get_plus_one(JournalFile *f,
1356 uint64_t extra,
1357 uint64_t first,
1358 uint64_t i,
1359 Object **ret, uint64_t *offset) {
1360
1361 Object *o;
1362
1363 assert(f);
1364
1365 if (i == 0) {
1366 int r;
1367
1368 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1369 if (r < 0)
1370 return r;
1371
de190aef
LP
1372 if (ret)
1373 *ret = o;
cec736d2 1374
de190aef
LP
1375 if (offset)
1376 *offset = extra;
cec736d2 1377
de190aef 1378 return 1;
cec736d2
LP
1379 }
1380
de190aef
LP
1381 return generic_array_get(f, first, i-1, ret, offset);
1382}
cec736d2 1383
de190aef
LP
/* Results of the test_object callbacks used by the bisection helpers:
 * the tested entry either matches the needle, or is reported as lying
 * to the left or to the right of it. */
enum {
        TEST_FOUND = 0,
        TEST_LEFT  = 1,
        TEST_RIGHT = 2
};
cec736d2 1389
de190aef
LP
1390static int generic_array_bisect(JournalFile *f,
1391 uint64_t first,
1392 uint64_t n,
1393 uint64_t needle,
1394 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1395 direction_t direction,
1396 Object **ret,
1397 uint64_t *offset,
1398 uint64_t *idx) {
1399
1400 uint64_t a, p, t = 0, i = 0, last_p = 0;
1401 bool subtract_one = false;
1402 Object *o, *array = NULL;
1403 int r;
cec736d2 1404
de190aef
LP
1405 assert(f);
1406 assert(test_object);
cec736d2 1407
de190aef
LP
1408 a = first;
1409 while (a > 0) {
1410 uint64_t left, right, k, lp;
1411
1412 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1413 if (r < 0)
1414 return r;
1415
de190aef
LP
1416 k = journal_file_entry_array_n_items(array);
1417 right = MIN(k, n);
1418 if (right <= 0)
1419 return 0;
cec736d2 1420
de190aef
LP
1421 i = right - 1;
1422 lp = p = le64toh(array->entry_array.items[i]);
1423 if (p <= 0)
1424 return -EBADMSG;
cec736d2 1425
de190aef
LP
1426 r = test_object(f, p, needle);
1427 if (r < 0)
1428 return r;
cec736d2 1429
de190aef
LP
1430 if (r == TEST_FOUND)
1431 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1432
1433 if (r == TEST_RIGHT) {
1434 left = 0;
1435 right -= 1;
1436 for (;;) {
1437 if (left == right) {
1438 if (direction == DIRECTION_UP)
1439 subtract_one = true;
1440
1441 i = left;
1442 goto found;
1443 }
1444
1445 assert(left < right);
1446
1447 i = (left + right) / 2;
1448 p = le64toh(array->entry_array.items[i]);
1449 if (p <= 0)
1450 return -EBADMSG;
1451
1452 r = test_object(f, p, needle);
1453 if (r < 0)
1454 return r;
cec736d2 1455
de190aef
LP
1456 if (r == TEST_FOUND)
1457 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1458
1459 if (r == TEST_RIGHT)
1460 right = i;
1461 else
1462 left = i + 1;
1463 }
1464 }
1465
cbdca852
LP
1466 if (k > n) {
1467 if (direction == DIRECTION_UP) {
1468 i = n;
1469 subtract_one = true;
1470 goto found;
1471 }
1472
cec736d2 1473 return 0;
cbdca852 1474 }
cec736d2 1475
de190aef
LP
1476 last_p = lp;
1477
1478 n -= k;
1479 t += k;
1480 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1481 }
1482
1483 return 0;
de190aef
LP
1484
1485found:
1486 if (subtract_one && t == 0 && i == 0)
1487 return 0;
1488
1489 if (subtract_one && i == 0)
1490 p = last_p;
1491 else if (subtract_one)
1492 p = le64toh(array->entry_array.items[i-1]);
1493 else
1494 p = le64toh(array->entry_array.items[i]);
1495
1496 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1497 if (r < 0)
1498 return r;
1499
1500 if (ret)
1501 *ret = o;
1502
1503 if (offset)
1504 *offset = p;
1505
1506 if (idx)
cbdca852 1507 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1508
1509 return 1;
cec736d2
LP
1510}
1511
de190aef
LP
1512static int generic_array_bisect_plus_one(JournalFile *f,
1513 uint64_t extra,
1514 uint64_t first,
1515 uint64_t n,
1516 uint64_t needle,
1517 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1518 direction_t direction,
1519 Object **ret,
1520 uint64_t *offset,
1521 uint64_t *idx) {
1522
cec736d2 1523 int r;
cbdca852
LP
1524 bool step_back = false;
1525 Object *o;
cec736d2
LP
1526
1527 assert(f);
de190aef 1528 assert(test_object);
cec736d2 1529
de190aef
LP
1530 if (n <= 0)
1531 return 0;
cec736d2 1532
de190aef
LP
1533 /* This bisects the array in object 'first', but first checks
1534 * an extra */
de190aef
LP
1535 r = test_object(f, extra, needle);
1536 if (r < 0)
1537 return r;
a536e261
LP
1538
1539 if (r == TEST_FOUND)
1540 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1541
cbdca852
LP
1542 /* if we are looking with DIRECTION_UP then we need to first
1543 see if in the actual array there is a matching entry, and
1544 return the last one of that. But if there isn't any we need
1545 to return this one. Hence remember this, and return it
1546 below. */
1547 if (r == TEST_LEFT)
1548 step_back = direction == DIRECTION_UP;
de190aef 1549
cbdca852
LP
1550 if (r == TEST_RIGHT) {
1551 if (direction == DIRECTION_DOWN)
1552 goto found;
1553 else
1554 return 0;
a536e261 1555 }
cec736d2 1556
de190aef
LP
1557 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1558
cbdca852
LP
1559 if (r == 0 && step_back)
1560 goto found;
1561
ecf68b1d 1562 if (r > 0 && idx)
de190aef
LP
1563 (*idx) ++;
1564
1565 return r;
cbdca852
LP
1566
1567found:
1568 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1569 if (r < 0)
1570 return r;
1571
1572 if (ret)
1573 *ret = o;
1574
1575 if (offset)
1576 *offset = extra;
1577
1578 if (idx)
1579 *idx = 0;
1580
1581 return 1;
1582}
1583
1584static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1585 assert(f);
1586 assert(p > 0);
1587
1588 if (p == needle)
1589 return TEST_FOUND;
1590 else if (p < needle)
1591 return TEST_LEFT;
1592 else
1593 return TEST_RIGHT;
1594}
1595
1596int journal_file_move_to_entry_by_offset(
1597 JournalFile *f,
1598 uint64_t p,
1599 direction_t direction,
1600 Object **ret,
1601 uint64_t *offset) {
1602
1603 return generic_array_bisect(f,
1604 le64toh(f->header->entry_array_offset),
1605 le64toh(f->header->n_entries),
1606 p,
1607 test_object_offset,
1608 direction,
1609 ret, offset, NULL);
de190aef
LP
1610}
1611
cbdca852 1612
de190aef
LP
1613static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1614 Object *o;
1615 int r;
1616
1617 assert(f);
1618 assert(p > 0);
1619
1620 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1621 if (r < 0)
1622 return r;
1623
de190aef
LP
1624 if (le64toh(o->entry.seqnum) == needle)
1625 return TEST_FOUND;
1626 else if (le64toh(o->entry.seqnum) < needle)
1627 return TEST_LEFT;
1628 else
1629 return TEST_RIGHT;
1630}
cec736d2 1631
de190aef
LP
1632int journal_file_move_to_entry_by_seqnum(
1633 JournalFile *f,
1634 uint64_t seqnum,
1635 direction_t direction,
1636 Object **ret,
1637 uint64_t *offset) {
1638
1639 return generic_array_bisect(f,
1640 le64toh(f->header->entry_array_offset),
1641 le64toh(f->header->n_entries),
1642 seqnum,
1643 test_object_seqnum,
1644 direction,
1645 ret, offset, NULL);
1646}
cec736d2 1647
de190aef
LP
1648static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1649 Object *o;
1650 int r;
1651
1652 assert(f);
1653 assert(p > 0);
1654
1655 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1656 if (r < 0)
1657 return r;
1658
1659 if (le64toh(o->entry.realtime) == needle)
1660 return TEST_FOUND;
1661 else if (le64toh(o->entry.realtime) < needle)
1662 return TEST_LEFT;
1663 else
1664 return TEST_RIGHT;
cec736d2
LP
1665}
1666
de190aef
LP
1667int journal_file_move_to_entry_by_realtime(
1668 JournalFile *f,
1669 uint64_t realtime,
1670 direction_t direction,
1671 Object **ret,
1672 uint64_t *offset) {
1673
1674 return generic_array_bisect(f,
1675 le64toh(f->header->entry_array_offset),
1676 le64toh(f->header->n_entries),
1677 realtime,
1678 test_object_realtime,
1679 direction,
1680 ret, offset, NULL);
1681}
1682
1683static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1684 Object *o;
1685 int r;
1686
1687 assert(f);
1688 assert(p > 0);
1689
1690 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1691 if (r < 0)
1692 return r;
1693
1694 if (le64toh(o->entry.monotonic) == needle)
1695 return TEST_FOUND;
1696 else if (le64toh(o->entry.monotonic) < needle)
1697 return TEST_LEFT;
1698 else
1699 return TEST_RIGHT;
1700}
1701
1702int journal_file_move_to_entry_by_monotonic(
1703 JournalFile *f,
1704 sd_id128_t boot_id,
1705 uint64_t monotonic,
1706 direction_t direction,
1707 Object **ret,
1708 uint64_t *offset) {
1709
10b6f904 1710 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1711 Object *o;
1712 int r;
1713
cbdca852 1714 assert(f);
de190aef 1715
cbdca852 1716 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1717 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1718 if (r < 0)
1719 return r;
cbdca852 1720 if (r == 0)
de190aef
LP
1721 return -ENOENT;
1722
1723 return generic_array_bisect_plus_one(f,
1724 le64toh(o->data.entry_offset),
1725 le64toh(o->data.entry_array_offset),
1726 le64toh(o->data.n_entries),
1727 monotonic,
1728 test_object_monotonic,
1729 direction,
1730 ret, offset, NULL);
1731}
1732
de190aef
LP
1733int journal_file_next_entry(
1734 JournalFile *f,
1735 Object *o, uint64_t p,
1736 direction_t direction,
1737 Object **ret, uint64_t *offset) {
1738
1739 uint64_t i, n;
cec736d2
LP
1740 int r;
1741
1742 assert(f);
de190aef
LP
1743 assert(p > 0 || !o);
1744
1745 n = le64toh(f->header->n_entries);
1746 if (n <= 0)
1747 return 0;
cec736d2
LP
1748
1749 if (!o)
de190aef 1750 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1751 else {
de190aef 1752 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1753 return -EINVAL;
1754
de190aef
LP
1755 r = generic_array_bisect(f,
1756 le64toh(f->header->entry_array_offset),
1757 le64toh(f->header->n_entries),
1758 p,
1759 test_object_offset,
1760 DIRECTION_DOWN,
1761 NULL, NULL,
1762 &i);
1763 if (r <= 0)
1764 return r;
1765
1766 if (direction == DIRECTION_DOWN) {
1767 if (i >= n - 1)
1768 return 0;
1769
1770 i++;
1771 } else {
1772 if (i <= 0)
1773 return 0;
1774
1775 i--;
1776 }
cec736d2
LP
1777 }
1778
de190aef
LP
1779 /* And jump to it */
1780 return generic_array_get(f,
1781 le64toh(f->header->entry_array_offset),
1782 i,
1783 ret, offset);
1784}
cec736d2 1785
de190aef
LP
1786int journal_file_skip_entry(
1787 JournalFile *f,
1788 Object *o, uint64_t p,
1789 int64_t skip,
1790 Object **ret, uint64_t *offset) {
1791
1792 uint64_t i, n;
1793 int r;
1794
1795 assert(f);
1796 assert(o);
1797 assert(p > 0);
1798
1799 if (o->object.type != OBJECT_ENTRY)
1800 return -EINVAL;
1801
1802 r = generic_array_bisect(f,
1803 le64toh(f->header->entry_array_offset),
1804 le64toh(f->header->n_entries),
1805 p,
1806 test_object_offset,
1807 DIRECTION_DOWN,
1808 NULL, NULL,
1809 &i);
1810 if (r <= 0)
cec736d2
LP
1811 return r;
1812
de190aef
LP
1813 /* Calculate new index */
1814 if (skip < 0) {
1815 if ((uint64_t) -skip >= i)
1816 i = 0;
1817 else
1818 i = i - (uint64_t) -skip;
1819 } else
1820 i += (uint64_t) skip;
cec736d2 1821
de190aef
LP
1822 n = le64toh(f->header->n_entries);
1823 if (n <= 0)
1824 return -EBADMSG;
cec736d2 1825
de190aef
LP
1826 if (i >= n)
1827 i = n-1;
1828
1829 return generic_array_get(f,
1830 le64toh(f->header->entry_array_offset),
1831 i,
1832 ret, offset);
cec736d2
LP
1833}
1834
de190aef
LP
1835int journal_file_next_entry_for_data(
1836 JournalFile *f,
1837 Object *o, uint64_t p,
1838 uint64_t data_offset,
1839 direction_t direction,
1840 Object **ret, uint64_t *offset) {
1841
1842 uint64_t n, i;
cec736d2 1843 int r;
de190aef 1844 Object *d;
cec736d2
LP
1845
1846 assert(f);
de190aef 1847 assert(p > 0 || !o);
cec736d2 1848
de190aef 1849 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1850 if (r < 0)
de190aef 1851 return r;
cec736d2 1852
de190aef
LP
1853 n = le64toh(d->data.n_entries);
1854 if (n <= 0)
1855 return n;
cec736d2 1856
de190aef
LP
1857 if (!o)
1858 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1859 else {
1860 if (o->object.type != OBJECT_ENTRY)
1861 return -EINVAL;
cec736d2 1862
de190aef
LP
1863 r = generic_array_bisect_plus_one(f,
1864 le64toh(d->data.entry_offset),
1865 le64toh(d->data.entry_array_offset),
1866 le64toh(d->data.n_entries),
1867 p,
1868 test_object_offset,
1869 DIRECTION_DOWN,
1870 NULL, NULL,
1871 &i);
1872
1873 if (r <= 0)
cec736d2
LP
1874 return r;
1875
de190aef
LP
1876 if (direction == DIRECTION_DOWN) {
1877 if (i >= n - 1)
1878 return 0;
cec736d2 1879
de190aef
LP
1880 i++;
1881 } else {
1882 if (i <= 0)
1883 return 0;
cec736d2 1884
de190aef
LP
1885 i--;
1886 }
cec736d2 1887
de190aef 1888 }
cec736d2 1889
de190aef
LP
1890 return generic_array_get_plus_one(f,
1891 le64toh(d->data.entry_offset),
1892 le64toh(d->data.entry_array_offset),
1893 i,
1894 ret, offset);
1895}
cec736d2 1896
cbdca852
LP
1897int journal_file_move_to_entry_by_offset_for_data(
1898 JournalFile *f,
1899 uint64_t data_offset,
1900 uint64_t p,
1901 direction_t direction,
1902 Object **ret, uint64_t *offset) {
1903
1904 int r;
1905 Object *d;
1906
1907 assert(f);
1908
1909 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1910 if (r < 0)
1911 return r;
1912
1913 return generic_array_bisect_plus_one(f,
1914 le64toh(d->data.entry_offset),
1915 le64toh(d->data.entry_array_offset),
1916 le64toh(d->data.n_entries),
1917 p,
1918 test_object_offset,
1919 direction,
1920 ret, offset, NULL);
1921}
1922
1923int journal_file_move_to_entry_by_monotonic_for_data(
1924 JournalFile *f,
1925 uint64_t data_offset,
1926 sd_id128_t boot_id,
1927 uint64_t monotonic,
1928 direction_t direction,
1929 Object **ret, uint64_t *offset) {
1930
1931 char t[9+32+1] = "_BOOT_ID=";
1932 Object *o, *d;
1933 int r;
1934 uint64_t b, z;
1935
1936 assert(f);
1937
1938 /* First, seek by time */
1939 sd_id128_to_string(boot_id, t + 9);
1940 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1941 if (r < 0)
1942 return r;
1943 if (r == 0)
1944 return -ENOENT;
1945
1946 r = generic_array_bisect_plus_one(f,
1947 le64toh(o->data.entry_offset),
1948 le64toh(o->data.entry_array_offset),
1949 le64toh(o->data.n_entries),
1950 monotonic,
1951 test_object_monotonic,
1952 direction,
1953 NULL, &z, NULL);
1954 if (r <= 0)
1955 return r;
1956
1957 /* And now, continue seeking until we find an entry that
1958 * exists in both bisection arrays */
1959
1960 for (;;) {
1961 Object *qo;
1962 uint64_t p, q;
1963
1964 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1965 if (r < 0)
1966 return r;
1967
1968 r = generic_array_bisect_plus_one(f,
1969 le64toh(d->data.entry_offset),
1970 le64toh(d->data.entry_array_offset),
1971 le64toh(d->data.n_entries),
1972 z,
1973 test_object_offset,
1974 direction,
1975 NULL, &p, NULL);
1976 if (r <= 0)
1977 return r;
1978
1979 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1980 if (r < 0)
1981 return r;
1982
1983 r = generic_array_bisect_plus_one(f,
1984 le64toh(o->data.entry_offset),
1985 le64toh(o->data.entry_array_offset),
1986 le64toh(o->data.n_entries),
1987 p,
1988 test_object_offset,
1989 direction,
1990 &qo, &q, NULL);
1991
1992 if (r <= 0)
1993 return r;
1994
1995 if (p == q) {
1996 if (ret)
1997 *ret = qo;
1998 if (offset)
1999 *offset = q;
2000
2001 return 1;
2002 }
2003
2004 z = q;
2005 }
2006
2007 return 0;
2008}
2009
de190aef
LP
2010int journal_file_move_to_entry_by_seqnum_for_data(
2011 JournalFile *f,
2012 uint64_t data_offset,
2013 uint64_t seqnum,
2014 direction_t direction,
2015 Object **ret, uint64_t *offset) {
cec736d2 2016
de190aef
LP
2017 Object *d;
2018 int r;
cec736d2 2019
91a31dde
LP
2020 assert(f);
2021
de190aef 2022 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 2023 if (r < 0)
de190aef 2024 return r;
cec736d2 2025
de190aef
LP
2026 return generic_array_bisect_plus_one(f,
2027 le64toh(d->data.entry_offset),
2028 le64toh(d->data.entry_array_offset),
2029 le64toh(d->data.n_entries),
2030 seqnum,
2031 test_object_seqnum,
2032 direction,
2033 ret, offset, NULL);
2034}
cec736d2 2035
de190aef
LP
2036int journal_file_move_to_entry_by_realtime_for_data(
2037 JournalFile *f,
2038 uint64_t data_offset,
2039 uint64_t realtime,
2040 direction_t direction,
2041 Object **ret, uint64_t *offset) {
2042
2043 Object *d;
2044 int r;
2045
91a31dde
LP
2046 assert(f);
2047
de190aef 2048 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 2049 if (r < 0)
de190aef
LP
2050 return r;
2051
2052 return generic_array_bisect_plus_one(f,
2053 le64toh(d->data.entry_offset),
2054 le64toh(d->data.entry_array_offset),
2055 le64toh(d->data.n_entries),
2056 realtime,
2057 test_object_realtime,
2058 direction,
2059 ret, offset, NULL);
cec736d2
LP
2060}
2061
0284adc6 2062void journal_file_dump(JournalFile *f) {
7560fffc 2063 Object *o;
7560fffc 2064 int r;
0284adc6 2065 uint64_t p;
7560fffc
LP
2066
2067 assert(f);
2068
0284adc6 2069 journal_file_print_header(f);
7560fffc 2070
0284adc6
LP
2071 p = le64toh(f->header->header_size);
2072 while (p != 0) {
2073 r = journal_file_move_to_object(f, -1, p, &o);
2074 if (r < 0)
2075 goto fail;
7560fffc 2076
0284adc6 2077 switch (o->object.type) {
d98cc1f2 2078
0284adc6
LP
2079 case OBJECT_UNUSED:
2080 printf("Type: OBJECT_UNUSED\n");
2081 break;
d98cc1f2 2082
0284adc6
LP
2083 case OBJECT_DATA:
2084 printf("Type: OBJECT_DATA\n");
2085 break;
7560fffc 2086
3c1668da
LP
2087 case OBJECT_FIELD:
2088 printf("Type: OBJECT_FIELD\n");
2089 break;
2090
0284adc6 2091 case OBJECT_ENTRY:
f7fab8a5 2092 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
0284adc6
LP
2093 (unsigned long long) le64toh(o->entry.seqnum),
2094 (unsigned long long) le64toh(o->entry.monotonic),
2095 (unsigned long long) le64toh(o->entry.realtime));
2096 break;
7560fffc 2097
0284adc6
LP
2098 case OBJECT_FIELD_HASH_TABLE:
2099 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2100 break;
7560fffc 2101
0284adc6
LP
2102 case OBJECT_DATA_HASH_TABLE:
2103 printf("Type: OBJECT_DATA_HASH_TABLE\n");
2104 break;
7560fffc 2105
0284adc6
LP
2106 case OBJECT_ENTRY_ARRAY:
2107 printf("Type: OBJECT_ENTRY_ARRAY\n");
2108 break;
7560fffc 2109
0284adc6 2110 case OBJECT_TAG:
f7fab8a5
LP
2111 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
2112 (unsigned long long) le64toh(o->tag.seqnum),
2113 (unsigned long long) le64toh(o->tag.epoch));
0284adc6 2114 break;
3c1668da
LP
2115
2116 default:
2117 printf("Type: unknown (%u)\n", o->object.type);
2118 break;
0284adc6 2119 }
7560fffc 2120
0284adc6
LP
2121 if (o->object.flags & OBJECT_COMPRESSED)
2122 printf("Flags: COMPRESSED\n");
7560fffc 2123
0284adc6
LP
2124 if (p == le64toh(f->header->tail_object_offset))
2125 p = 0;
2126 else
2127 p = p + ALIGN64(le64toh(o->object.size));
2128 }
7560fffc 2129
0284adc6
LP
2130 return;
2131fail:
2132 log_error("File corrupt");
7560fffc
LP
2133}
2134
0284adc6
LP
2135void journal_file_print_header(JournalFile *f) {
2136 char a[33], b[33], c[33];
2137 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
a1a03e30
LP
2138 struct stat st;
2139 char bytes[FORMAT_BYTES_MAX];
7560fffc
LP
2140
2141 assert(f);
7560fffc 2142
0284adc6
LP
2143 printf("File Path: %s\n"
2144 "File ID: %s\n"
2145 "Machine ID: %s\n"
2146 "Boot ID: %s\n"
2147 "Sequential Number ID: %s\n"
2148 "State: %s\n"
2149 "Compatible Flags:%s%s\n"
2150 "Incompatible Flags:%s%s\n"
2151 "Header size: %llu\n"
2152 "Arena size: %llu\n"
2153 "Data Hash Table Size: %llu\n"
2154 "Field Hash Table Size: %llu\n"
0284adc6
LP
2155 "Rotate Suggested: %s\n"
2156 "Head Sequential Number: %llu\n"
2157 "Tail Sequential Number: %llu\n"
2158 "Head Realtime Timestamp: %s\n"
3223f44f
LP
2159 "Tail Realtime Timestamp: %s\n"
2160 "Objects: %llu\n"
2161 "Entry Objects: %llu\n",
0284adc6
LP
2162 f->path,
2163 sd_id128_to_string(f->header->file_id, a),
2164 sd_id128_to_string(f->header->machine_id, b),
2165 sd_id128_to_string(f->header->boot_id, c),
2166 sd_id128_to_string(f->header->seqnum_id, c),
3223f44f
LP
2167 f->header->state == STATE_OFFLINE ? "OFFLINE" :
2168 f->header->state == STATE_ONLINE ? "ONLINE" :
2169 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
8088cbd3
LP
2170 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
2171 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
2172 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
2173 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
0284adc6
LP
2174 (unsigned long long) le64toh(f->header->header_size),
2175 (unsigned long long) le64toh(f->header->arena_size),
2176 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2177 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
fb0951b0 2178 yes_no(journal_file_rotate_suggested(f, 0)),
0284adc6
LP
2179 (unsigned long long) le64toh(f->header->head_entry_seqnum),
2180 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
2181 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
3223f44f
LP
2182 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
2183 (unsigned long long) le64toh(f->header->n_objects),
2184 (unsigned long long) le64toh(f->header->n_entries));
7560fffc 2185
0284adc6
LP
2186 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2187 printf("Data Objects: %llu\n"
2188 "Data Hash Table Fill: %.1f%%\n",
2189 (unsigned long long) le64toh(f->header->n_data),
2190 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 2191
0284adc6
LP
2192 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2193 printf("Field Objects: %llu\n"
2194 "Field Hash Table Fill: %.1f%%\n",
2195 (unsigned long long) le64toh(f->header->n_fields),
2196 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
2197
2198 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
2199 printf("Tag Objects: %llu\n",
2200 (unsigned long long) le64toh(f->header->n_tags));
2201 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
2202 printf("Entry Array Objects: %llu\n",
2203 (unsigned long long) le64toh(f->header->n_entry_arrays));
a1a03e30
LP
2204
2205 if (fstat(f->fd, &st) >= 0)
2206 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
7560fffc
LP
2207}
2208
0284adc6
LP
2209int journal_file_open(
2210 const char *fname,
2211 int flags,
2212 mode_t mode,
2213 bool compress,
baed47c3 2214 bool seal,
0284adc6
LP
2215 JournalMetrics *metrics,
2216 MMapCache *mmap_cache,
2217 JournalFile *template,
2218 JournalFile **ret) {
7560fffc 2219
0284adc6
LP
2220 JournalFile *f;
2221 int r;
2222 bool newly_created = false;
7560fffc 2223
0284adc6 2224 assert(fname);
0559d3a5 2225 assert(ret);
7560fffc 2226
0284adc6
LP
2227 if ((flags & O_ACCMODE) != O_RDONLY &&
2228 (flags & O_ACCMODE) != O_RDWR)
2229 return -EINVAL;
7560fffc 2230
a0108012
LP
2231 if (!endswith(fname, ".journal") &&
2232 !endswith(fname, ".journal~"))
0284adc6 2233 return -EINVAL;
7560fffc 2234
0284adc6
LP
2235 f = new0(JournalFile, 1);
2236 if (!f)
2237 return -ENOMEM;
7560fffc 2238
0284adc6
LP
2239 f->fd = -1;
2240 f->mode = mode;
7560fffc 2241
0284adc6
LP
2242 f->flags = flags;
2243 f->prot = prot_from_flags(flags);
2244 f->writable = (flags & O_ACCMODE) != O_RDONLY;
48b61739 2245#ifdef HAVE_XZ
0284adc6 2246 f->compress = compress;
48b61739 2247#endif
49a32d43 2248#ifdef HAVE_GCRYPT
baed47c3 2249 f->seal = seal;
49a32d43 2250#endif
7560fffc 2251
0284adc6
LP
2252 if (mmap_cache)
2253 f->mmap = mmap_cache_ref(mmap_cache);
2254 else {
84168d80 2255 f->mmap = mmap_cache_new();
0284adc6
LP
2256 if (!f->mmap) {
2257 r = -ENOMEM;
2258 goto fail;
2259 }
2260 }
7560fffc 2261
0284adc6
LP
2262 f->path = strdup(fname);
2263 if (!f->path) {
2264 r = -ENOMEM;
2265 goto fail;
2266 }
7560fffc 2267
0284adc6
LP
2268 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2269 if (f->fd < 0) {
2270 r = -errno;
2271 goto fail;
7560fffc 2272 }
7560fffc 2273
0284adc6
LP
2274 if (fstat(f->fd, &f->last_stat) < 0) {
2275 r = -errno;
2276 goto fail;
2277 }
7560fffc 2278
0284adc6 2279 if (f->last_stat.st_size == 0 && f->writable) {
fb0951b0
LP
2280#ifdef HAVE_XATTR
2281 uint64_t crtime;
2282
2283 /* Let's attach the creation time to the journal file,
2284 * so that the vacuuming code knows the age of this
2285 * file even if the file might end up corrupted one
2286 * day... Ideally we'd just use the creation time many
2287 * file systems maintain for each file, but there is
2288 * currently no usable API to query this, hence let's
2289 * emulate this via extended attributes. If extended
2290 * attributes are not supported we'll just skip this,
2291 * and rely solely on mtime/atime/ctime of the file.*/
2292
2293 crtime = htole64((uint64_t) now(CLOCK_REALTIME));
2294 fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
2295#endif
7560fffc 2296
feb12d3e 2297#ifdef HAVE_GCRYPT
0284adc6 2298 /* Try to load the FSPRG state, and if we can't, then
baed47c3 2299 * just don't do sealing */
49a32d43
LP
2300 if (f->seal) {
2301 r = journal_file_fss_load(f);
2302 if (r < 0)
2303 f->seal = false;
2304 }
feb12d3e 2305#endif
7560fffc 2306
0284adc6
LP
2307 r = journal_file_init_header(f, template);
2308 if (r < 0)
2309 goto fail;
7560fffc 2310
0284adc6
LP
2311 if (fstat(f->fd, &f->last_stat) < 0) {
2312 r = -errno;
2313 goto fail;
2314 }
fb0951b0
LP
2315
2316 newly_created = true;
0284adc6 2317 }
7560fffc 2318
0284adc6
LP
2319 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2320 r = -EIO;
2321 goto fail;
2322 }
7560fffc 2323
0284adc6
LP
2324 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2325 if (f->header == MAP_FAILED) {
2326 f->header = NULL;
2327 r = -errno;
2328 goto fail;
2329 }
7560fffc 2330
0284adc6
LP
2331 if (!newly_created) {
2332 r = journal_file_verify_header(f);
2333 if (r < 0)
2334 goto fail;
2335 }
7560fffc 2336
feb12d3e 2337#ifdef HAVE_GCRYPT
0284adc6 2338 if (!newly_created && f->writable) {
baed47c3 2339 r = journal_file_fss_load(f);
0284adc6
LP
2340 if (r < 0)
2341 goto fail;
2342 }
feb12d3e 2343#endif
cec736d2
LP
2344
2345 if (f->writable) {
4a92baf3
LP
2346 if (metrics) {
2347 journal_default_metrics(metrics, f->fd);
2348 f->metrics = *metrics;
2349 } else if (template)
2350 f->metrics = template->metrics;
2351
cec736d2
LP
2352 r = journal_file_refresh_header(f);
2353 if (r < 0)
2354 goto fail;
2355 }
2356
feb12d3e 2357#ifdef HAVE_GCRYPT
baed47c3 2358 r = journal_file_hmac_setup(f);
14d10188
LP
2359 if (r < 0)
2360 goto fail;
feb12d3e 2361#endif
14d10188 2362
cec736d2 2363 if (newly_created) {
de190aef 2364 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2365 if (r < 0)
2366 goto fail;
2367
de190aef 2368 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2369 if (r < 0)
2370 goto fail;
7560fffc 2371
feb12d3e 2372#ifdef HAVE_GCRYPT
7560fffc
LP
2373 r = journal_file_append_first_tag(f);
2374 if (r < 0)
2375 goto fail;
feb12d3e 2376#endif
cec736d2
LP
2377 }
2378
de190aef 2379 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2380 if (r < 0)
2381 goto fail;
2382
de190aef 2383 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2384 if (r < 0)
2385 goto fail;
2386
0559d3a5 2387 *ret = f;
cec736d2
LP
2388 return 0;
2389
2390fail:
2391 journal_file_close(f);
2392
2393 return r;
2394}
0ac38b70 2395
baed47c3 2396int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
0ac38b70
LP
2397 char *p;
2398 size_t l;
2399 JournalFile *old_file, *new_file = NULL;
2400 int r;
2401
2402 assert(f);
2403 assert(*f);
2404
2405 old_file = *f;
2406
2407 if (!old_file->writable)
2408 return -EINVAL;
2409
2410 if (!endswith(old_file->path, ".journal"))
2411 return -EINVAL;
2412
2413 l = strlen(old_file->path);
2414
9447a7f1 2415 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2416 if (!p)
2417 return -ENOMEM;
2418
2419 memcpy(p, old_file->path, l - 8);
2420 p[l-8] = '@';
2421 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2422 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2423 "-%016llx-%016llx.journal",
fb0951b0
LP
2424 (unsigned long long) le64toh((*f)->header->head_entry_seqnum),
2425 (unsigned long long) le64toh((*f)->header->head_entry_realtime));
0ac38b70
LP
2426
2427 r = rename(old_file->path, p);
2428 free(p);
2429
2430 if (r < 0)
2431 return -errno;
2432
ccdbaf91 2433 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2434
baed47c3 2435 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2436 journal_file_close(old_file);
2437
2438 *f = new_file;
2439 return r;
2440}
2441
9447a7f1
LP
2442int journal_file_open_reliably(
2443 const char *fname,
2444 int flags,
2445 mode_t mode,
7560fffc 2446 bool compress,
baed47c3 2447 bool seal,
4a92baf3 2448 JournalMetrics *metrics,
27370278 2449 MMapCache *mmap_cache,
9447a7f1
LP
2450 JournalFile *template,
2451 JournalFile **ret) {
2452
2453 int r;
2454 size_t l;
2455 char *p;
2456
baed47c3 2457 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2458 metrics, mmap_cache, template, ret);
0071d9f1
LP
2459 if (r != -EBADMSG && /* corrupted */
2460 r != -ENODATA && /* truncated */
2461 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2462 r != -EPROTONOSUPPORT && /* incompatible feature */
2463 r != -EBUSY && /* unclean shutdown */
2464 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2465 return r;
2466
2467 if ((flags & O_ACCMODE) == O_RDONLY)
2468 return r;
2469
2470 if (!(flags & O_CREAT))
2471 return r;
2472
7560fffc
LP
2473 if (!endswith(fname, ".journal"))
2474 return r;
2475
5c70eab4
LP
2476 /* The file is corrupted. Rotate it away and try it again (but only once) */
2477
9447a7f1
LP
2478 l = strlen(fname);
2479 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2480 (int) (l-8), fname,
2481 (unsigned long long) now(CLOCK_REALTIME),
2482 random_ull()) < 0)
2483 return -ENOMEM;
2484
2485 r = rename(fname, p);
2486 free(p);
2487 if (r < 0)
2488 return -errno;
2489
a1a1898f 2490 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2491
baed47c3 2492 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2493 metrics, mmap_cache, template, ret);
9447a7f1
LP
2494}
2495
cf244689
LP
2496
2497int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2498 uint64_t i, n;
2499 uint64_t q, xor_hash = 0;
2500 int r;
2501 EntryItem *items;
2502 dual_timestamp ts;
2503
2504 assert(from);
2505 assert(to);
2506 assert(o);
2507 assert(p);
2508
2509 if (!to->writable)
2510 return -EPERM;
2511
2512 ts.monotonic = le64toh(o->entry.monotonic);
2513 ts.realtime = le64toh(o->entry.realtime);
2514
2515 if (to->tail_entry_monotonic_valid &&
2516 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2517 return -EINVAL;
2518
cf244689
LP
2519 n = journal_file_entry_n_items(o);
2520 items = alloca(sizeof(EntryItem) * n);
2521
2522 for (i = 0; i < n; i++) {
4fd052ae
FC
2523 uint64_t l, h;
2524 le64_t le_hash;
cf244689
LP
2525 size_t t;
2526 void *data;
2527 Object *u;
2528
2529 q = le64toh(o->entry.items[i].object_offset);
2530 le_hash = o->entry.items[i].hash;
2531
2532 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2533 if (r < 0)
2534 return r;
2535
2536 if (le_hash != o->data.hash)
2537 return -EBADMSG;
2538
2539 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2540 t = (size_t) l;
2541
2542 /* We hit the limit on 32bit machines */
2543 if ((uint64_t) t != l)
2544 return -E2BIG;
2545
2546 if (o->object.flags & OBJECT_COMPRESSED) {
2547#ifdef HAVE_XZ
2548 uint64_t rsize;
2549
2550 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2551 return -EBADMSG;
2552
2553 data = from->compress_buffer;
2554 l = rsize;
2555#else
2556 return -EPROTONOSUPPORT;
2557#endif
2558 } else
2559 data = o->data.payload;
2560
2561 r = journal_file_append_data(to, data, l, &u, &h);
2562 if (r < 0)
2563 return r;
2564
2565 xor_hash ^= le64toh(u->data.hash);
2566 items[i].object_offset = htole64(h);
2567 items[i].hash = u->data.hash;
2568
2569 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2570 if (r < 0)
2571 return r;
2572 }
2573
2574 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2575}
babfc091
LP
2576
2577void journal_default_metrics(JournalMetrics *m, int fd) {
2578 uint64_t fs_size = 0;
2579 struct statvfs ss;
a7bc2c2a 2580 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2581
2582 assert(m);
2583 assert(fd >= 0);
2584
2585 if (fstatvfs(fd, &ss) >= 0)
2586 fs_size = ss.f_frsize * ss.f_blocks;
2587
2588 if (m->max_use == (uint64_t) -1) {
2589
2590 if (fs_size > 0) {
2591 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2592
2593 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2594 m->max_use = DEFAULT_MAX_USE_UPPER;
2595
2596 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2597 m->max_use = DEFAULT_MAX_USE_LOWER;
2598 } else
2599 m->max_use = DEFAULT_MAX_USE_LOWER;
2600 } else {
2601 m->max_use = PAGE_ALIGN(m->max_use);
2602
2603 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2604 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2605 }
2606
2607 if (m->max_size == (uint64_t) -1) {
2608 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2609
2610 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2611 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2612 } else
2613 m->max_size = PAGE_ALIGN(m->max_size);
2614
2615 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2616 m->max_size = JOURNAL_FILE_SIZE_MIN;
2617
2618 if (m->max_size*2 > m->max_use)
2619 m->max_use = m->max_size*2;
2620
2621 if (m->min_size == (uint64_t) -1)
2622 m->min_size = JOURNAL_FILE_SIZE_MIN;
2623 else {
2624 m->min_size = PAGE_ALIGN(m->min_size);
2625
2626 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2627 m->min_size = JOURNAL_FILE_SIZE_MIN;
2628
2629 if (m->min_size > m->max_size)
2630 m->max_size = m->min_size;
2631 }
2632
2633 if (m->keep_free == (uint64_t) -1) {
2634
2635 if (fs_size > 0) {
2636 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2637
2638 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2639 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2640
2641 } else
2642 m->keep_free = DEFAULT_KEEP_FREE;
2643 }
2644
2b43f939
LP
2645 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2646 format_bytes(a, sizeof(a), m->max_use),
2647 format_bytes(b, sizeof(b), m->max_size),
2648 format_bytes(c, sizeof(c), m->min_size),
2649 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2650}
08984293
LP
2651
2652int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2653 assert(f);
2654 assert(from || to);
2655
2656 if (from) {
162566a4
LP
2657 if (f->header->head_entry_realtime == 0)
2658 return -ENOENT;
08984293 2659
162566a4 2660 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2661 }
2662
2663 if (to) {
162566a4
LP
2664 if (f->header->tail_entry_realtime == 0)
2665 return -ENOENT;
08984293 2666
162566a4 2667 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2668 }
2669
2670 return 1;
2671}
2672
2673int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2674 char t[9+32+1] = "_BOOT_ID=";
2675 Object *o;
2676 uint64_t p;
2677 int r;
2678
2679 assert(f);
2680 assert(from || to);
2681
2682 sd_id128_to_string(boot_id, t + 9);
2683
2684 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2685 if (r <= 0)
2686 return r;
2687
2688 if (le64toh(o->data.n_entries) <= 0)
2689 return 0;
2690
2691 if (from) {
2692 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2693 if (r < 0)
2694 return r;
2695
2696 *from = le64toh(o->entry.monotonic);
2697 }
2698
2699 if (to) {
2700 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2701 if (r < 0)
2702 return r;
2703
2704 r = generic_array_get_plus_one(f,
2705 le64toh(o->data.entry_offset),
2706 le64toh(o->data.entry_array_offset),
2707 le64toh(o->data.n_entries)-1,
2708 &o, NULL);
2709 if (r <= 0)
2710 return r;
2711
2712 *to = le64toh(o->entry.monotonic);
2713 }
2714
2715 return 1;
2716}
dca6219e 2717
fb0951b0 2718bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
dca6219e
LP
2719 assert(f);
2720
2721 /* If we gained new header fields we gained new features,
2722 * hence suggest a rotation */
361f9cbc
LP
2723 if (le64toh(f->header->header_size) < sizeof(Header)) {
2724 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2725 return true;
361f9cbc 2726 }
dca6219e
LP
2727
2728 /* Let's check if the hash tables grew over a certain fill
2729 * level (75%, borrowing this value from Java's hash table
2730 * implementation), and if so suggest a rotation. To calculate
2731 * the fill level we need the n_data field, which only exists
2732 * in newer versions. */
2733
2734 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
2735 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2736 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2737 f->path,
2738 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2739 (unsigned long long) le64toh(f->header->n_data),
2740 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2741 (unsigned long long) (f->last_stat.st_size),
2742 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 2743 return true;
361f9cbc 2744 }
dca6219e
LP
2745
2746 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
2747 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2748 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2749 f->path,
2750 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2751 (unsigned long long) le64toh(f->header->n_fields),
2752 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 2753 return true;
361f9cbc 2754 }
dca6219e 2755
0598fd4a
LP
2756 /* Are the data objects properly indexed by field objects? */
2757 if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
2758 JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
2759 le64toh(f->header->n_data) > 0 &&
2760 le64toh(f->header->n_fields) == 0)
2761 return true;
2762
fb0951b0
LP
2763 if (max_file_usec > 0) {
2764 usec_t t, h;
2765
2766 h = le64toh(f->header->head_entry_realtime);
2767 t = now(CLOCK_REALTIME);
2768
2769 if (h > 0 && t > h + max_file_usec)
2770 return true;
2771 }
2772
dca6219e
LP
2773 return false;
2774}