]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
tree-wide usage of %m specifier instead of strerror(errno)
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
fb0951b0
LP
30#ifdef HAVE_XATTR
31#include <attr/xattr.h>
32#endif
33
cec736d2
LP
34#include "journal-def.h"
35#include "journal-file.h"
0284adc6 36#include "journal-authenticate.h"
cec736d2 37#include "lookup3.h"
807e17f0 38#include "compress.h"
7560fffc 39#include "fsprg.h"
cec736d2 40
4a92baf3
LP
41#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
42#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
cec736d2 43
be19b7df 44#define COMPRESSION_SIZE_THRESHOLD (512ULL)
807e17f0 45
babfc091 46/* This is the minimum journal file size */
253f59df 47#define JOURNAL_FILE_SIZE_MIN (4ULL*1024ULL*1024ULL) /* 4 MiB */
babfc091
LP
48
49/* These are the lower and upper bounds if we deduce the max_use value
50 * from the file system size */
51#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
52#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
53
54/* This is the upper bound if we deduce max_size from max_use */
71100051 55#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
babfc091
LP
56
57/* This is the upper bound if we deduce the keep_free value from the
58 * file system size */
59#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
60
61/* This is the keep_free value when we can't determine the system
62 * size */
63#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
64
dca6219e
LP
65/* n_data was the first entry we added after the initial file format design */
66#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 67
a4bcff5b
LP
68/* How many entries to keep in the entry array chain cache at max */
69#define CHAIN_CACHE_MAX 20
70
9588bc32 71static int journal_file_set_online(JournalFile *f) {
26687bf8
OS
72 assert(f);
73
74 if (!f->writable)
75 return -EPERM;
76
77 if (!(f->fd >= 0 && f->header))
78 return -EINVAL;
79
80 switch(f->header->state) {
81 case STATE_ONLINE:
82 return 0;
83
84 case STATE_OFFLINE:
85 f->header->state = STATE_ONLINE;
86 fsync(f->fd);
87 return 0;
88
89 default:
90 return -EINVAL;
91 }
92}
93
94int journal_file_set_offline(JournalFile *f) {
95 assert(f);
96
97 if (!f->writable)
98 return -EPERM;
99
100 if (!(f->fd >= 0 && f->header))
101 return -EINVAL;
102
103 if (f->header->state != STATE_ONLINE)
104 return 0;
105
106 fsync(f->fd);
107
108 f->header->state = STATE_OFFLINE;
109
110 fsync(f->fd);
111
112 return 0;
113}
114
cec736d2 115void journal_file_close(JournalFile *f) {
de190aef 116 assert(f);
cec736d2 117
feb12d3e 118#ifdef HAVE_GCRYPT
b0af6f41 119 /* Write the final tag */
c586dbf1 120 if (f->seal && f->writable)
b0af6f41 121 journal_file_append_tag(f);
feb12d3e 122#endif
b0af6f41 123
7560fffc 124 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
125 if (f->mmap && f->fd >= 0)
126 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc 127
26687bf8 128 journal_file_set_offline(f);
cec736d2 129
26687bf8 130 if (f->header)
d384c7a8 131 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
cec736d2 132
0ac38b70
LP
133 if (f->fd >= 0)
134 close_nointr_nofail(f->fd);
135
cec736d2 136 free(f->path);
807e17f0 137
16e9f408
LP
138 if (f->mmap)
139 mmap_cache_unref(f->mmap);
140
a4bcff5b
LP
141 hashmap_free_free(f->chain_cache);
142
807e17f0
LP
143#ifdef HAVE_XZ
144 free(f->compress_buffer);
145#endif
146
7560fffc 147#ifdef HAVE_GCRYPT
baed47c3
LP
148 if (f->fss_file)
149 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
150 else if (f->fsprg_state)
151 free(f->fsprg_state);
152
153 free(f->fsprg_seed);
7560fffc
LP
154
155 if (f->hmac)
156 gcry_md_close(f->hmac);
157#endif
158
cec736d2
LP
159 free(f);
160}
161
0ac38b70 162static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
163 Header h;
164 ssize_t k;
165 int r;
166
167 assert(f);
168
169 zero(h);
7560fffc 170 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 171 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 172
7560fffc
LP
173 h.incompatible_flags =
174 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
175
176 h.compatible_flags =
baed47c3 177 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 178
cec736d2
LP
179 r = sd_id128_randomize(&h.file_id);
180 if (r < 0)
181 return r;
182
0ac38b70
LP
183 if (template) {
184 h.seqnum_id = template->header->seqnum_id;
beec0085 185 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
186 } else
187 h.seqnum_id = h.file_id;
cec736d2
LP
188
189 k = pwrite(f->fd, &h, sizeof(h), 0);
190 if (k < 0)
191 return -errno;
192
193 if (k != sizeof(h))
194 return -EIO;
195
196 return 0;
197}
198
199static int journal_file_refresh_header(JournalFile *f) {
200 int r;
de190aef 201 sd_id128_t boot_id;
cec736d2
LP
202
203 assert(f);
204
205 r = sd_id128_get_machine(&f->header->machine_id);
206 if (r < 0)
207 return r;
208
de190aef 209 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
210 if (r < 0)
211 return r;
212
de190aef
LP
213 if (sd_id128_equal(boot_id, f->header->boot_id))
214 f->tail_entry_monotonic_valid = true;
215
216 f->header->boot_id = boot_id;
217
26687bf8 218 journal_file_set_online(f);
b788cc23 219
7560fffc
LP
220 /* Sync the online state to disk */
221 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
222 fdatasync(f->fd);
b788cc23 223
cec736d2
LP
224 return 0;
225}
226
227static int journal_file_verify_header(JournalFile *f) {
228 assert(f);
229
7560fffc 230 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
231 return -EBADMSG;
232
7560fffc
LP
233 /* In both read and write mode we refuse to open files with
234 * incompatible flags we don't know */
807e17f0 235#ifdef HAVE_XZ
7560fffc 236 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
237 return -EPROTONOSUPPORT;
238#else
cec736d2
LP
239 if (f->header->incompatible_flags != 0)
240 return -EPROTONOSUPPORT;
807e17f0 241#endif
cec736d2 242
7560fffc
LP
243 /* When open for writing we refuse to open files with
244 * compatible flags, too */
245 if (f->writable) {
246#ifdef HAVE_GCRYPT
baed47c3 247 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
248 return -EPROTONOSUPPORT;
249#else
250 if (f->header->compatible_flags != 0)
251 return -EPROTONOSUPPORT;
252#endif
253 }
254
db11ac1a
LP
255 if (f->header->state >= _STATE_MAX)
256 return -EBADMSG;
257
dca6219e
LP
258 /* The first addition was n_data, so check that we are at least this large */
259 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
260 return -EBADMSG;
261
8088cbd3 262 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
263 return -EBADMSG;
264
db11ac1a
LP
265 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
266 return -ENODATA;
267
268 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
269 return -ENODATA;
270
7762e02b
LP
271 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
272 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
273 !VALID64(le64toh(f->header->tail_object_offset)) ||
274 !VALID64(le64toh(f->header->entry_array_offset)))
275 return -ENODATA;
276
277 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
278 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
279 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
280 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
cec736d2
LP
281 return -ENODATA;
282
283 if (f->writable) {
ccdbaf91 284 uint8_t state;
cec736d2
LP
285 sd_id128_t machine_id;
286 int r;
287
288 r = sd_id128_get_machine(&machine_id);
289 if (r < 0)
290 return r;
291
292 if (!sd_id128_equal(machine_id, f->header->machine_id))
293 return -EHOSTDOWN;
294
de190aef 295 state = f->header->state;
cec736d2 296
71fa6f00
LP
297 if (state == STATE_ONLINE) {
298 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
299 return -EBUSY;
300 } else if (state == STATE_ARCHIVED)
cec736d2 301 return -ESHUTDOWN;
71fa6f00
LP
302 else if (state != STATE_OFFLINE) {
303 log_debug("Journal file %s has unknown state %u.", f->path, state);
304 return -EBUSY;
305 }
cec736d2
LP
306 }
307
8088cbd3 308 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
c586dbf1 309
f1889c91 310 f->seal = JOURNAL_HEADER_SEALED(f->header);
7560fffc 311
cec736d2
LP
312 return 0;
313}
314
315static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 316 uint64_t old_size, new_size;
fec2aa2f 317 int r;
cec736d2
LP
318
319 assert(f);
320
cec736d2 321 /* We assume that this file is not sparse, and we know that
38ac38b2 322 * for sure, since we always call posix_fallocate()
cec736d2
LP
323 * ourselves */
324
325 old_size =
23b0b2b2 326 le64toh(f->header->header_size) +
cec736d2
LP
327 le64toh(f->header->arena_size);
328
bc85bfee 329 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
330 if (new_size < le64toh(f->header->header_size))
331 new_size = le64toh(f->header->header_size);
bc85bfee
LP
332
333 if (new_size <= old_size)
cec736d2
LP
334 return 0;
335
bc85bfee
LP
336 if (f->metrics.max_size > 0 &&
337 new_size > f->metrics.max_size)
338 return -E2BIG;
cec736d2 339
bc85bfee
LP
340 if (new_size > f->metrics.min_size &&
341 f->metrics.keep_free > 0) {
cec736d2
LP
342 struct statvfs svfs;
343
344 if (fstatvfs(f->fd, &svfs) >= 0) {
345 uint64_t available;
346
347 available = svfs.f_bfree * svfs.f_bsize;
348
bc85bfee
LP
349 if (available >= f->metrics.keep_free)
350 available -= f->metrics.keep_free;
cec736d2
LP
351 else
352 available = 0;
353
354 if (new_size - old_size > available)
355 return -E2BIG;
356 }
357 }
358
bc85bfee
LP
359 /* Note that the glibc fallocate() fallback is very
360 inefficient, hence we try to minimize the allocation area
361 as we can. */
fec2aa2f
GV
362 r = posix_fallocate(f->fd, old_size, new_size - old_size);
363 if (r != 0)
364 return -r;
cec736d2
LP
365
366 if (fstat(f->fd, &f->last_stat) < 0)
367 return -errno;
368
23b0b2b2 369 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
370
371 return 0;
372}
373
fcde2389 374static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
cec736d2 375 assert(f);
cec736d2
LP
376 assert(ret);
377
7762e02b
LP
378 if (size <= 0)
379 return -EINVAL;
380
2a59ea54 381 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
382 if (offset + size > (uint64_t) f->last_stat.st_size) {
383 /* Hmm, out of range? Let's refresh the fstat() data
384 * first, before we trust that check. */
385
386 if (fstat(f->fd, &f->last_stat) < 0 ||
387 offset + size > (uint64_t) f->last_stat.st_size)
388 return -EADDRNOTAVAIL;
389 }
390
fcde2389 391 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
cec736d2
LP
392}
393
16e9f408
LP
394static uint64_t minimum_header_size(Object *o) {
395
396 static uint64_t table[] = {
397 [OBJECT_DATA] = sizeof(DataObject),
398 [OBJECT_FIELD] = sizeof(FieldObject),
399 [OBJECT_ENTRY] = sizeof(EntryObject),
400 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
401 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
402 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
403 [OBJECT_TAG] = sizeof(TagObject),
404 };
405
406 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
407 return sizeof(ObjectHeader);
408
409 return table[o->object.type];
410}
411
de190aef 412int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
413 int r;
414 void *t;
415 Object *o;
416 uint64_t s;
16e9f408 417 unsigned context;
cec736d2
LP
418
419 assert(f);
420 assert(ret);
421
db11ac1a
LP
422 /* Objects may only be located at multiple of 64 bit */
423 if (!VALID64(offset))
424 return -EFAULT;
425
16e9f408
LP
426 /* One context for each type, plus one catch-all for the rest */
427 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
428
fcde2389 429 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
430 if (r < 0)
431 return r;
432
433 o = (Object*) t;
434 s = le64toh(o->object.size);
435
436 if (s < sizeof(ObjectHeader))
437 return -EBADMSG;
438
16e9f408
LP
439 if (o->object.type <= OBJECT_UNUSED)
440 return -EBADMSG;
441
442 if (s < minimum_header_size(o))
443 return -EBADMSG;
444
3c1668da 445 if (type > 0 && o->object.type != type)
cec736d2
LP
446 return -EBADMSG;
447
448 if (s > sizeof(ObjectHeader)) {
fcde2389 449 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
cec736d2
LP
450 if (r < 0)
451 return r;
452
453 o = (Object*) t;
454 }
455
cec736d2
LP
456 *ret = o;
457 return 0;
458}
459
d98cc1f2 460static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
461 uint64_t r;
462
463 assert(f);
464
beec0085 465 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
466
467 if (seqnum) {
de190aef 468 /* If an external seqnum counter was passed, we update
c2373f84
LP
469 * both the local and the external one, and set it to
470 * the maximum of both */
471
472 if (*seqnum + 1 > r)
473 r = *seqnum + 1;
474
475 *seqnum = r;
476 }
477
beec0085 478 f->header->tail_entry_seqnum = htole64(r);
cec736d2 479
beec0085
LP
480 if (f->header->head_entry_seqnum == 0)
481 f->header->head_entry_seqnum = htole64(r);
de190aef 482
cec736d2
LP
483 return r;
484}
485
0284adc6 486int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
487 int r;
488 uint64_t p;
489 Object *tail, *o;
490 void *t;
491
492 assert(f);
16e9f408 493 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
494 assert(size >= sizeof(ObjectHeader));
495 assert(offset);
496 assert(ret);
497
26687bf8
OS
498 r = journal_file_set_online(f);
499 if (r < 0)
500 return r;
501
cec736d2 502 p = le64toh(f->header->tail_object_offset);
cec736d2 503 if (p == 0)
23b0b2b2 504 p = le64toh(f->header->header_size);
cec736d2 505 else {
de190aef 506 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
507 if (r < 0)
508 return r;
509
510 p += ALIGN64(le64toh(tail->object.size));
511 }
512
513 r = journal_file_allocate(f, p, size);
514 if (r < 0)
515 return r;
516
fcde2389 517 r = journal_file_move_to(f, type, false, p, size, &t);
cec736d2
LP
518 if (r < 0)
519 return r;
520
521 o = (Object*) t;
522
523 zero(o->object);
de190aef 524 o->object.type = type;
cec736d2
LP
525 o->object.size = htole64(size);
526
527 f->header->tail_object_offset = htole64(p);
cec736d2
LP
528 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
529
530 *ret = o;
531 *offset = p;
532
533 return 0;
534}
535
de190aef 536static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
537 uint64_t s, p;
538 Object *o;
539 int r;
540
541 assert(f);
542
dfabe643 543 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
544 journal file and we want to make sure we never get beyond
545 75% fill level. Calculate the hash table size for the
546 maximum file size based on these metrics. */
547
dfabe643 548 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
549 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
550 s = DEFAULT_DATA_HASH_TABLE_SIZE;
551
507f22bd 552 log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
4a92baf3 553
de190aef
LP
554 r = journal_file_append_object(f,
555 OBJECT_DATA_HASH_TABLE,
556 offsetof(Object, hash_table.items) + s,
557 &o, &p);
cec736d2
LP
558 if (r < 0)
559 return r;
560
de190aef 561 memset(o->hash_table.items, 0, s);
cec736d2 562
de190aef
LP
563 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
564 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
565
566 return 0;
567}
568
de190aef 569static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
570 uint64_t s, p;
571 Object *o;
572 int r;
573
574 assert(f);
575
3c1668da
LP
576 /* We use a fixed size hash table for the fields as this
577 * number should grow very slowly only */
578
de190aef
LP
579 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
580 r = journal_file_append_object(f,
581 OBJECT_FIELD_HASH_TABLE,
582 offsetof(Object, hash_table.items) + s,
583 &o, &p);
cec736d2
LP
584 if (r < 0)
585 return r;
586
de190aef 587 memset(o->hash_table.items, 0, s);
cec736d2 588
de190aef
LP
589 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
590 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
591
592 return 0;
593}
594
de190aef 595static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
596 uint64_t s, p;
597 void *t;
598 int r;
599
600 assert(f);
601
de190aef
LP
602 p = le64toh(f->header->data_hash_table_offset);
603 s = le64toh(f->header->data_hash_table_size);
cec736d2 604
de190aef 605 r = journal_file_move_to(f,
16e9f408 606 OBJECT_DATA_HASH_TABLE,
fcde2389 607 true,
de190aef
LP
608 p, s,
609 &t);
cec736d2
LP
610 if (r < 0)
611 return r;
612
de190aef 613 f->data_hash_table = t;
cec736d2
LP
614 return 0;
615}
616
de190aef 617static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
618 uint64_t s, p;
619 void *t;
620 int r;
621
622 assert(f);
623
de190aef
LP
624 p = le64toh(f->header->field_hash_table_offset);
625 s = le64toh(f->header->field_hash_table_size);
cec736d2 626
de190aef 627 r = journal_file_move_to(f,
16e9f408 628 OBJECT_FIELD_HASH_TABLE,
fcde2389 629 true,
de190aef
LP
630 p, s,
631 &t);
cec736d2
LP
632 if (r < 0)
633 return r;
634
de190aef 635 f->field_hash_table = t;
cec736d2
LP
636 return 0;
637}
638
3c1668da
LP
639static int journal_file_link_field(
640 JournalFile *f,
641 Object *o,
642 uint64_t offset,
643 uint64_t hash) {
644
645 uint64_t p, h;
646 int r;
647
648 assert(f);
649 assert(o);
650 assert(offset > 0);
651
652 if (o->object.type != OBJECT_FIELD)
653 return -EINVAL;
654
655 /* This might alter the window we are looking at */
656
657 o->field.next_hash_offset = o->field.head_data_offset = 0;
658
659 h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
660 p = le64toh(f->field_hash_table[h].tail_hash_offset);
661 if (p == 0)
662 f->field_hash_table[h].head_hash_offset = htole64(offset);
663 else {
664 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
665 if (r < 0)
666 return r;
667
668 o->field.next_hash_offset = htole64(offset);
669 }
670
671 f->field_hash_table[h].tail_hash_offset = htole64(offset);
672
673 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
674 f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
675
676 return 0;
677}
678
679static int journal_file_link_data(
680 JournalFile *f,
681 Object *o,
682 uint64_t offset,
683 uint64_t hash) {
684
de190aef 685 uint64_t p, h;
cec736d2
LP
686 int r;
687
688 assert(f);
689 assert(o);
690 assert(offset > 0);
b588975f
LP
691
692 if (o->object.type != OBJECT_DATA)
693 return -EINVAL;
cec736d2 694
48496df6
LP
695 /* This might alter the window we are looking at */
696
de190aef
LP
697 o->data.next_hash_offset = o->data.next_field_offset = 0;
698 o->data.entry_offset = o->data.entry_array_offset = 0;
699 o->data.n_entries = 0;
cec736d2 700
de190aef 701 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 702 p = le64toh(f->data_hash_table[h].tail_hash_offset);
3c1668da 703 if (p == 0)
cec736d2 704 /* Only entry in the hash table is easy */
de190aef 705 f->data_hash_table[h].head_hash_offset = htole64(offset);
3c1668da 706 else {
48496df6
LP
707 /* Move back to the previous data object, to patch in
708 * pointer */
cec736d2 709
de190aef 710 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
711 if (r < 0)
712 return r;
713
de190aef 714 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
715 }
716
de190aef 717 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 718
dca6219e
LP
719 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
720 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
721
cec736d2
LP
722 return 0;
723}
724
3c1668da
LP
725int journal_file_find_field_object_with_hash(
726 JournalFile *f,
727 const void *field, uint64_t size, uint64_t hash,
728 Object **ret, uint64_t *offset) {
729
730 uint64_t p, osize, h;
731 int r;
732
733 assert(f);
734 assert(field && size > 0);
735
736 osize = offsetof(Object, field.payload) + size;
737
738 if (f->header->field_hash_table_size == 0)
739 return -EBADMSG;
740
741 h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
742 p = le64toh(f->field_hash_table[h].head_hash_offset);
743
744 while (p > 0) {
745 Object *o;
746
747 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
748 if (r < 0)
749 return r;
750
751 if (le64toh(o->field.hash) == hash &&
752 le64toh(o->object.size) == osize &&
753 memcmp(o->field.payload, field, size) == 0) {
754
755 if (ret)
756 *ret = o;
757 if (offset)
758 *offset = p;
759
760 return 1;
761 }
762
763 p = le64toh(o->field.next_hash_offset);
764 }
765
766 return 0;
767}
768
769int journal_file_find_field_object(
770 JournalFile *f,
771 const void *field, uint64_t size,
772 Object **ret, uint64_t *offset) {
773
774 uint64_t hash;
775
776 assert(f);
777 assert(field && size > 0);
778
779 hash = hash64(field, size);
780
781 return journal_file_find_field_object_with_hash(f,
782 field, size, hash,
783 ret, offset);
784}
785
de190aef
LP
786int journal_file_find_data_object_with_hash(
787 JournalFile *f,
788 const void *data, uint64_t size, uint64_t hash,
789 Object **ret, uint64_t *offset) {
48496df6 790
de190aef 791 uint64_t p, osize, h;
cec736d2
LP
792 int r;
793
794 assert(f);
795 assert(data || size == 0);
796
797 osize = offsetof(Object, data.payload) + size;
798
bc85bfee
LP
799 if (f->header->data_hash_table_size == 0)
800 return -EBADMSG;
801
de190aef
LP
802 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
803 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 804
de190aef
LP
805 while (p > 0) {
806 Object *o;
cec736d2 807
de190aef 808 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
809 if (r < 0)
810 return r;
811
807e17f0 812 if (le64toh(o->data.hash) != hash)
85a131e8 813 goto next;
807e17f0
LP
814
815 if (o->object.flags & OBJECT_COMPRESSED) {
816#ifdef HAVE_XZ
b785c858 817 uint64_t l, rsize;
cec736d2 818
807e17f0
LP
819 l = le64toh(o->object.size);
820 if (l <= offsetof(Object, data.payload))
cec736d2
LP
821 return -EBADMSG;
822
807e17f0
LP
823 l -= offsetof(Object, data.payload);
824
93b73b06 825 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0))
807e17f0
LP
826 return -EBADMSG;
827
b785c858 828 if (rsize == size &&
807e17f0
LP
829 memcmp(f->compress_buffer, data, size) == 0) {
830
831 if (ret)
832 *ret = o;
833
834 if (offset)
835 *offset = p;
836
837 return 1;
838 }
839#else
840 return -EPROTONOSUPPORT;
841#endif
842
843 } else if (le64toh(o->object.size) == osize &&
844 memcmp(o->data.payload, data, size) == 0) {
845
cec736d2
LP
846 if (ret)
847 *ret = o;
848
849 if (offset)
850 *offset = p;
851
de190aef 852 return 1;
cec736d2
LP
853 }
854
85a131e8 855 next:
cec736d2
LP
856 p = le64toh(o->data.next_hash_offset);
857 }
858
de190aef
LP
859 return 0;
860}
861
862int journal_file_find_data_object(
863 JournalFile *f,
864 const void *data, uint64_t size,
865 Object **ret, uint64_t *offset) {
866
867 uint64_t hash;
868
869 assert(f);
870 assert(data || size == 0);
871
872 hash = hash64(data, size);
873
874 return journal_file_find_data_object_with_hash(f,
875 data, size, hash,
876 ret, offset);
877}
878
3c1668da
LP
879static int journal_file_append_field(
880 JournalFile *f,
881 const void *field, uint64_t size,
882 Object **ret, uint64_t *offset) {
883
884 uint64_t hash, p;
885 uint64_t osize;
886 Object *o;
887 int r;
888
889 assert(f);
890 assert(field && size > 0);
891
892 hash = hash64(field, size);
893
894 r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
895 if (r < 0)
896 return r;
897 else if (r > 0) {
898
899 if (ret)
900 *ret = o;
901
902 if (offset)
903 *offset = p;
904
905 return 0;
906 }
907
908 osize = offsetof(Object, field.payload) + size;
909 r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
8c92d4bb
LP
910 if (r < 0)
911 return r;
3c1668da
LP
912
913 o->field.hash = htole64(hash);
914 memcpy(o->field.payload, field, size);
915
916 r = journal_file_link_field(f, o, p, hash);
917 if (r < 0)
918 return r;
919
920 /* The linking might have altered the window, so let's
921 * refresh our pointer */
922 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
923 if (r < 0)
924 return r;
925
926#ifdef HAVE_GCRYPT
927 r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
928 if (r < 0)
929 return r;
930#endif
931
932 if (ret)
933 *ret = o;
934
935 if (offset)
936 *offset = p;
937
938 return 0;
939}
940
48496df6
LP
941static int journal_file_append_data(
942 JournalFile *f,
943 const void *data, uint64_t size,
944 Object **ret, uint64_t *offset) {
945
de190aef
LP
946 uint64_t hash, p;
947 uint64_t osize;
948 Object *o;
949 int r;
807e17f0 950 bool compressed = false;
3c1668da 951 const void *eq;
de190aef
LP
952
953 assert(f);
954 assert(data || size == 0);
955
956 hash = hash64(data, size);
957
958 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
959 if (r < 0)
960 return r;
961 else if (r > 0) {
962
963 if (ret)
964 *ret = o;
965
966 if (offset)
967 *offset = p;
968
969 return 0;
970 }
971
972 osize = offsetof(Object, data.payload) + size;
973 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
974 if (r < 0)
975 return r;
976
cec736d2 977 o->data.hash = htole64(hash);
807e17f0
LP
978
979#ifdef HAVE_XZ
980 if (f->compress &&
981 size >= COMPRESSION_SIZE_THRESHOLD) {
982 uint64_t rsize;
983
984 compressed = compress_blob(data, size, o->data.payload, &rsize);
985
986 if (compressed) {
987 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
988 o->object.flags |= OBJECT_COMPRESSED;
989
507f22bd 990 log_debug("Compressed data object %"PRIu64" -> %"PRIu64, size, rsize);
807e17f0
LP
991 }
992 }
993#endif
994
64825d3c 995 if (!compressed && size > 0)
807e17f0 996 memcpy(o->data.payload, data, size);
cec736d2 997
de190aef 998 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
999 if (r < 0)
1000 return r;
1001
48496df6
LP
1002 /* The linking might have altered the window, so let's
1003 * refresh our pointer */
1004 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1005 if (r < 0)
1006 return r;
1007
3c1668da
LP
1008 eq = memchr(data, '=', size);
1009 if (eq && eq > data) {
1010 uint64_t fp;
1011 Object *fo;
1012
1013 /* Create field object ... */
1014 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
1015 if (r < 0)
1016 return r;
1017
1018 /* ... and link it in. */
1019 o->data.next_field_offset = fo->field.head_data_offset;
1020 fo->field.head_data_offset = le64toh(p);
1021 }
1022
5996c7c2
LP
1023#ifdef HAVE_GCRYPT
1024 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
1025 if (r < 0)
1026 return r;
1027#endif
1028
cec736d2
LP
1029 if (ret)
1030 *ret = o;
1031
1032 if (offset)
de190aef 1033 *offset = p;
cec736d2
LP
1034
1035 return 0;
1036}
1037
1038uint64_t journal_file_entry_n_items(Object *o) {
1039 assert(o);
b588975f
LP
1040
1041 if (o->object.type != OBJECT_ENTRY)
1042 return 0;
cec736d2
LP
1043
1044 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
1045}
1046
0284adc6 1047uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 1048 assert(o);
b588975f
LP
1049
1050 if (o->object.type != OBJECT_ENTRY_ARRAY)
1051 return 0;
de190aef
LP
1052
1053 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
1054}
1055
fb9a24b6
LP
1056uint64_t journal_file_hash_table_n_items(Object *o) {
1057 assert(o);
b588975f
LP
1058
1059 if (o->object.type != OBJECT_DATA_HASH_TABLE &&
1060 o->object.type != OBJECT_FIELD_HASH_TABLE)
1061 return 0;
fb9a24b6
LP
1062
1063 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
1064}
1065
de190aef 1066static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
1067 le64_t *first,
1068 le64_t *idx,
de190aef 1069 uint64_t p) {
cec736d2 1070 int r;
de190aef
LP
1071 uint64_t n = 0, ap = 0, q, i, a, hidx;
1072 Object *o;
1073
cec736d2 1074 assert(f);
de190aef
LP
1075 assert(first);
1076 assert(idx);
1077 assert(p > 0);
cec736d2 1078
de190aef
LP
1079 a = le64toh(*first);
1080 i = hidx = le64toh(*idx);
1081 while (a > 0) {
1082
1083 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1084 if (r < 0)
1085 return r;
cec736d2 1086
de190aef
LP
1087 n = journal_file_entry_array_n_items(o);
1088 if (i < n) {
1089 o->entry_array.items[i] = htole64(p);
1090 *idx = htole64(hidx + 1);
1091 return 0;
1092 }
cec736d2 1093
de190aef
LP
1094 i -= n;
1095 ap = a;
1096 a = le64toh(o->entry_array.next_entry_array_offset);
1097 }
1098
1099 if (hidx > n)
1100 n = (hidx+1) * 2;
1101 else
1102 n = n * 2;
1103
1104 if (n < 4)
1105 n = 4;
1106
1107 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
1108 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
1109 &o, &q);
cec736d2
LP
1110 if (r < 0)
1111 return r;
1112
feb12d3e 1113#ifdef HAVE_GCRYPT
5996c7c2 1114 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
b0af6f41
LP
1115 if (r < 0)
1116 return r;
feb12d3e 1117#endif
b0af6f41 1118
de190aef 1119 o->entry_array.items[i] = htole64(p);
cec736d2 1120
de190aef 1121 if (ap == 0)
7be3aa17 1122 *first = htole64(q);
cec736d2 1123 else {
de190aef 1124 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
1125 if (r < 0)
1126 return r;
1127
de190aef
LP
1128 o->entry_array.next_entry_array_offset = htole64(q);
1129 }
cec736d2 1130
2dee23eb
LP
1131 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1132 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
1133
de190aef
LP
1134 *idx = htole64(hidx + 1);
1135
1136 return 0;
1137}
cec736d2 1138
de190aef 1139static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
1140 le64_t *extra,
1141 le64_t *first,
1142 le64_t *idx,
de190aef
LP
1143 uint64_t p) {
1144
1145 int r;
1146
1147 assert(f);
1148 assert(extra);
1149 assert(first);
1150 assert(idx);
1151 assert(p > 0);
1152
1153 if (*idx == 0)
1154 *extra = htole64(p);
1155 else {
4fd052ae 1156 le64_t i;
de190aef 1157
7be3aa17 1158 i = htole64(le64toh(*idx) - 1);
de190aef
LP
1159 r = link_entry_into_array(f, first, &i, p);
1160 if (r < 0)
1161 return r;
cec736d2
LP
1162 }
1163
de190aef
LP
1164 *idx = htole64(le64toh(*idx) + 1);
1165 return 0;
1166}
1167
1168static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
1169 uint64_t p;
1170 int r;
1171 assert(f);
1172 assert(o);
1173 assert(offset > 0);
1174
1175 p = le64toh(o->entry.items[i].object_offset);
1176 if (p == 0)
1177 return -EINVAL;
1178
1179 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
1180 if (r < 0)
1181 return r;
1182
de190aef
LP
1183 return link_entry_into_array_plus_one(f,
1184 &o->data.entry_offset,
1185 &o->data.entry_array_offset,
1186 &o->data.n_entries,
1187 offset);
cec736d2
LP
1188}
1189
1190static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 1191 uint64_t n, i;
cec736d2
LP
1192 int r;
1193
1194 assert(f);
1195 assert(o);
1196 assert(offset > 0);
b588975f
LP
1197
1198 if (o->object.type != OBJECT_ENTRY)
1199 return -EINVAL;
cec736d2 1200
b788cc23
LP
1201 __sync_synchronize();
1202
cec736d2 1203 /* Link up the entry itself */
de190aef
LP
1204 r = link_entry_into_array(f,
1205 &f->header->entry_array_offset,
1206 &f->header->n_entries,
1207 offset);
1208 if (r < 0)
1209 return r;
cec736d2 1210
507f22bd 1211 /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
cec736d2 1212
de190aef 1213 if (f->header->head_entry_realtime == 0)
0ac38b70 1214 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 1215
0ac38b70 1216 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
1217 f->header->tail_entry_monotonic = o->entry.monotonic;
1218
1219 f->tail_entry_monotonic_valid = true;
cec736d2
LP
1220
1221 /* Link up the items */
1222 n = journal_file_entry_n_items(o);
1223 for (i = 0; i < n; i++) {
1224 r = journal_file_link_entry_item(f, o, offset, i);
1225 if (r < 0)
1226 return r;
1227 }
1228
cec736d2
LP
1229 return 0;
1230}
1231
1232static int journal_file_append_entry_internal(
1233 JournalFile *f,
1234 const dual_timestamp *ts,
1235 uint64_t xor_hash,
1236 const EntryItem items[], unsigned n_items,
de190aef 1237 uint64_t *seqnum,
cec736d2
LP
1238 Object **ret, uint64_t *offset) {
1239 uint64_t np;
1240 uint64_t osize;
1241 Object *o;
1242 int r;
1243
1244 assert(f);
1245 assert(items || n_items == 0);
de190aef 1246 assert(ts);
cec736d2
LP
1247
1248 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1249
de190aef 1250 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
1251 if (r < 0)
1252 return r;
1253
d98cc1f2 1254 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 1255 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
1256 o->entry.realtime = htole64(ts->realtime);
1257 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
1258 o->entry.xor_hash = htole64(xor_hash);
1259 o->entry.boot_id = f->header->boot_id;
1260
feb12d3e 1261#ifdef HAVE_GCRYPT
5996c7c2 1262 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
b0af6f41
LP
1263 if (r < 0)
1264 return r;
feb12d3e 1265#endif
b0af6f41 1266
cec736d2
LP
1267 r = journal_file_link_entry(f, o, np);
1268 if (r < 0)
1269 return r;
1270
1271 if (ret)
1272 *ret = o;
1273
1274 if (offset)
1275 *offset = np;
1276
1277 return 0;
1278}
1279
cf244689 1280void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1281 assert(f);
1282
1283 /* inotify() does not receive IN_MODIFY events from file
1284 * accesses done via mmap(). After each access we hence
1285 * trigger IN_MODIFY by truncating the journal file to its
1286 * current size which triggers IN_MODIFY. */
1287
bc85bfee
LP
1288 __sync_synchronize();
1289
50f20cfd 1290 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
c5315881 1291 log_error("Failed to truncate file to its own size: %m");
50f20cfd
LP
1292}
1293
1f2da9ec
LP
1294static int entry_item_cmp(const void *_a, const void *_b) {
1295 const EntryItem *a = _a, *b = _b;
1296
1297 if (le64toh(a->object_offset) < le64toh(b->object_offset))
1298 return -1;
1299 if (le64toh(a->object_offset) > le64toh(b->object_offset))
1300 return 1;
1301 return 0;
1302}
1303
de190aef 1304int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1305 unsigned i;
1306 EntryItem *items;
1307 int r;
1308 uint64_t xor_hash = 0;
de190aef 1309 struct dual_timestamp _ts;
cec736d2
LP
1310
1311 assert(f);
1312 assert(iovec || n_iovec == 0);
1313
de190aef
LP
1314 if (!ts) {
1315 dual_timestamp_get(&_ts);
1316 ts = &_ts;
1317 }
1318
1319 if (f->tail_entry_monotonic_valid &&
1320 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1321 return -EINVAL;
1322
feb12d3e 1323#ifdef HAVE_GCRYPT
7560fffc
LP
1324 r = journal_file_maybe_append_tag(f, ts->realtime);
1325 if (r < 0)
1326 return r;
feb12d3e 1327#endif
7560fffc 1328
64825d3c 1329 /* alloca() can't take 0, hence let's allocate at least one */
9607d947 1330 items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));
cec736d2
LP
1331
1332 for (i = 0; i < n_iovec; i++) {
1333 uint64_t p;
1334 Object *o;
1335
1336 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1337 if (r < 0)
cf244689 1338 return r;
cec736d2
LP
1339
1340 xor_hash ^= le64toh(o->data.hash);
1341 items[i].object_offset = htole64(p);
de7b95cd 1342 items[i].hash = o->data.hash;
cec736d2
LP
1343 }
1344
1f2da9ec
LP
1345 /* Order by the position on disk, in order to improve seek
1346 * times for rotating media. */
7ff7394d 1347 qsort_safe(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1f2da9ec 1348
de190aef 1349 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1350
50f20cfd
LP
1351 journal_file_post_change(f);
1352
cec736d2
LP
1353 return r;
1354}
1355
a4bcff5b
LP
/* One cached position inside an entry array chain, keyed by the offset of
 * the chain's first array. */
struct ChainCacheItem {
        uint64_t first; /* offset of the array the chain starts with (hashmap key) */
        uint64_t array; /* offset of the array that was cached */
        uint64_t begin; /* first item stored in the cached array */
        uint64_t total; /* number of items in all arrays of the chain preceding the cached one */
};

typedef struct ChainCacheItem ChainCacheItem;
1362
1363static void chain_cache_put(
1364 Hashmap *h,
1365 ChainCacheItem *ci,
1366 uint64_t first,
1367 uint64_t array,
1368 uint64_t begin,
1369 uint64_t total) {
1370
1371 if (!ci) {
34741aa3
LP
1372 /* If the chain item to cache for this chain is the
1373 * first one it's not worth caching anything */
1374 if (array == first)
1375 return;
1376
a4bcff5b
LP
1377 if (hashmap_size(h) >= CHAIN_CACHE_MAX)
1378 ci = hashmap_steal_first(h);
1379 else {
1380 ci = new(ChainCacheItem, 1);
1381 if (!ci)
1382 return;
1383 }
1384
1385 ci->first = first;
1386
1387 if (hashmap_put(h, &ci->first, ci) < 0) {
1388 free(ci);
1389 return;
1390 }
1391 } else
1392 assert(ci->first == first);
1393
1394 ci->array = array;
1395 ci->begin = begin;
1396 ci->total = total;
1397}
1398
de190aef
LP
1399static int generic_array_get(JournalFile *f,
1400 uint64_t first,
1401 uint64_t i,
1402 Object **ret, uint64_t *offset) {
1403
cec736d2 1404 Object *o;
a4bcff5b 1405 uint64_t p = 0, a, t = 0;
cec736d2 1406 int r;
a4bcff5b 1407 ChainCacheItem *ci;
cec736d2
LP
1408
1409 assert(f);
1410
de190aef 1411 a = first;
a4bcff5b
LP
1412
1413 /* Try the chain cache first */
1414 ci = hashmap_get(f->chain_cache, &first);
1415 if (ci && i > ci->total) {
1416 a = ci->array;
1417 i -= ci->total;
1418 t = ci->total;
1419 }
1420
de190aef 1421 while (a > 0) {
a4bcff5b 1422 uint64_t k;
cec736d2 1423
de190aef
LP
1424 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1425 if (r < 0)
1426 return r;
cec736d2 1427
a4bcff5b
LP
1428 k = journal_file_entry_array_n_items(o);
1429 if (i < k) {
de190aef 1430 p = le64toh(o->entry_array.items[i]);
a4bcff5b 1431 goto found;
cec736d2
LP
1432 }
1433
a4bcff5b
LP
1434 i -= k;
1435 t += k;
de190aef
LP
1436 a = le64toh(o->entry_array.next_entry_array_offset);
1437 }
1438
a4bcff5b
LP
1439 return 0;
1440
1441found:
1442 /* Let's cache this item for the next invocation */
1443 chain_cache_put(f->chain_cache, ci, first, a, o->entry_array.items[0], t);
de190aef
LP
1444
1445 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1446 if (r < 0)
1447 return r;
1448
1449 if (ret)
1450 *ret = o;
1451
1452 if (offset)
1453 *offset = p;
1454
1455 return 1;
1456}
1457
1458static int generic_array_get_plus_one(JournalFile *f,
1459 uint64_t extra,
1460 uint64_t first,
1461 uint64_t i,
1462 Object **ret, uint64_t *offset) {
1463
1464 Object *o;
1465
1466 assert(f);
1467
1468 if (i == 0) {
1469 int r;
1470
1471 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1472 if (r < 0)
1473 return r;
1474
de190aef
LP
1475 if (ret)
1476 *ret = o;
cec736d2 1477
de190aef
LP
1478 if (offset)
1479 *offset = extra;
cec736d2 1480
de190aef 1481 return 1;
cec736d2
LP
1482 }
1483
de190aef
LP
1484 return generic_array_get(f, first, i-1, ret, offset);
1485}
cec736d2 1486
de190aef
LP
/* Results of the test_object callbacks used for bisection */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object's value is smaller than the needle */
        TEST_RIGHT = 2  /* the tested object's value is larger than the needle */
};
cec736d2 1492
de190aef
LP
1493static int generic_array_bisect(JournalFile *f,
1494 uint64_t first,
1495 uint64_t n,
1496 uint64_t needle,
1497 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1498 direction_t direction,
1499 Object **ret,
1500 uint64_t *offset,
1501 uint64_t *idx) {
1502
1503 uint64_t a, p, t = 0, i = 0, last_p = 0;
1504 bool subtract_one = false;
1505 Object *o, *array = NULL;
1506 int r;
a4bcff5b 1507 ChainCacheItem *ci;
cec736d2 1508
de190aef
LP
1509 assert(f);
1510 assert(test_object);
cec736d2 1511
a4bcff5b 1512 /* Start with the first array in the chain */
de190aef 1513 a = first;
a4bcff5b
LP
1514
1515 ci = hashmap_get(f->chain_cache, &first);
1516 if (ci && n > ci->total) {
1517 /* Ah, we have iterated this bisection array chain
1518 * previously! Let's see if we can skip ahead in the
1519 * chain, as far as the last time. But we can't jump
1520 * backwards in the chain, so let's check that
1521 * first. */
1522
1523 r = test_object(f, ci->begin, needle);
1524 if (r < 0)
1525 return r;
1526
1527 if (r == TEST_LEFT) {
1528 /* OK, what we are looking for is right of th
1529 * begin of this EntryArray, so let's jump
1530 * straight to previously cached array in the
1531 * chain */
1532
1533 a = ci->array;
1534 n -= ci->total;
1535 t = ci->total;
1536 }
1537 }
1538
de190aef
LP
1539 while (a > 0) {
1540 uint64_t left, right, k, lp;
1541
1542 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1543 if (r < 0)
1544 return r;
1545
de190aef
LP
1546 k = journal_file_entry_array_n_items(array);
1547 right = MIN(k, n);
1548 if (right <= 0)
1549 return 0;
cec736d2 1550
de190aef
LP
1551 i = right - 1;
1552 lp = p = le64toh(array->entry_array.items[i]);
1553 if (p <= 0)
1554 return -EBADMSG;
cec736d2 1555
de190aef
LP
1556 r = test_object(f, p, needle);
1557 if (r < 0)
1558 return r;
cec736d2 1559
de190aef
LP
1560 if (r == TEST_FOUND)
1561 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1562
1563 if (r == TEST_RIGHT) {
1564 left = 0;
1565 right -= 1;
1566 for (;;) {
1567 if (left == right) {
1568 if (direction == DIRECTION_UP)
1569 subtract_one = true;
1570
1571 i = left;
1572 goto found;
1573 }
1574
1575 assert(left < right);
1576
1577 i = (left + right) / 2;
1578 p = le64toh(array->entry_array.items[i]);
1579 if (p <= 0)
1580 return -EBADMSG;
1581
1582 r = test_object(f, p, needle);
1583 if (r < 0)
1584 return r;
cec736d2 1585
de190aef
LP
1586 if (r == TEST_FOUND)
1587 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1588
1589 if (r == TEST_RIGHT)
1590 right = i;
1591 else
1592 left = i + 1;
1593 }
1594 }
1595
cbdca852
LP
1596 if (k > n) {
1597 if (direction == DIRECTION_UP) {
1598 i = n;
1599 subtract_one = true;
1600 goto found;
1601 }
1602
cec736d2 1603 return 0;
cbdca852 1604 }
cec736d2 1605
de190aef
LP
1606 last_p = lp;
1607
1608 n -= k;
1609 t += k;
1610 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1611 }
1612
1613 return 0;
de190aef
LP
1614
1615found:
1616 if (subtract_one && t == 0 && i == 0)
1617 return 0;
1618
a4bcff5b
LP
1619 /* Let's cache this item for the next invocation */
1620 chain_cache_put(f->chain_cache, ci, first, a, array->entry_array.items[0], t);
1621
de190aef
LP
1622 if (subtract_one && i == 0)
1623 p = last_p;
1624 else if (subtract_one)
1625 p = le64toh(array->entry_array.items[i-1]);
1626 else
1627 p = le64toh(array->entry_array.items[i]);
1628
1629 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1630 if (r < 0)
1631 return r;
1632
1633 if (ret)
1634 *ret = o;
1635
1636 if (offset)
1637 *offset = p;
1638
1639 if (idx)
cbdca852 1640 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1641
1642 return 1;
cec736d2
LP
1643}
1644
de190aef
LP
1645static int generic_array_bisect_plus_one(JournalFile *f,
1646 uint64_t extra,
1647 uint64_t first,
1648 uint64_t n,
1649 uint64_t needle,
1650 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1651 direction_t direction,
1652 Object **ret,
1653 uint64_t *offset,
1654 uint64_t *idx) {
1655
cec736d2 1656 int r;
cbdca852
LP
1657 bool step_back = false;
1658 Object *o;
cec736d2
LP
1659
1660 assert(f);
de190aef 1661 assert(test_object);
cec736d2 1662
de190aef
LP
1663 if (n <= 0)
1664 return 0;
cec736d2 1665
de190aef
LP
1666 /* This bisects the array in object 'first', but first checks
1667 * an extra */
de190aef
LP
1668 r = test_object(f, extra, needle);
1669 if (r < 0)
1670 return r;
a536e261
LP
1671
1672 if (r == TEST_FOUND)
1673 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1674
cbdca852
LP
1675 /* if we are looking with DIRECTION_UP then we need to first
1676 see if in the actual array there is a matching entry, and
1677 return the last one of that. But if there isn't any we need
1678 to return this one. Hence remember this, and return it
1679 below. */
1680 if (r == TEST_LEFT)
1681 step_back = direction == DIRECTION_UP;
de190aef 1682
cbdca852
LP
1683 if (r == TEST_RIGHT) {
1684 if (direction == DIRECTION_DOWN)
1685 goto found;
1686 else
1687 return 0;
a536e261 1688 }
cec736d2 1689
de190aef
LP
1690 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1691
cbdca852
LP
1692 if (r == 0 && step_back)
1693 goto found;
1694
ecf68b1d 1695 if (r > 0 && idx)
de190aef
LP
1696 (*idx) ++;
1697
1698 return r;
cbdca852
LP
1699
1700found:
1701 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1702 if (r < 0)
1703 return r;
1704
1705 if (ret)
1706 *ret = o;
1707
1708 if (offset)
1709 *offset = extra;
1710
1711 if (idx)
1712 *idx = 0;
1713
1714 return 1;
1715}
1716
44a6b1b6 1717_pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
cbdca852
LP
1718 assert(f);
1719 assert(p > 0);
1720
1721 if (p == needle)
1722 return TEST_FOUND;
1723 else if (p < needle)
1724 return TEST_LEFT;
1725 else
1726 return TEST_RIGHT;
1727}
1728
1729int journal_file_move_to_entry_by_offset(
1730 JournalFile *f,
1731 uint64_t p,
1732 direction_t direction,
1733 Object **ret,
1734 uint64_t *offset) {
1735
1736 return generic_array_bisect(f,
1737 le64toh(f->header->entry_array_offset),
1738 le64toh(f->header->n_entries),
1739 p,
1740 test_object_offset,
1741 direction,
1742 ret, offset, NULL);
de190aef
LP
1743}
1744
cbdca852 1745
de190aef
LP
1746static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1747 Object *o;
1748 int r;
1749
1750 assert(f);
1751 assert(p > 0);
1752
1753 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1754 if (r < 0)
1755 return r;
1756
de190aef
LP
1757 if (le64toh(o->entry.seqnum) == needle)
1758 return TEST_FOUND;
1759 else if (le64toh(o->entry.seqnum) < needle)
1760 return TEST_LEFT;
1761 else
1762 return TEST_RIGHT;
1763}
cec736d2 1764
de190aef
LP
1765int journal_file_move_to_entry_by_seqnum(
1766 JournalFile *f,
1767 uint64_t seqnum,
1768 direction_t direction,
1769 Object **ret,
1770 uint64_t *offset) {
1771
1772 return generic_array_bisect(f,
1773 le64toh(f->header->entry_array_offset),
1774 le64toh(f->header->n_entries),
1775 seqnum,
1776 test_object_seqnum,
1777 direction,
1778 ret, offset, NULL);
1779}
cec736d2 1780
de190aef
LP
1781static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1782 Object *o;
1783 int r;
1784
1785 assert(f);
1786 assert(p > 0);
1787
1788 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1789 if (r < 0)
1790 return r;
1791
1792 if (le64toh(o->entry.realtime) == needle)
1793 return TEST_FOUND;
1794 else if (le64toh(o->entry.realtime) < needle)
1795 return TEST_LEFT;
1796 else
1797 return TEST_RIGHT;
cec736d2
LP
1798}
1799
de190aef
LP
1800int journal_file_move_to_entry_by_realtime(
1801 JournalFile *f,
1802 uint64_t realtime,
1803 direction_t direction,
1804 Object **ret,
1805 uint64_t *offset) {
1806
1807 return generic_array_bisect(f,
1808 le64toh(f->header->entry_array_offset),
1809 le64toh(f->header->n_entries),
1810 realtime,
1811 test_object_realtime,
1812 direction,
1813 ret, offset, NULL);
1814}
1815
1816static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1817 Object *o;
1818 int r;
1819
1820 assert(f);
1821 assert(p > 0);
1822
1823 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1824 if (r < 0)
1825 return r;
1826
1827 if (le64toh(o->entry.monotonic) == needle)
1828 return TEST_FOUND;
1829 else if (le64toh(o->entry.monotonic) < needle)
1830 return TEST_LEFT;
1831 else
1832 return TEST_RIGHT;
1833}
1834
47838ab3
ZJS
1835static inline int find_data_object_by_boot_id(
1836 JournalFile *f,
1837 sd_id128_t boot_id,
1838 Object **o,
1839 uint64_t *b) {
1840 char t[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
1841
1842 sd_id128_to_string(boot_id, t + 9);
1843 return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
1844}
1845
de190aef
LP
1846int journal_file_move_to_entry_by_monotonic(
1847 JournalFile *f,
1848 sd_id128_t boot_id,
1849 uint64_t monotonic,
1850 direction_t direction,
1851 Object **ret,
1852 uint64_t *offset) {
1853
de190aef
LP
1854 Object *o;
1855 int r;
1856
cbdca852 1857 assert(f);
de190aef 1858
47838ab3 1859 r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
de190aef
LP
1860 if (r < 0)
1861 return r;
cbdca852 1862 if (r == 0)
de190aef
LP
1863 return -ENOENT;
1864
1865 return generic_array_bisect_plus_one(f,
1866 le64toh(o->data.entry_offset),
1867 le64toh(o->data.entry_array_offset),
1868 le64toh(o->data.n_entries),
1869 monotonic,
1870 test_object_monotonic,
1871 direction,
1872 ret, offset, NULL);
1873}
1874
de190aef
LP
1875int journal_file_next_entry(
1876 JournalFile *f,
1877 Object *o, uint64_t p,
1878 direction_t direction,
1879 Object **ret, uint64_t *offset) {
1880
1881 uint64_t i, n;
cec736d2
LP
1882 int r;
1883
1884 assert(f);
de190aef
LP
1885 assert(p > 0 || !o);
1886
1887 n = le64toh(f->header->n_entries);
1888 if (n <= 0)
1889 return 0;
cec736d2
LP
1890
1891 if (!o)
de190aef 1892 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1893 else {
de190aef 1894 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1895 return -EINVAL;
1896
de190aef
LP
1897 r = generic_array_bisect(f,
1898 le64toh(f->header->entry_array_offset),
1899 le64toh(f->header->n_entries),
1900 p,
1901 test_object_offset,
1902 DIRECTION_DOWN,
1903 NULL, NULL,
1904 &i);
1905 if (r <= 0)
1906 return r;
1907
1908 if (direction == DIRECTION_DOWN) {
1909 if (i >= n - 1)
1910 return 0;
1911
1912 i++;
1913 } else {
1914 if (i <= 0)
1915 return 0;
1916
1917 i--;
1918 }
cec736d2
LP
1919 }
1920
de190aef
LP
1921 /* And jump to it */
1922 return generic_array_get(f,
1923 le64toh(f->header->entry_array_offset),
1924 i,
1925 ret, offset);
1926}
cec736d2 1927
de190aef
LP
1928int journal_file_skip_entry(
1929 JournalFile *f,
1930 Object *o, uint64_t p,
1931 int64_t skip,
1932 Object **ret, uint64_t *offset) {
1933
1934 uint64_t i, n;
1935 int r;
1936
1937 assert(f);
1938 assert(o);
1939 assert(p > 0);
1940
1941 if (o->object.type != OBJECT_ENTRY)
1942 return -EINVAL;
1943
1944 r = generic_array_bisect(f,
1945 le64toh(f->header->entry_array_offset),
1946 le64toh(f->header->n_entries),
1947 p,
1948 test_object_offset,
1949 DIRECTION_DOWN,
1950 NULL, NULL,
1951 &i);
1952 if (r <= 0)
cec736d2
LP
1953 return r;
1954
de190aef
LP
1955 /* Calculate new index */
1956 if (skip < 0) {
1957 if ((uint64_t) -skip >= i)
1958 i = 0;
1959 else
1960 i = i - (uint64_t) -skip;
1961 } else
1962 i += (uint64_t) skip;
cec736d2 1963
de190aef
LP
1964 n = le64toh(f->header->n_entries);
1965 if (n <= 0)
1966 return -EBADMSG;
cec736d2 1967
de190aef
LP
1968 if (i >= n)
1969 i = n-1;
1970
1971 return generic_array_get(f,
1972 le64toh(f->header->entry_array_offset),
1973 i,
1974 ret, offset);
cec736d2
LP
1975}
1976
de190aef
LP
1977int journal_file_next_entry_for_data(
1978 JournalFile *f,
1979 Object *o, uint64_t p,
1980 uint64_t data_offset,
1981 direction_t direction,
1982 Object **ret, uint64_t *offset) {
1983
1984 uint64_t n, i;
cec736d2 1985 int r;
de190aef 1986 Object *d;
cec736d2
LP
1987
1988 assert(f);
de190aef 1989 assert(p > 0 || !o);
cec736d2 1990
de190aef 1991 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1992 if (r < 0)
de190aef 1993 return r;
cec736d2 1994
de190aef
LP
1995 n = le64toh(d->data.n_entries);
1996 if (n <= 0)
1997 return n;
cec736d2 1998
de190aef
LP
1999 if (!o)
2000 i = direction == DIRECTION_DOWN ? 0 : n - 1;
2001 else {
2002 if (o->object.type != OBJECT_ENTRY)
2003 return -EINVAL;
cec736d2 2004
de190aef
LP
2005 r = generic_array_bisect_plus_one(f,
2006 le64toh(d->data.entry_offset),
2007 le64toh(d->data.entry_array_offset),
2008 le64toh(d->data.n_entries),
2009 p,
2010 test_object_offset,
2011 DIRECTION_DOWN,
2012 NULL, NULL,
2013 &i);
2014
2015 if (r <= 0)
cec736d2
LP
2016 return r;
2017
de190aef
LP
2018 if (direction == DIRECTION_DOWN) {
2019 if (i >= n - 1)
2020 return 0;
cec736d2 2021
de190aef
LP
2022 i++;
2023 } else {
2024 if (i <= 0)
2025 return 0;
cec736d2 2026
de190aef
LP
2027 i--;
2028 }
cec736d2 2029
de190aef 2030 }
cec736d2 2031
de190aef
LP
2032 return generic_array_get_plus_one(f,
2033 le64toh(d->data.entry_offset),
2034 le64toh(d->data.entry_array_offset),
2035 i,
2036 ret, offset);
2037}
cec736d2 2038
cbdca852
LP
2039int journal_file_move_to_entry_by_offset_for_data(
2040 JournalFile *f,
2041 uint64_t data_offset,
2042 uint64_t p,
2043 direction_t direction,
2044 Object **ret, uint64_t *offset) {
2045
2046 int r;
2047 Object *d;
2048
2049 assert(f);
2050
2051 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2052 if (r < 0)
2053 return r;
2054
2055 return generic_array_bisect_plus_one(f,
2056 le64toh(d->data.entry_offset),
2057 le64toh(d->data.entry_array_offset),
2058 le64toh(d->data.n_entries),
2059 p,
2060 test_object_offset,
2061 direction,
2062 ret, offset, NULL);
2063}
2064
2065int journal_file_move_to_entry_by_monotonic_for_data(
2066 JournalFile *f,
2067 uint64_t data_offset,
2068 sd_id128_t boot_id,
2069 uint64_t monotonic,
2070 direction_t direction,
2071 Object **ret, uint64_t *offset) {
2072
cbdca852
LP
2073 Object *o, *d;
2074 int r;
2075 uint64_t b, z;
2076
2077 assert(f);
2078
2079 /* First, seek by time */
47838ab3 2080 r = find_data_object_by_boot_id(f, boot_id, &o, &b);
cbdca852
LP
2081 if (r < 0)
2082 return r;
2083 if (r == 0)
2084 return -ENOENT;
2085
2086 r = generic_array_bisect_plus_one(f,
2087 le64toh(o->data.entry_offset),
2088 le64toh(o->data.entry_array_offset),
2089 le64toh(o->data.n_entries),
2090 monotonic,
2091 test_object_monotonic,
2092 direction,
2093 NULL, &z, NULL);
2094 if (r <= 0)
2095 return r;
2096
2097 /* And now, continue seeking until we find an entry that
2098 * exists in both bisection arrays */
2099
2100 for (;;) {
2101 Object *qo;
2102 uint64_t p, q;
2103
2104 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2105 if (r < 0)
2106 return r;
2107
2108 r = generic_array_bisect_plus_one(f,
2109 le64toh(d->data.entry_offset),
2110 le64toh(d->data.entry_array_offset),
2111 le64toh(d->data.n_entries),
2112 z,
2113 test_object_offset,
2114 direction,
2115 NULL, &p, NULL);
2116 if (r <= 0)
2117 return r;
2118
2119 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
2120 if (r < 0)
2121 return r;
2122
2123 r = generic_array_bisect_plus_one(f,
2124 le64toh(o->data.entry_offset),
2125 le64toh(o->data.entry_array_offset),
2126 le64toh(o->data.n_entries),
2127 p,
2128 test_object_offset,
2129 direction,
2130 &qo, &q, NULL);
2131
2132 if (r <= 0)
2133 return r;
2134
2135 if (p == q) {
2136 if (ret)
2137 *ret = qo;
2138 if (offset)
2139 *offset = q;
2140
2141 return 1;
2142 }
2143
2144 z = q;
2145 }
2146
2147 return 0;
2148}
2149
de190aef
LP
2150int journal_file_move_to_entry_by_seqnum_for_data(
2151 JournalFile *f,
2152 uint64_t data_offset,
2153 uint64_t seqnum,
2154 direction_t direction,
2155 Object **ret, uint64_t *offset) {
cec736d2 2156
de190aef
LP
2157 Object *d;
2158 int r;
cec736d2 2159
91a31dde
LP
2160 assert(f);
2161
de190aef 2162 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 2163 if (r < 0)
de190aef 2164 return r;
cec736d2 2165
de190aef
LP
2166 return generic_array_bisect_plus_one(f,
2167 le64toh(d->data.entry_offset),
2168 le64toh(d->data.entry_array_offset),
2169 le64toh(d->data.n_entries),
2170 seqnum,
2171 test_object_seqnum,
2172 direction,
2173 ret, offset, NULL);
2174}
cec736d2 2175
de190aef
LP
2176int journal_file_move_to_entry_by_realtime_for_data(
2177 JournalFile *f,
2178 uint64_t data_offset,
2179 uint64_t realtime,
2180 direction_t direction,
2181 Object **ret, uint64_t *offset) {
2182
2183 Object *d;
2184 int r;
2185
91a31dde
LP
2186 assert(f);
2187
de190aef 2188 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 2189 if (r < 0)
de190aef
LP
2190 return r;
2191
2192 return generic_array_bisect_plus_one(f,
2193 le64toh(d->data.entry_offset),
2194 le64toh(d->data.entry_array_offset),
2195 le64toh(d->data.n_entries),
2196 realtime,
2197 test_object_realtime,
2198 direction,
2199 ret, offset, NULL);
cec736d2
LP
2200}
2201
0284adc6 2202void journal_file_dump(JournalFile *f) {
7560fffc 2203 Object *o;
7560fffc 2204 int r;
0284adc6 2205 uint64_t p;
7560fffc
LP
2206
2207 assert(f);
2208
0284adc6 2209 journal_file_print_header(f);
7560fffc 2210
0284adc6
LP
2211 p = le64toh(f->header->header_size);
2212 while (p != 0) {
2213 r = journal_file_move_to_object(f, -1, p, &o);
2214 if (r < 0)
2215 goto fail;
7560fffc 2216
0284adc6 2217 switch (o->object.type) {
d98cc1f2 2218
0284adc6
LP
2219 case OBJECT_UNUSED:
2220 printf("Type: OBJECT_UNUSED\n");
2221 break;
d98cc1f2 2222
0284adc6
LP
2223 case OBJECT_DATA:
2224 printf("Type: OBJECT_DATA\n");
2225 break;
7560fffc 2226
3c1668da
LP
2227 case OBJECT_FIELD:
2228 printf("Type: OBJECT_FIELD\n");
2229 break;
2230
0284adc6 2231 case OBJECT_ENTRY:
507f22bd
ZJS
2232 printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
2233 le64toh(o->entry.seqnum),
2234 le64toh(o->entry.monotonic),
2235 le64toh(o->entry.realtime));
0284adc6 2236 break;
7560fffc 2237
0284adc6
LP
2238 case OBJECT_FIELD_HASH_TABLE:
2239 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2240 break;
7560fffc 2241
0284adc6
LP
2242 case OBJECT_DATA_HASH_TABLE:
2243 printf("Type: OBJECT_DATA_HASH_TABLE\n");
2244 break;
7560fffc 2245
0284adc6
LP
2246 case OBJECT_ENTRY_ARRAY:
2247 printf("Type: OBJECT_ENTRY_ARRAY\n");
2248 break;
7560fffc 2249
0284adc6 2250 case OBJECT_TAG:
507f22bd
ZJS
2251 printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
2252 le64toh(o->tag.seqnum),
2253 le64toh(o->tag.epoch));
0284adc6 2254 break;
3c1668da
LP
2255
2256 default:
2257 printf("Type: unknown (%u)\n", o->object.type);
2258 break;
0284adc6 2259 }
7560fffc 2260
0284adc6
LP
2261 if (o->object.flags & OBJECT_COMPRESSED)
2262 printf("Flags: COMPRESSED\n");
7560fffc 2263
0284adc6
LP
2264 if (p == le64toh(f->header->tail_object_offset))
2265 p = 0;
2266 else
2267 p = p + ALIGN64(le64toh(o->object.size));
2268 }
7560fffc 2269
0284adc6
LP
2270 return;
2271fail:
2272 log_error("File corrupt");
7560fffc
LP
2273}
2274
718fe4b1
ZJS
2275static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
2276 const char *x;
2277
2278 x = format_timestamp(buf, l, t);
2279 if (x)
2280 return x;
2281 return " --- ";
2282}
2283
0284adc6 2284void journal_file_print_header(JournalFile *f) {
2765b7bb 2285 char a[33], b[33], c[33], d[33];
ed375beb 2286 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
a1a03e30
LP
2287 struct stat st;
2288 char bytes[FORMAT_BYTES_MAX];
7560fffc
LP
2289
2290 assert(f);
7560fffc 2291
0284adc6
LP
2292 printf("File Path: %s\n"
2293 "File ID: %s\n"
2294 "Machine ID: %s\n"
2295 "Boot ID: %s\n"
2296 "Sequential Number ID: %s\n"
2297 "State: %s\n"
2298 "Compatible Flags:%s%s\n"
2299 "Incompatible Flags:%s%s\n"
507f22bd
ZJS
2300 "Header size: %"PRIu64"\n"
2301 "Arena size: %"PRIu64"\n"
2302 "Data Hash Table Size: %"PRIu64"\n"
2303 "Field Hash Table Size: %"PRIu64"\n"
0284adc6 2304 "Rotate Suggested: %s\n"
507f22bd
ZJS
2305 "Head Sequential Number: %"PRIu64"\n"
2306 "Tail Sequential Number: %"PRIu64"\n"
0284adc6 2307 "Head Realtime Timestamp: %s\n"
3223f44f 2308 "Tail Realtime Timestamp: %s\n"
ed375beb 2309 "Tail Monotonic Timestamp: %s\n"
507f22bd
ZJS
2310 "Objects: %"PRIu64"\n"
2311 "Entry Objects: %"PRIu64"\n",
0284adc6
LP
2312 f->path,
2313 sd_id128_to_string(f->header->file_id, a),
2314 sd_id128_to_string(f->header->machine_id, b),
2315 sd_id128_to_string(f->header->boot_id, c),
2765b7bb 2316 sd_id128_to_string(f->header->seqnum_id, d),
3223f44f
LP
2317 f->header->state == STATE_OFFLINE ? "OFFLINE" :
2318 f->header->state == STATE_ONLINE ? "ONLINE" :
2319 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
8088cbd3
LP
2320 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
2321 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
2322 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
2323 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
507f22bd
ZJS
2324 le64toh(f->header->header_size),
2325 le64toh(f->header->arena_size),
2326 le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2327 le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
fb0951b0 2328 yes_no(journal_file_rotate_suggested(f, 0)),
507f22bd
ZJS
2329 le64toh(f->header->head_entry_seqnum),
2330 le64toh(f->header->tail_entry_seqnum),
718fe4b1
ZJS
2331 format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2332 format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
ed375beb 2333 format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC),
507f22bd
ZJS
2334 le64toh(f->header->n_objects),
2335 le64toh(f->header->n_entries));
7560fffc 2336
0284adc6 2337 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
507f22bd 2338 printf("Data Objects: %"PRIu64"\n"
0284adc6 2339 "Data Hash Table Fill: %.1f%%\n",
507f22bd 2340 le64toh(f->header->n_data),
0284adc6 2341 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 2342
0284adc6 2343 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
507f22bd 2344 printf("Field Objects: %"PRIu64"\n"
0284adc6 2345 "Field Hash Table Fill: %.1f%%\n",
507f22bd 2346 le64toh(f->header->n_fields),
0284adc6 2347 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
2348
2349 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
507f22bd
ZJS
2350 printf("Tag Objects: %"PRIu64"\n",
2351 le64toh(f->header->n_tags));
3223f44f 2352 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
507f22bd
ZJS
2353 printf("Entry Array Objects: %"PRIu64"\n",
2354 le64toh(f->header->n_entry_arrays));
a1a03e30
LP
2355
2356 if (fstat(f->fd, &st) >= 0)
2357 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
7560fffc
LP
2358}
2359
0284adc6
LP
2360int journal_file_open(
2361 const char *fname,
2362 int flags,
2363 mode_t mode,
2364 bool compress,
baed47c3 2365 bool seal,
0284adc6
LP
2366 JournalMetrics *metrics,
2367 MMapCache *mmap_cache,
2368 JournalFile *template,
2369 JournalFile **ret) {
7560fffc 2370
0284adc6
LP
2371 JournalFile *f;
2372 int r;
2373 bool newly_created = false;
7560fffc 2374
0284adc6 2375 assert(fname);
0559d3a5 2376 assert(ret);
7560fffc 2377
0284adc6
LP
2378 if ((flags & O_ACCMODE) != O_RDONLY &&
2379 (flags & O_ACCMODE) != O_RDWR)
2380 return -EINVAL;
7560fffc 2381
a0108012
LP
2382 if (!endswith(fname, ".journal") &&
2383 !endswith(fname, ".journal~"))
0284adc6 2384 return -EINVAL;
7560fffc 2385
0284adc6
LP
2386 f = new0(JournalFile, 1);
2387 if (!f)
2388 return -ENOMEM;
7560fffc 2389
0284adc6
LP
2390 f->fd = -1;
2391 f->mode = mode;
7560fffc 2392
0284adc6
LP
2393 f->flags = flags;
2394 f->prot = prot_from_flags(flags);
2395 f->writable = (flags & O_ACCMODE) != O_RDONLY;
48b61739 2396#ifdef HAVE_XZ
0284adc6 2397 f->compress = compress;
48b61739 2398#endif
49a32d43 2399#ifdef HAVE_GCRYPT
baed47c3 2400 f->seal = seal;
49a32d43 2401#endif
7560fffc 2402
0284adc6
LP
2403 if (mmap_cache)
2404 f->mmap = mmap_cache_ref(mmap_cache);
2405 else {
84168d80 2406 f->mmap = mmap_cache_new();
0284adc6
LP
2407 if (!f->mmap) {
2408 r = -ENOMEM;
2409 goto fail;
2410 }
2411 }
7560fffc 2412
0284adc6
LP
2413 f->path = strdup(fname);
2414 if (!f->path) {
2415 r = -ENOMEM;
2416 goto fail;
2417 }
7560fffc 2418
a4bcff5b
LP
2419 f->chain_cache = hashmap_new(uint64_hash_func, uint64_compare_func);
2420 if (!f->chain_cache) {
2421 r = -ENOMEM;
2422 goto fail;
2423 }
2424
0284adc6
LP
2425 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2426 if (f->fd < 0) {
2427 r = -errno;
2428 goto fail;
7560fffc 2429 }
7560fffc 2430
0284adc6
LP
2431 if (fstat(f->fd, &f->last_stat) < 0) {
2432 r = -errno;
2433 goto fail;
2434 }
7560fffc 2435
0284adc6 2436 if (f->last_stat.st_size == 0 && f->writable) {
fb0951b0
LP
2437#ifdef HAVE_XATTR
2438 uint64_t crtime;
2439
2440 /* Let's attach the creation time to the journal file,
2441 * so that the vacuuming code knows the age of this
2442 * file even if the file might end up corrupted one
2443 * day... Ideally we'd just use the creation time many
2444 * file systems maintain for each file, but there is
2445 * currently no usable API to query this, hence let's
2446 * emulate this via extended attributes. If extended
2447 * attributes are not supported we'll just skip this,
2448 * and rely solely on mtime/atime/ctime of the file.*/
2449
2450 crtime = htole64((uint64_t) now(CLOCK_REALTIME));
2451 fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
2452#endif
7560fffc 2453
feb12d3e 2454#ifdef HAVE_GCRYPT
0284adc6 2455 /* Try to load the FSPRG state, and if we can't, then
baed47c3 2456 * just don't do sealing */
49a32d43
LP
2457 if (f->seal) {
2458 r = journal_file_fss_load(f);
2459 if (r < 0)
2460 f->seal = false;
2461 }
feb12d3e 2462#endif
7560fffc 2463
0284adc6
LP
2464 r = journal_file_init_header(f, template);
2465 if (r < 0)
2466 goto fail;
7560fffc 2467
0284adc6
LP
2468 if (fstat(f->fd, &f->last_stat) < 0) {
2469 r = -errno;
2470 goto fail;
2471 }
fb0951b0
LP
2472
2473 newly_created = true;
0284adc6 2474 }
7560fffc 2475
0284adc6
LP
2476 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2477 r = -EIO;
2478 goto fail;
2479 }
7560fffc 2480
0284adc6
LP
2481 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2482 if (f->header == MAP_FAILED) {
2483 f->header = NULL;
2484 r = -errno;
2485 goto fail;
2486 }
7560fffc 2487
0284adc6
LP
2488 if (!newly_created) {
2489 r = journal_file_verify_header(f);
2490 if (r < 0)
2491 goto fail;
2492 }
7560fffc 2493
feb12d3e 2494#ifdef HAVE_GCRYPT
0284adc6 2495 if (!newly_created && f->writable) {
baed47c3 2496 r = journal_file_fss_load(f);
0284adc6
LP
2497 if (r < 0)
2498 goto fail;
2499 }
feb12d3e 2500#endif
cec736d2
LP
2501
2502 if (f->writable) {
4a92baf3
LP
2503 if (metrics) {
2504 journal_default_metrics(metrics, f->fd);
2505 f->metrics = *metrics;
2506 } else if (template)
2507 f->metrics = template->metrics;
2508
cec736d2
LP
2509 r = journal_file_refresh_header(f);
2510 if (r < 0)
2511 goto fail;
2512 }
2513
feb12d3e 2514#ifdef HAVE_GCRYPT
baed47c3 2515 r = journal_file_hmac_setup(f);
14d10188
LP
2516 if (r < 0)
2517 goto fail;
feb12d3e 2518#endif
14d10188 2519
cec736d2 2520 if (newly_created) {
de190aef 2521 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2522 if (r < 0)
2523 goto fail;
2524
de190aef 2525 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2526 if (r < 0)
2527 goto fail;
7560fffc 2528
feb12d3e 2529#ifdef HAVE_GCRYPT
7560fffc
LP
2530 r = journal_file_append_first_tag(f);
2531 if (r < 0)
2532 goto fail;
feb12d3e 2533#endif
cec736d2
LP
2534 }
2535
de190aef 2536 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2537 if (r < 0)
2538 goto fail;
2539
de190aef 2540 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2541 if (r < 0)
2542 goto fail;
2543
0559d3a5 2544 *ret = f;
cec736d2
LP
2545 return 0;
2546
2547fail:
2548 journal_file_close(f);
2549
2550 return r;
2551}
0ac38b70 2552
baed47c3 2553int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
57535f47 2554 _cleanup_free_ char *p = NULL;
0ac38b70
LP
2555 size_t l;
2556 JournalFile *old_file, *new_file = NULL;
2557 int r;
2558
2559 assert(f);
2560 assert(*f);
2561
2562 old_file = *f;
2563
2564 if (!old_file->writable)
2565 return -EINVAL;
2566
2567 if (!endswith(old_file->path, ".journal"))
2568 return -EINVAL;
2569
2570 l = strlen(old_file->path);
57535f47
ZJS
2571 r = asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
2572 (int) l - 8, old_file->path,
2573 SD_ID128_FORMAT_VAL(old_file->header->seqnum_id),
2574 le64toh((*f)->header->head_entry_seqnum),
2575 le64toh((*f)->header->head_entry_realtime));
2576 if (r < 0)
0ac38b70
LP
2577 return -ENOMEM;
2578
0ac38b70 2579 r = rename(old_file->path, p);
0ac38b70
LP
2580 if (r < 0)
2581 return -errno;
2582
ccdbaf91 2583 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2584
baed47c3 2585 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2586 journal_file_close(old_file);
2587
2588 *f = new_file;
2589 return r;
2590}
2591
9447a7f1
LP
2592int journal_file_open_reliably(
2593 const char *fname,
2594 int flags,
2595 mode_t mode,
7560fffc 2596 bool compress,
baed47c3 2597 bool seal,
4a92baf3 2598 JournalMetrics *metrics,
27370278 2599 MMapCache *mmap_cache,
9447a7f1
LP
2600 JournalFile *template,
2601 JournalFile **ret) {
2602
2603 int r;
2604 size_t l;
ed375beb 2605 _cleanup_free_ char *p = NULL;
9447a7f1 2606
baed47c3 2607 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2608 metrics, mmap_cache, template, ret);
0071d9f1
LP
2609 if (r != -EBADMSG && /* corrupted */
2610 r != -ENODATA && /* truncated */
2611 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2612 r != -EPROTONOSUPPORT && /* incompatible feature */
2613 r != -EBUSY && /* unclean shutdown */
2614 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2615 return r;
2616
2617 if ((flags & O_ACCMODE) == O_RDONLY)
2618 return r;
2619
2620 if (!(flags & O_CREAT))
2621 return r;
2622
7560fffc
LP
2623 if (!endswith(fname, ".journal"))
2624 return r;
2625
5c70eab4
LP
2626 /* The file is corrupted. Rotate it away and try it again (but only once) */
2627
9447a7f1
LP
2628 l = strlen(fname);
2629 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
57535f47 2630 (int) l - 8, fname,
9447a7f1
LP
2631 (unsigned long long) now(CLOCK_REALTIME),
2632 random_ull()) < 0)
2633 return -ENOMEM;
2634
2635 r = rename(fname, p);
9447a7f1
LP
2636 if (r < 0)
2637 return -errno;
2638
a1a1898f 2639 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2640
baed47c3 2641 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2642 metrics, mmap_cache, template, ret);
9447a7f1
LP
2643}
2644
cf244689
LP
2645int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2646 uint64_t i, n;
2647 uint64_t q, xor_hash = 0;
2648 int r;
2649 EntryItem *items;
2650 dual_timestamp ts;
2651
2652 assert(from);
2653 assert(to);
2654 assert(o);
2655 assert(p);
2656
2657 if (!to->writable)
2658 return -EPERM;
2659
2660 ts.monotonic = le64toh(o->entry.monotonic);
2661 ts.realtime = le64toh(o->entry.realtime);
2662
2663 if (to->tail_entry_monotonic_valid &&
2664 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2665 return -EINVAL;
2666
cf244689
LP
2667 n = journal_file_entry_n_items(o);
2668 items = alloca(sizeof(EntryItem) * n);
2669
2670 for (i = 0; i < n; i++) {
4fd052ae
FC
2671 uint64_t l, h;
2672 le64_t le_hash;
cf244689
LP
2673 size_t t;
2674 void *data;
2675 Object *u;
2676
2677 q = le64toh(o->entry.items[i].object_offset);
2678 le_hash = o->entry.items[i].hash;
2679
2680 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2681 if (r < 0)
2682 return r;
2683
2684 if (le_hash != o->data.hash)
2685 return -EBADMSG;
2686
2687 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2688 t = (size_t) l;
2689
2690 /* We hit the limit on 32bit machines */
2691 if ((uint64_t) t != l)
2692 return -E2BIG;
2693
2694 if (o->object.flags & OBJECT_COMPRESSED) {
2695#ifdef HAVE_XZ
2696 uint64_t rsize;
2697
93b73b06 2698 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0))
cf244689
LP
2699 return -EBADMSG;
2700
2701 data = from->compress_buffer;
2702 l = rsize;
2703#else
2704 return -EPROTONOSUPPORT;
2705#endif
2706 } else
2707 data = o->data.payload;
2708
2709 r = journal_file_append_data(to, data, l, &u, &h);
2710 if (r < 0)
2711 return r;
2712
2713 xor_hash ^= le64toh(u->data.hash);
2714 items[i].object_offset = htole64(h);
2715 items[i].hash = u->data.hash;
2716
2717 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2718 if (r < 0)
2719 return r;
2720 }
2721
2722 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2723}
babfc091
LP
2724
2725void journal_default_metrics(JournalMetrics *m, int fd) {
2726 uint64_t fs_size = 0;
2727 struct statvfs ss;
a7bc2c2a 2728 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2729
2730 assert(m);
2731 assert(fd >= 0);
2732
2733 if (fstatvfs(fd, &ss) >= 0)
2734 fs_size = ss.f_frsize * ss.f_blocks;
2735
2736 if (m->max_use == (uint64_t) -1) {
2737
2738 if (fs_size > 0) {
2739 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2740
2741 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2742 m->max_use = DEFAULT_MAX_USE_UPPER;
2743
2744 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2745 m->max_use = DEFAULT_MAX_USE_LOWER;
2746 } else
2747 m->max_use = DEFAULT_MAX_USE_LOWER;
2748 } else {
2749 m->max_use = PAGE_ALIGN(m->max_use);
2750
2751 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2752 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2753 }
2754
2755 if (m->max_size == (uint64_t) -1) {
2756 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2757
2758 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2759 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2760 } else
2761 m->max_size = PAGE_ALIGN(m->max_size);
2762
2763 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2764 m->max_size = JOURNAL_FILE_SIZE_MIN;
2765
2766 if (m->max_size*2 > m->max_use)
2767 m->max_use = m->max_size*2;
2768
2769 if (m->min_size == (uint64_t) -1)
2770 m->min_size = JOURNAL_FILE_SIZE_MIN;
2771 else {
2772 m->min_size = PAGE_ALIGN(m->min_size);
2773
2774 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2775 m->min_size = JOURNAL_FILE_SIZE_MIN;
2776
2777 if (m->min_size > m->max_size)
2778 m->max_size = m->min_size;
2779 }
2780
2781 if (m->keep_free == (uint64_t) -1) {
2782
2783 if (fs_size > 0) {
8621b110 2784 m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
babfc091
LP
2785
2786 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2787 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2788
2789 } else
2790 m->keep_free = DEFAULT_KEEP_FREE;
2791 }
2792
2b43f939
LP
2793 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2794 format_bytes(a, sizeof(a), m->max_use),
2795 format_bytes(b, sizeof(b), m->max_size),
2796 format_bytes(c, sizeof(c), m->min_size),
2797 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2798}
08984293
LP
2799
2800int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2801 assert(f);
2802 assert(from || to);
2803
2804 if (from) {
162566a4
LP
2805 if (f->header->head_entry_realtime == 0)
2806 return -ENOENT;
08984293 2807
162566a4 2808 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2809 }
2810
2811 if (to) {
162566a4
LP
2812 if (f->header->tail_entry_realtime == 0)
2813 return -ENOENT;
08984293 2814
162566a4 2815 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2816 }
2817
2818 return 1;
2819}
2820
2821int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
08984293
LP
2822 Object *o;
2823 uint64_t p;
2824 int r;
2825
2826 assert(f);
2827 assert(from || to);
2828
47838ab3 2829 r = find_data_object_by_boot_id(f, boot_id, &o, &p);
08984293
LP
2830 if (r <= 0)
2831 return r;
2832
2833 if (le64toh(o->data.n_entries) <= 0)
2834 return 0;
2835
2836 if (from) {
2837 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2838 if (r < 0)
2839 return r;
2840
2841 *from = le64toh(o->entry.monotonic);
2842 }
2843
2844 if (to) {
2845 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2846 if (r < 0)
2847 return r;
2848
2849 r = generic_array_get_plus_one(f,
2850 le64toh(o->data.entry_offset),
2851 le64toh(o->data.entry_array_offset),
2852 le64toh(o->data.n_entries)-1,
2853 &o, NULL);
2854 if (r <= 0)
2855 return r;
2856
2857 *to = le64toh(o->entry.monotonic);
2858 }
2859
2860 return 1;
2861}
dca6219e 2862
fb0951b0 2863bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
dca6219e
LP
2864 assert(f);
2865
2866 /* If we gained new header fields we gained new features,
2867 * hence suggest a rotation */
361f9cbc
LP
2868 if (le64toh(f->header->header_size) < sizeof(Header)) {
2869 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2870 return true;
361f9cbc 2871 }
dca6219e
LP
2872
2873 /* Let's check if the hash tables grew over a certain fill
2874 * level (75%, borrowing this value from Java's hash table
2875 * implementation), and if so suggest a rotation. To calculate
2876 * the fill level we need the n_data field, which only exists
2877 * in newer versions. */
2878
2879 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc 2880 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
507f22bd 2881 log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
361f9cbc
LP
2882 f->path,
2883 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
507f22bd
ZJS
2884 le64toh(f->header->n_data),
2885 le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2886 (unsigned long long) f->last_stat.st_size,
2887 f->last_stat.st_size / le64toh(f->header->n_data));
dca6219e 2888 return true;
361f9cbc 2889 }
dca6219e
LP
2890
2891 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc 2892 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
507f22bd 2893 log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
361f9cbc
LP
2894 f->path,
2895 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
507f22bd
ZJS
2896 le64toh(f->header->n_fields),
2897 le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
dca6219e 2898 return true;
361f9cbc 2899 }
dca6219e 2900
0598fd4a
LP
2901 /* Are the data objects properly indexed by field objects? */
2902 if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
2903 JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
2904 le64toh(f->header->n_data) > 0 &&
2905 le64toh(f->header->n_fields) == 0)
2906 return true;
2907
fb0951b0
LP
2908 if (max_file_usec > 0) {
2909 usec_t t, h;
2910
2911 h = le64toh(f->header->head_entry_realtime);
2912 t = now(CLOCK_REALTIME);
2913
2914 if (h > 0 && t > h + max_file_usec)
2915 return true;
2916 }
2917
dca6219e
LP
2918 return false;
2919}