]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
Properly check for overflow in offsets
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
fb0951b0
LP
30#ifdef HAVE_XATTR
31#include <attr/xattr.h>
32#endif
33
cec736d2
LP
34#include "journal-def.h"
35#include "journal-file.h"
0284adc6 36#include "journal-authenticate.h"
cec736d2 37#include "lookup3.h"
807e17f0 38#include "compress.h"
7560fffc 39#include "fsprg.h"
cec736d2 40
4a92baf3
LP
/* Lower bound (in bytes) for the data hash table, and the fixed size (in
 * bytes) of the field hash table */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads shorter than this are never handed to the compressor */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (4ULL*1024ULL*1024ULL)                  /* 4 MiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)                  /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)          /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)               /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)        /* 4 GiB */

/* This is the keep_free value when we can't determine the system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                           /* 1 MiB */

/* n_data was the first entry we added after the initial file format design */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))

/* How many entries to keep in the entry array chain cache at max */
#define CHAIN_CACHE_MAX 20
70
26687bf8
OS
71int journal_file_set_online(JournalFile *f) {
72 assert(f);
73
74 if (!f->writable)
75 return -EPERM;
76
77 if (!(f->fd >= 0 && f->header))
78 return -EINVAL;
79
80 switch(f->header->state) {
81 case STATE_ONLINE:
82 return 0;
83
84 case STATE_OFFLINE:
85 f->header->state = STATE_ONLINE;
86 fsync(f->fd);
87 return 0;
88
89 default:
90 return -EINVAL;
91 }
92}
93
94int journal_file_set_offline(JournalFile *f) {
95 assert(f);
96
97 if (!f->writable)
98 return -EPERM;
99
100 if (!(f->fd >= 0 && f->header))
101 return -EINVAL;
102
103 if (f->header->state != STATE_ONLINE)
104 return 0;
105
106 fsync(f->fd);
107
108 f->header->state = STATE_OFFLINE;
109
110 fsync(f->fd);
111
112 return 0;
113}
114
cec736d2 115void journal_file_close(JournalFile *f) {
de190aef 116 assert(f);
cec736d2 117
feb12d3e 118#ifdef HAVE_GCRYPT
b0af6f41 119 /* Write the final tag */
c586dbf1 120 if (f->seal && f->writable)
b0af6f41 121 journal_file_append_tag(f);
feb12d3e 122#endif
b0af6f41 123
7560fffc 124 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
125 if (f->mmap && f->fd >= 0)
126 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc 127
26687bf8 128 journal_file_set_offline(f);
cec736d2 129
26687bf8 130 if (f->header)
d384c7a8 131 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
cec736d2 132
0ac38b70
LP
133 if (f->fd >= 0)
134 close_nointr_nofail(f->fd);
135
cec736d2 136 free(f->path);
807e17f0 137
16e9f408
LP
138 if (f->mmap)
139 mmap_cache_unref(f->mmap);
140
a4bcff5b
LP
141 hashmap_free_free(f->chain_cache);
142
807e17f0
LP
143#ifdef HAVE_XZ
144 free(f->compress_buffer);
145#endif
146
7560fffc 147#ifdef HAVE_GCRYPT
baed47c3
LP
148 if (f->fss_file)
149 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
150 else if (f->fsprg_state)
151 free(f->fsprg_state);
152
153 free(f->fsprg_seed);
7560fffc
LP
154
155 if (f->hmac)
156 gcry_md_close(f->hmac);
157#endif
158
cec736d2
LP
159 free(f);
160}
161
0ac38b70 162static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
163 Header h;
164 ssize_t k;
165 int r;
166
167 assert(f);
168
169 zero(h);
7560fffc 170 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 171 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 172
7560fffc
LP
173 h.incompatible_flags =
174 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
175
176 h.compatible_flags =
baed47c3 177 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 178
cec736d2
LP
179 r = sd_id128_randomize(&h.file_id);
180 if (r < 0)
181 return r;
182
0ac38b70
LP
183 if (template) {
184 h.seqnum_id = template->header->seqnum_id;
beec0085 185 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
186 } else
187 h.seqnum_id = h.file_id;
cec736d2
LP
188
189 k = pwrite(f->fd, &h, sizeof(h), 0);
190 if (k < 0)
191 return -errno;
192
193 if (k != sizeof(h))
194 return -EIO;
195
196 return 0;
197}
198
199static int journal_file_refresh_header(JournalFile *f) {
200 int r;
de190aef 201 sd_id128_t boot_id;
cec736d2
LP
202
203 assert(f);
204
205 r = sd_id128_get_machine(&f->header->machine_id);
206 if (r < 0)
207 return r;
208
de190aef 209 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
210 if (r < 0)
211 return r;
212
de190aef
LP
213 if (sd_id128_equal(boot_id, f->header->boot_id))
214 f->tail_entry_monotonic_valid = true;
215
216 f->header->boot_id = boot_id;
217
26687bf8 218 journal_file_set_online(f);
b788cc23 219
7560fffc
LP
220 /* Sync the online state to disk */
221 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
222 fdatasync(f->fd);
b788cc23 223
cec736d2
LP
224 return 0;
225}
226
227static int journal_file_verify_header(JournalFile *f) {
228 assert(f);
229
7560fffc 230 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
231 return -EBADMSG;
232
7560fffc
LP
233 /* In both read and write mode we refuse to open files with
234 * incompatible flags we don't know */
807e17f0 235#ifdef HAVE_XZ
7560fffc 236 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
237 return -EPROTONOSUPPORT;
238#else
cec736d2
LP
239 if (f->header->incompatible_flags != 0)
240 return -EPROTONOSUPPORT;
807e17f0 241#endif
cec736d2 242
7560fffc
LP
243 /* When open for writing we refuse to open files with
244 * compatible flags, too */
245 if (f->writable) {
246#ifdef HAVE_GCRYPT
baed47c3 247 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
248 return -EPROTONOSUPPORT;
249#else
250 if (f->header->compatible_flags != 0)
251 return -EPROTONOSUPPORT;
252#endif
253 }
254
db11ac1a
LP
255 if (f->header->state >= _STATE_MAX)
256 return -EBADMSG;
257
dca6219e
LP
258 /* The first addition was n_data, so check that we are at least this large */
259 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
260 return -EBADMSG;
261
8088cbd3 262 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
263 return -EBADMSG;
264
db11ac1a
LP
265 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
266 return -ENODATA;
267
268 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
269 return -ENODATA;
270
7762e02b
LP
271 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
272 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
273 !VALID64(le64toh(f->header->tail_object_offset)) ||
274 !VALID64(le64toh(f->header->entry_array_offset)))
275 return -ENODATA;
276
277 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
278 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
279 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
280 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
cec736d2
LP
281 return -ENODATA;
282
283 if (f->writable) {
ccdbaf91 284 uint8_t state;
cec736d2
LP
285 sd_id128_t machine_id;
286 int r;
287
288 r = sd_id128_get_machine(&machine_id);
289 if (r < 0)
290 return r;
291
292 if (!sd_id128_equal(machine_id, f->header->machine_id))
293 return -EHOSTDOWN;
294
de190aef 295 state = f->header->state;
cec736d2 296
71fa6f00
LP
297 if (state == STATE_ONLINE) {
298 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
299 return -EBUSY;
300 } else if (state == STATE_ARCHIVED)
cec736d2 301 return -ESHUTDOWN;
71fa6f00
LP
302 else if (state != STATE_OFFLINE) {
303 log_debug("Journal file %s has unknown state %u.", f->path, state);
304 return -EBUSY;
305 }
cec736d2
LP
306 }
307
8088cbd3 308 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
c586dbf1 309
f1889c91 310 f->seal = JOURNAL_HEADER_SEALED(f->header);
7560fffc 311
cec736d2
LP
312 return 0;
313}
314
315static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 316 uint64_t old_size, new_size;
fec2aa2f 317 int r;
cec736d2
LP
318
319 assert(f);
320
cec736d2 321 /* We assume that this file is not sparse, and we know that
38ac38b2 322 * for sure, since we always call posix_fallocate()
cec736d2
LP
323 * ourselves */
324
325 old_size =
23b0b2b2 326 le64toh(f->header->header_size) +
cec736d2
LP
327 le64toh(f->header->arena_size);
328
bc85bfee 329 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
330 if (new_size < le64toh(f->header->header_size))
331 new_size = le64toh(f->header->header_size);
bc85bfee
LP
332
333 if (new_size <= old_size)
cec736d2
LP
334 return 0;
335
bc85bfee
LP
336 if (f->metrics.max_size > 0 &&
337 new_size > f->metrics.max_size)
338 return -E2BIG;
cec736d2 339
bc85bfee
LP
340 if (new_size > f->metrics.min_size &&
341 f->metrics.keep_free > 0) {
cec736d2
LP
342 struct statvfs svfs;
343
344 if (fstatvfs(f->fd, &svfs) >= 0) {
345 uint64_t available;
346
347 available = svfs.f_bfree * svfs.f_bsize;
348
bc85bfee
LP
349 if (available >= f->metrics.keep_free)
350 available -= f->metrics.keep_free;
cec736d2
LP
351 else
352 available = 0;
353
354 if (new_size - old_size > available)
355 return -E2BIG;
356 }
357 }
358
bc85bfee
LP
359 /* Note that the glibc fallocate() fallback is very
360 inefficient, hence we try to minimize the allocation area
361 as we can. */
fec2aa2f
GV
362 r = posix_fallocate(f->fd, old_size, new_size - old_size);
363 if (r != 0)
364 return -r;
cec736d2
LP
365
366 if (fstat(f->fd, &f->last_stat) < 0)
367 return -errno;
368
23b0b2b2 369 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
370
371 return 0;
372}
373
fcde2389 374static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
cec736d2 375 assert(f);
cec736d2
LP
376 assert(ret);
377
7762e02b
LP
378 if (size <= 0)
379 return -EINVAL;
380
2a59ea54 381 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
382 if (offset + size > (uint64_t) f->last_stat.st_size) {
383 /* Hmm, out of range? Let's refresh the fstat() data
384 * first, before we trust that check. */
385
386 if (fstat(f->fd, &f->last_stat) < 0 ||
387 offset + size > (uint64_t) f->last_stat.st_size)
388 return -EADDRNOTAVAIL;
389 }
390
fcde2389 391 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
cec736d2
LP
392}
393
16e9f408
LP
394static uint64_t minimum_header_size(Object *o) {
395
396 static uint64_t table[] = {
397 [OBJECT_DATA] = sizeof(DataObject),
398 [OBJECT_FIELD] = sizeof(FieldObject),
399 [OBJECT_ENTRY] = sizeof(EntryObject),
400 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
401 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
402 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
403 [OBJECT_TAG] = sizeof(TagObject),
404 };
405
406 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
407 return sizeof(ObjectHeader);
408
409 return table[o->object.type];
410}
411
de190aef 412int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
413 int r;
414 void *t;
415 Object *o;
416 uint64_t s;
16e9f408 417 unsigned context;
cec736d2
LP
418
419 assert(f);
420 assert(ret);
421
db11ac1a
LP
422 /* Objects may only be located at multiple of 64 bit */
423 if (!VALID64(offset))
424 return -EFAULT;
425
16e9f408
LP
426 /* One context for each type, plus one catch-all for the rest */
427 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
428
fcde2389 429 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
430 if (r < 0)
431 return r;
432
433 o = (Object*) t;
434 s = le64toh(o->object.size);
435
436 if (s < sizeof(ObjectHeader))
437 return -EBADMSG;
438
16e9f408
LP
439 if (o->object.type <= OBJECT_UNUSED)
440 return -EBADMSG;
441
442 if (s < minimum_header_size(o))
443 return -EBADMSG;
444
3c1668da 445 if (type > 0 && o->object.type != type)
cec736d2
LP
446 return -EBADMSG;
447
448 if (s > sizeof(ObjectHeader)) {
fcde2389 449 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
cec736d2
LP
450 if (r < 0)
451 return r;
452
453 o = (Object*) t;
454 }
455
cec736d2
LP
456 *ret = o;
457 return 0;
458}
459
d98cc1f2 460static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
461 uint64_t r;
462
463 assert(f);
464
beec0085 465 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
466
467 if (seqnum) {
de190aef 468 /* If an external seqnum counter was passed, we update
c2373f84
LP
469 * both the local and the external one, and set it to
470 * the maximum of both */
471
472 if (*seqnum + 1 > r)
473 r = *seqnum + 1;
474
475 *seqnum = r;
476 }
477
beec0085 478 f->header->tail_entry_seqnum = htole64(r);
cec736d2 479
beec0085
LP
480 if (f->header->head_entry_seqnum == 0)
481 f->header->head_entry_seqnum = htole64(r);
de190aef 482
cec736d2
LP
483 return r;
484}
485
0284adc6 486int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
487 int r;
488 uint64_t p;
489 Object *tail, *o;
490 void *t;
491
492 assert(f);
16e9f408 493 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
494 assert(size >= sizeof(ObjectHeader));
495 assert(offset);
496 assert(ret);
497
26687bf8
OS
498 r = journal_file_set_online(f);
499 if (r < 0)
500 return r;
501
cec736d2 502 p = le64toh(f->header->tail_object_offset);
cec736d2 503 if (p == 0)
23b0b2b2 504 p = le64toh(f->header->header_size);
cec736d2 505 else {
de190aef 506 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
507 if (r < 0)
508 return r;
509
510 p += ALIGN64(le64toh(tail->object.size));
511 }
512
513 r = journal_file_allocate(f, p, size);
514 if (r < 0)
515 return r;
516
fcde2389 517 r = journal_file_move_to(f, type, false, p, size, &t);
cec736d2
LP
518 if (r < 0)
519 return r;
520
521 o = (Object*) t;
522
523 zero(o->object);
de190aef 524 o->object.type = type;
cec736d2
LP
525 o->object.size = htole64(size);
526
527 f->header->tail_object_offset = htole64(p);
cec736d2
LP
528 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
529
530 *ret = o;
531 *offset = p;
532
533 return 0;
534}
535
de190aef 536static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
537 uint64_t s, p;
538 Object *o;
539 int r;
540
541 assert(f);
542
dfabe643 543 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
544 journal file and we want to make sure we never get beyond
545 75% fill level. Calculate the hash table size for the
546 maximum file size based on these metrics. */
547
dfabe643 548 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
549 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
550 s = DEFAULT_DATA_HASH_TABLE_SIZE;
551
2b43f939 552 log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 553
de190aef
LP
554 r = journal_file_append_object(f,
555 OBJECT_DATA_HASH_TABLE,
556 offsetof(Object, hash_table.items) + s,
557 &o, &p);
cec736d2
LP
558 if (r < 0)
559 return r;
560
de190aef 561 memset(o->hash_table.items, 0, s);
cec736d2 562
de190aef
LP
563 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
564 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
565
566 return 0;
567}
568
de190aef 569static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
570 uint64_t s, p;
571 Object *o;
572 int r;
573
574 assert(f);
575
3c1668da
LP
576 /* We use a fixed size hash table for the fields as this
577 * number should grow very slowly only */
578
de190aef
LP
579 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
580 r = journal_file_append_object(f,
581 OBJECT_FIELD_HASH_TABLE,
582 offsetof(Object, hash_table.items) + s,
583 &o, &p);
cec736d2
LP
584 if (r < 0)
585 return r;
586
de190aef 587 memset(o->hash_table.items, 0, s);
cec736d2 588
de190aef
LP
589 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
590 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
591
592 return 0;
593}
594
de190aef 595static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
596 uint64_t s, p;
597 void *t;
598 int r;
599
600 assert(f);
601
de190aef
LP
602 p = le64toh(f->header->data_hash_table_offset);
603 s = le64toh(f->header->data_hash_table_size);
cec736d2 604
de190aef 605 r = journal_file_move_to(f,
16e9f408 606 OBJECT_DATA_HASH_TABLE,
fcde2389 607 true,
de190aef
LP
608 p, s,
609 &t);
cec736d2
LP
610 if (r < 0)
611 return r;
612
de190aef 613 f->data_hash_table = t;
cec736d2
LP
614 return 0;
615}
616
de190aef 617static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
618 uint64_t s, p;
619 void *t;
620 int r;
621
622 assert(f);
623
de190aef
LP
624 p = le64toh(f->header->field_hash_table_offset);
625 s = le64toh(f->header->field_hash_table_size);
cec736d2 626
de190aef 627 r = journal_file_move_to(f,
16e9f408 628 OBJECT_FIELD_HASH_TABLE,
fcde2389 629 true,
de190aef
LP
630 p, s,
631 &t);
cec736d2
LP
632 if (r < 0)
633 return r;
634
de190aef 635 f->field_hash_table = t;
cec736d2
LP
636 return 0;
637}
638
3c1668da
LP
639static int journal_file_link_field(
640 JournalFile *f,
641 Object *o,
642 uint64_t offset,
643 uint64_t hash) {
644
645 uint64_t p, h;
646 int r;
647
648 assert(f);
649 assert(o);
650 assert(offset > 0);
651
652 if (o->object.type != OBJECT_FIELD)
653 return -EINVAL;
654
655 /* This might alter the window we are looking at */
656
657 o->field.next_hash_offset = o->field.head_data_offset = 0;
658
659 h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
660 p = le64toh(f->field_hash_table[h].tail_hash_offset);
661 if (p == 0)
662 f->field_hash_table[h].head_hash_offset = htole64(offset);
663 else {
664 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
665 if (r < 0)
666 return r;
667
668 o->field.next_hash_offset = htole64(offset);
669 }
670
671 f->field_hash_table[h].tail_hash_offset = htole64(offset);
672
673 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
674 f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
675
676 return 0;
677}
678
679static int journal_file_link_data(
680 JournalFile *f,
681 Object *o,
682 uint64_t offset,
683 uint64_t hash) {
684
de190aef 685 uint64_t p, h;
cec736d2
LP
686 int r;
687
688 assert(f);
689 assert(o);
690 assert(offset > 0);
b588975f
LP
691
692 if (o->object.type != OBJECT_DATA)
693 return -EINVAL;
cec736d2 694
48496df6
LP
695 /* This might alter the window we are looking at */
696
de190aef
LP
697 o->data.next_hash_offset = o->data.next_field_offset = 0;
698 o->data.entry_offset = o->data.entry_array_offset = 0;
699 o->data.n_entries = 0;
cec736d2 700
de190aef 701 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 702 p = le64toh(f->data_hash_table[h].tail_hash_offset);
3c1668da 703 if (p == 0)
cec736d2 704 /* Only entry in the hash table is easy */
de190aef 705 f->data_hash_table[h].head_hash_offset = htole64(offset);
3c1668da 706 else {
48496df6
LP
707 /* Move back to the previous data object, to patch in
708 * pointer */
cec736d2 709
de190aef 710 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
711 if (r < 0)
712 return r;
713
de190aef 714 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
715 }
716
de190aef 717 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 718
dca6219e
LP
719 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
720 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
721
cec736d2
LP
722 return 0;
723}
724
3c1668da
LP
725int journal_file_find_field_object_with_hash(
726 JournalFile *f,
727 const void *field, uint64_t size, uint64_t hash,
728 Object **ret, uint64_t *offset) {
729
730 uint64_t p, osize, h;
731 int r;
732
733 assert(f);
734 assert(field && size > 0);
735
736 osize = offsetof(Object, field.payload) + size;
737
738 if (f->header->field_hash_table_size == 0)
739 return -EBADMSG;
740
741 h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
742 p = le64toh(f->field_hash_table[h].head_hash_offset);
743
744 while (p > 0) {
745 Object *o;
746
747 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
748 if (r < 0)
749 return r;
750
751 if (le64toh(o->field.hash) == hash &&
752 le64toh(o->object.size) == osize &&
753 memcmp(o->field.payload, field, size) == 0) {
754
755 if (ret)
756 *ret = o;
757 if (offset)
758 *offset = p;
759
760 return 1;
761 }
762
763 p = le64toh(o->field.next_hash_offset);
764 }
765
766 return 0;
767}
768
769int journal_file_find_field_object(
770 JournalFile *f,
771 const void *field, uint64_t size,
772 Object **ret, uint64_t *offset) {
773
774 uint64_t hash;
775
776 assert(f);
777 assert(field && size > 0);
778
779 hash = hash64(field, size);
780
781 return journal_file_find_field_object_with_hash(f,
782 field, size, hash,
783 ret, offset);
784}
785
de190aef
LP
786int journal_file_find_data_object_with_hash(
787 JournalFile *f,
788 const void *data, uint64_t size, uint64_t hash,
789 Object **ret, uint64_t *offset) {
48496df6 790
de190aef 791 uint64_t p, osize, h;
cec736d2
LP
792 int r;
793
794 assert(f);
795 assert(data || size == 0);
796
797 osize = offsetof(Object, data.payload) + size;
798
bc85bfee
LP
799 if (f->header->data_hash_table_size == 0)
800 return -EBADMSG;
801
de190aef
LP
802 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
803 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 804
de190aef
LP
805 while (p > 0) {
806 Object *o;
cec736d2 807
de190aef 808 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
809 if (r < 0)
810 return r;
811
807e17f0 812 if (le64toh(o->data.hash) != hash)
85a131e8 813 goto next;
807e17f0
LP
814
815 if (o->object.flags & OBJECT_COMPRESSED) {
816#ifdef HAVE_XZ
b785c858 817 uint64_t l, rsize;
cec736d2 818
807e17f0
LP
819 l = le64toh(o->object.size);
820 if (l <= offsetof(Object, data.payload))
cec736d2
LP
821 return -EBADMSG;
822
807e17f0
LP
823 l -= offsetof(Object, data.payload);
824
93b73b06 825 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0))
807e17f0
LP
826 return -EBADMSG;
827
b785c858 828 if (rsize == size &&
807e17f0
LP
829 memcmp(f->compress_buffer, data, size) == 0) {
830
831 if (ret)
832 *ret = o;
833
834 if (offset)
835 *offset = p;
836
837 return 1;
838 }
839#else
840 return -EPROTONOSUPPORT;
841#endif
842
843 } else if (le64toh(o->object.size) == osize &&
844 memcmp(o->data.payload, data, size) == 0) {
845
cec736d2
LP
846 if (ret)
847 *ret = o;
848
849 if (offset)
850 *offset = p;
851
de190aef 852 return 1;
cec736d2
LP
853 }
854
85a131e8 855 next:
cec736d2
LP
856 p = le64toh(o->data.next_hash_offset);
857 }
858
de190aef
LP
859 return 0;
860}
861
862int journal_file_find_data_object(
863 JournalFile *f,
864 const void *data, uint64_t size,
865 Object **ret, uint64_t *offset) {
866
867 uint64_t hash;
868
869 assert(f);
870 assert(data || size == 0);
871
872 hash = hash64(data, size);
873
874 return journal_file_find_data_object_with_hash(f,
875 data, size, hash,
876 ret, offset);
877}
878
3c1668da
LP
879static int journal_file_append_field(
880 JournalFile *f,
881 const void *field, uint64_t size,
882 Object **ret, uint64_t *offset) {
883
884 uint64_t hash, p;
885 uint64_t osize;
886 Object *o;
887 int r;
888
889 assert(f);
890 assert(field && size > 0);
891
892 hash = hash64(field, size);
893
894 r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
895 if (r < 0)
896 return r;
897 else if (r > 0) {
898
899 if (ret)
900 *ret = o;
901
902 if (offset)
903 *offset = p;
904
905 return 0;
906 }
907
908 osize = offsetof(Object, field.payload) + size;
909 r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
910
911 o->field.hash = htole64(hash);
912 memcpy(o->field.payload, field, size);
913
914 r = journal_file_link_field(f, o, p, hash);
915 if (r < 0)
916 return r;
917
918 /* The linking might have altered the window, so let's
919 * refresh our pointer */
920 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
921 if (r < 0)
922 return r;
923
924#ifdef HAVE_GCRYPT
925 r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
926 if (r < 0)
927 return r;
928#endif
929
930 if (ret)
931 *ret = o;
932
933 if (offset)
934 *offset = p;
935
936 return 0;
937}
938
48496df6
LP
939static int journal_file_append_data(
940 JournalFile *f,
941 const void *data, uint64_t size,
942 Object **ret, uint64_t *offset) {
943
de190aef
LP
944 uint64_t hash, p;
945 uint64_t osize;
946 Object *o;
947 int r;
807e17f0 948 bool compressed = false;
3c1668da 949 const void *eq;
de190aef
LP
950
951 assert(f);
952 assert(data || size == 0);
953
954 hash = hash64(data, size);
955
956 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
957 if (r < 0)
958 return r;
959 else if (r > 0) {
960
961 if (ret)
962 *ret = o;
963
964 if (offset)
965 *offset = p;
966
967 return 0;
968 }
969
970 osize = offsetof(Object, data.payload) + size;
971 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
972 if (r < 0)
973 return r;
974
cec736d2 975 o->data.hash = htole64(hash);
807e17f0
LP
976
977#ifdef HAVE_XZ
978 if (f->compress &&
979 size >= COMPRESSION_SIZE_THRESHOLD) {
980 uint64_t rsize;
981
982 compressed = compress_blob(data, size, o->data.payload, &rsize);
983
984 if (compressed) {
985 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
986 o->object.flags |= OBJECT_COMPRESSED;
987
807e17f0
LP
988 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
989 }
990 }
991#endif
992
64825d3c 993 if (!compressed && size > 0)
807e17f0 994 memcpy(o->data.payload, data, size);
cec736d2 995
de190aef 996 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
997 if (r < 0)
998 return r;
999
48496df6
LP
1000 /* The linking might have altered the window, so let's
1001 * refresh our pointer */
1002 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1003 if (r < 0)
1004 return r;
1005
3c1668da
LP
1006 eq = memchr(data, '=', size);
1007 if (eq && eq > data) {
1008 uint64_t fp;
1009 Object *fo;
1010
1011 /* Create field object ... */
1012 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
1013 if (r < 0)
1014 return r;
1015
1016 /* ... and link it in. */
1017 o->data.next_field_offset = fo->field.head_data_offset;
1018 fo->field.head_data_offset = le64toh(p);
1019 }
1020
5996c7c2
LP
1021#ifdef HAVE_GCRYPT
1022 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
1023 if (r < 0)
1024 return r;
1025#endif
1026
cec736d2
LP
1027 if (ret)
1028 *ret = o;
1029
1030 if (offset)
de190aef 1031 *offset = p;
cec736d2
LP
1032
1033 return 0;
1034}
1035
1036uint64_t journal_file_entry_n_items(Object *o) {
1037 assert(o);
b588975f
LP
1038
1039 if (o->object.type != OBJECT_ENTRY)
1040 return 0;
cec736d2
LP
1041
1042 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
1043}
1044
0284adc6 1045uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 1046 assert(o);
b588975f
LP
1047
1048 if (o->object.type != OBJECT_ENTRY_ARRAY)
1049 return 0;
de190aef
LP
1050
1051 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
1052}
1053
fb9a24b6
LP
1054uint64_t journal_file_hash_table_n_items(Object *o) {
1055 assert(o);
b588975f
LP
1056
1057 if (o->object.type != OBJECT_DATA_HASH_TABLE &&
1058 o->object.type != OBJECT_FIELD_HASH_TABLE)
1059 return 0;
fb9a24b6
LP
1060
1061 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
1062}
1063
de190aef 1064static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
1065 le64_t *first,
1066 le64_t *idx,
de190aef 1067 uint64_t p) {
cec736d2 1068 int r;
de190aef
LP
1069 uint64_t n = 0, ap = 0, q, i, a, hidx;
1070 Object *o;
1071
cec736d2 1072 assert(f);
de190aef
LP
1073 assert(first);
1074 assert(idx);
1075 assert(p > 0);
cec736d2 1076
de190aef
LP
1077 a = le64toh(*first);
1078 i = hidx = le64toh(*idx);
1079 while (a > 0) {
1080
1081 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1082 if (r < 0)
1083 return r;
cec736d2 1084
de190aef
LP
1085 n = journal_file_entry_array_n_items(o);
1086 if (i < n) {
1087 o->entry_array.items[i] = htole64(p);
1088 *idx = htole64(hidx + 1);
1089 return 0;
1090 }
cec736d2 1091
de190aef
LP
1092 i -= n;
1093 ap = a;
1094 a = le64toh(o->entry_array.next_entry_array_offset);
1095 }
1096
1097 if (hidx > n)
1098 n = (hidx+1) * 2;
1099 else
1100 n = n * 2;
1101
1102 if (n < 4)
1103 n = 4;
1104
1105 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
1106 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
1107 &o, &q);
cec736d2
LP
1108 if (r < 0)
1109 return r;
1110
feb12d3e 1111#ifdef HAVE_GCRYPT
5996c7c2 1112 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
b0af6f41
LP
1113 if (r < 0)
1114 return r;
feb12d3e 1115#endif
b0af6f41 1116
de190aef 1117 o->entry_array.items[i] = htole64(p);
cec736d2 1118
de190aef 1119 if (ap == 0)
7be3aa17 1120 *first = htole64(q);
cec736d2 1121 else {
de190aef 1122 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
1123 if (r < 0)
1124 return r;
1125
de190aef
LP
1126 o->entry_array.next_entry_array_offset = htole64(q);
1127 }
cec736d2 1128
2dee23eb
LP
1129 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1130 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
1131
de190aef
LP
1132 *idx = htole64(hidx + 1);
1133
1134 return 0;
1135}
cec736d2 1136
de190aef 1137static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
1138 le64_t *extra,
1139 le64_t *first,
1140 le64_t *idx,
de190aef
LP
1141 uint64_t p) {
1142
1143 int r;
1144
1145 assert(f);
1146 assert(extra);
1147 assert(first);
1148 assert(idx);
1149 assert(p > 0);
1150
1151 if (*idx == 0)
1152 *extra = htole64(p);
1153 else {
4fd052ae 1154 le64_t i;
de190aef 1155
7be3aa17 1156 i = htole64(le64toh(*idx) - 1);
de190aef
LP
1157 r = link_entry_into_array(f, first, &i, p);
1158 if (r < 0)
1159 return r;
cec736d2
LP
1160 }
1161
de190aef
LP
1162 *idx = htole64(le64toh(*idx) + 1);
1163 return 0;
1164}
1165
1166static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
1167 uint64_t p;
1168 int r;
1169 assert(f);
1170 assert(o);
1171 assert(offset > 0);
1172
1173 p = le64toh(o->entry.items[i].object_offset);
1174 if (p == 0)
1175 return -EINVAL;
1176
1177 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
1178 if (r < 0)
1179 return r;
1180
de190aef
LP
1181 return link_entry_into_array_plus_one(f,
1182 &o->data.entry_offset,
1183 &o->data.entry_array_offset,
1184 &o->data.n_entries,
1185 offset);
cec736d2
LP
1186}
1187
1188static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 1189 uint64_t n, i;
cec736d2
LP
1190 int r;
1191
1192 assert(f);
1193 assert(o);
1194 assert(offset > 0);
b588975f
LP
1195
1196 if (o->object.type != OBJECT_ENTRY)
1197 return -EINVAL;
cec736d2 1198
b788cc23
LP
1199 __sync_synchronize();
1200
cec736d2 1201 /* Link up the entry itself */
de190aef
LP
1202 r = link_entry_into_array(f,
1203 &f->header->entry_array_offset,
1204 &f->header->n_entries,
1205 offset);
1206 if (r < 0)
1207 return r;
cec736d2 1208
aaf53376 1209 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 1210
de190aef 1211 if (f->header->head_entry_realtime == 0)
0ac38b70 1212 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 1213
0ac38b70 1214 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
1215 f->header->tail_entry_monotonic = o->entry.monotonic;
1216
1217 f->tail_entry_monotonic_valid = true;
cec736d2
LP
1218
1219 /* Link up the items */
1220 n = journal_file_entry_n_items(o);
1221 for (i = 0; i < n; i++) {
1222 r = journal_file_link_entry_item(f, o, offset, i);
1223 if (r < 0)
1224 return r;
1225 }
1226
cec736d2
LP
1227 return 0;
1228}
1229
1230static int journal_file_append_entry_internal(
1231 JournalFile *f,
1232 const dual_timestamp *ts,
1233 uint64_t xor_hash,
1234 const EntryItem items[], unsigned n_items,
de190aef 1235 uint64_t *seqnum,
cec736d2
LP
1236 Object **ret, uint64_t *offset) {
1237 uint64_t np;
1238 uint64_t osize;
1239 Object *o;
1240 int r;
1241
1242 assert(f);
1243 assert(items || n_items == 0);
de190aef 1244 assert(ts);
cec736d2
LP
1245
1246 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1247
de190aef 1248 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
1249 if (r < 0)
1250 return r;
1251
d98cc1f2 1252 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 1253 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
1254 o->entry.realtime = htole64(ts->realtime);
1255 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
1256 o->entry.xor_hash = htole64(xor_hash);
1257 o->entry.boot_id = f->header->boot_id;
1258
feb12d3e 1259#ifdef HAVE_GCRYPT
5996c7c2 1260 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
b0af6f41
LP
1261 if (r < 0)
1262 return r;
feb12d3e 1263#endif
b0af6f41 1264
cec736d2
LP
1265 r = journal_file_link_entry(f, o, np);
1266 if (r < 0)
1267 return r;
1268
1269 if (ret)
1270 *ret = o;
1271
1272 if (offset)
1273 *offset = np;
1274
1275 return 0;
1276}
1277
cf244689 1278void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1279 assert(f);
1280
1281 /* inotify() does not receive IN_MODIFY events from file
1282 * accesses done via mmap(). After each access we hence
1283 * trigger IN_MODIFY by truncating the journal file to its
1284 * current size which triggers IN_MODIFY. */
1285
bc85bfee
LP
1286 __sync_synchronize();
1287
50f20cfd 1288 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
c5315881 1289 log_error("Failed to truncate file to its own size: %m");
50f20cfd
LP
1290}
1291
1f2da9ec
LP
1292static int entry_item_cmp(const void *_a, const void *_b) {
1293 const EntryItem *a = _a, *b = _b;
1294
1295 if (le64toh(a->object_offset) < le64toh(b->object_offset))
1296 return -1;
1297 if (le64toh(a->object_offset) > le64toh(b->object_offset))
1298 return 1;
1299 return 0;
1300}
1301
de190aef 1302int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1303 unsigned i;
1304 EntryItem *items;
1305 int r;
1306 uint64_t xor_hash = 0;
de190aef 1307 struct dual_timestamp _ts;
cec736d2
LP
1308
1309 assert(f);
1310 assert(iovec || n_iovec == 0);
1311
de190aef
LP
1312 if (!ts) {
1313 dual_timestamp_get(&_ts);
1314 ts = &_ts;
1315 }
1316
1317 if (f->tail_entry_monotonic_valid &&
1318 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1319 return -EINVAL;
1320
feb12d3e 1321#ifdef HAVE_GCRYPT
7560fffc
LP
1322 r = journal_file_maybe_append_tag(f, ts->realtime);
1323 if (r < 0)
1324 return r;
feb12d3e 1325#endif
7560fffc 1326
64825d3c 1327 /* alloca() can't take 0, hence let's allocate at least one */
9607d947 1328 items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));
cec736d2
LP
1329
1330 for (i = 0; i < n_iovec; i++) {
1331 uint64_t p;
1332 Object *o;
1333
1334 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1335 if (r < 0)
cf244689 1336 return r;
cec736d2
LP
1337
1338 xor_hash ^= le64toh(o->data.hash);
1339 items[i].object_offset = htole64(p);
de7b95cd 1340 items[i].hash = o->data.hash;
cec736d2
LP
1341 }
1342
1f2da9ec
LP
1343 /* Order by the position on disk, in order to improve seek
1344 * times for rotating media. */
1345 qsort(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1346
de190aef 1347 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1348
50f20cfd
LP
1349 journal_file_post_change(f);
1350
cec736d2
LP
1351 return r;
1352}
1353
a4bcff5b
LP
/* One cached position inside an entry array chain, keyed by the offset
 * of the chain's first array. */
typedef struct ChainCacheItem {
        /* the array at the beginning of the chain */
        uint64_t first;

        /* the cached array */
        uint64_t array;

        /* the first item in the cached array */
        uint64_t begin;

        /* the total number of items in all arrays before this one in the chain */
        uint64_t total;
} ChainCacheItem;
1360
1361static void chain_cache_put(
1362 Hashmap *h,
1363 ChainCacheItem *ci,
1364 uint64_t first,
1365 uint64_t array,
1366 uint64_t begin,
1367 uint64_t total) {
1368
1369 if (!ci) {
34741aa3
LP
1370 /* If the chain item to cache for this chain is the
1371 * first one it's not worth caching anything */
1372 if (array == first)
1373 return;
1374
a4bcff5b
LP
1375 if (hashmap_size(h) >= CHAIN_CACHE_MAX)
1376 ci = hashmap_steal_first(h);
1377 else {
1378 ci = new(ChainCacheItem, 1);
1379 if (!ci)
1380 return;
1381 }
1382
1383 ci->first = first;
1384
1385 if (hashmap_put(h, &ci->first, ci) < 0) {
1386 free(ci);
1387 return;
1388 }
1389 } else
1390 assert(ci->first == first);
1391
1392 ci->array = array;
1393 ci->begin = begin;
1394 ci->total = total;
1395}
1396
de190aef
LP
1397static int generic_array_get(JournalFile *f,
1398 uint64_t first,
1399 uint64_t i,
1400 Object **ret, uint64_t *offset) {
1401
cec736d2 1402 Object *o;
a4bcff5b 1403 uint64_t p = 0, a, t = 0;
cec736d2 1404 int r;
a4bcff5b 1405 ChainCacheItem *ci;
cec736d2
LP
1406
1407 assert(f);
1408
de190aef 1409 a = first;
a4bcff5b
LP
1410
1411 /* Try the chain cache first */
1412 ci = hashmap_get(f->chain_cache, &first);
1413 if (ci && i > ci->total) {
1414 a = ci->array;
1415 i -= ci->total;
1416 t = ci->total;
1417 }
1418
de190aef 1419 while (a > 0) {
a4bcff5b 1420 uint64_t k;
cec736d2 1421
de190aef
LP
1422 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1423 if (r < 0)
1424 return r;
cec736d2 1425
a4bcff5b
LP
1426 k = journal_file_entry_array_n_items(o);
1427 if (i < k) {
de190aef 1428 p = le64toh(o->entry_array.items[i]);
a4bcff5b 1429 goto found;
cec736d2
LP
1430 }
1431
a4bcff5b
LP
1432 i -= k;
1433 t += k;
de190aef
LP
1434 a = le64toh(o->entry_array.next_entry_array_offset);
1435 }
1436
a4bcff5b
LP
1437 return 0;
1438
1439found:
1440 /* Let's cache this item for the next invocation */
1441 chain_cache_put(f->chain_cache, ci, first, a, o->entry_array.items[0], t);
de190aef
LP
1442
1443 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1444 if (r < 0)
1445 return r;
1446
1447 if (ret)
1448 *ret = o;
1449
1450 if (offset)
1451 *offset = p;
1452
1453 return 1;
1454}
1455
1456static int generic_array_get_plus_one(JournalFile *f,
1457 uint64_t extra,
1458 uint64_t first,
1459 uint64_t i,
1460 Object **ret, uint64_t *offset) {
1461
1462 Object *o;
1463
1464 assert(f);
1465
1466 if (i == 0) {
1467 int r;
1468
1469 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1470 if (r < 0)
1471 return r;
1472
de190aef
LP
1473 if (ret)
1474 *ret = o;
cec736d2 1475
de190aef
LP
1476 if (offset)
1477 *offset = extra;
cec736d2 1478
de190aef 1479 return 1;
cec736d2
LP
1480 }
1481
de190aef
LP
1482 return generic_array_get(f, first, i-1, ret, offset);
1483}
cec736d2 1484
de190aef
LP
/* Results of the test_object() callbacks used for bisection */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object sorts before the needle */
        TEST_RIGHT = 2  /* the tested object sorts after the needle */
};
cec736d2 1490
de190aef
LP
1491static int generic_array_bisect(JournalFile *f,
1492 uint64_t first,
1493 uint64_t n,
1494 uint64_t needle,
1495 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1496 direction_t direction,
1497 Object **ret,
1498 uint64_t *offset,
1499 uint64_t *idx) {
1500
1501 uint64_t a, p, t = 0, i = 0, last_p = 0;
1502 bool subtract_one = false;
1503 Object *o, *array = NULL;
1504 int r;
a4bcff5b 1505 ChainCacheItem *ci;
cec736d2 1506
de190aef
LP
1507 assert(f);
1508 assert(test_object);
cec736d2 1509
a4bcff5b 1510 /* Start with the first array in the chain */
de190aef 1511 a = first;
a4bcff5b
LP
1512
1513 ci = hashmap_get(f->chain_cache, &first);
1514 if (ci && n > ci->total) {
1515 /* Ah, we have iterated this bisection array chain
1516 * previously! Let's see if we can skip ahead in the
1517 * chain, as far as the last time. But we can't jump
1518 * backwards in the chain, so let's check that
1519 * first. */
1520
1521 r = test_object(f, ci->begin, needle);
1522 if (r < 0)
1523 return r;
1524
1525 if (r == TEST_LEFT) {
1526 /* OK, what we are looking for is right of th
1527 * begin of this EntryArray, so let's jump
1528 * straight to previously cached array in the
1529 * chain */
1530
1531 a = ci->array;
1532 n -= ci->total;
1533 t = ci->total;
1534 }
1535 }
1536
de190aef
LP
1537 while (a > 0) {
1538 uint64_t left, right, k, lp;
1539
1540 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1541 if (r < 0)
1542 return r;
1543
de190aef
LP
1544 k = journal_file_entry_array_n_items(array);
1545 right = MIN(k, n);
1546 if (right <= 0)
1547 return 0;
cec736d2 1548
de190aef
LP
1549 i = right - 1;
1550 lp = p = le64toh(array->entry_array.items[i]);
1551 if (p <= 0)
1552 return -EBADMSG;
cec736d2 1553
de190aef
LP
1554 r = test_object(f, p, needle);
1555 if (r < 0)
1556 return r;
cec736d2 1557
de190aef
LP
1558 if (r == TEST_FOUND)
1559 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1560
1561 if (r == TEST_RIGHT) {
1562 left = 0;
1563 right -= 1;
1564 for (;;) {
1565 if (left == right) {
1566 if (direction == DIRECTION_UP)
1567 subtract_one = true;
1568
1569 i = left;
1570 goto found;
1571 }
1572
1573 assert(left < right);
1574
1575 i = (left + right) / 2;
1576 p = le64toh(array->entry_array.items[i]);
1577 if (p <= 0)
1578 return -EBADMSG;
1579
1580 r = test_object(f, p, needle);
1581 if (r < 0)
1582 return r;
cec736d2 1583
de190aef
LP
1584 if (r == TEST_FOUND)
1585 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1586
1587 if (r == TEST_RIGHT)
1588 right = i;
1589 else
1590 left = i + 1;
1591 }
1592 }
1593
cbdca852
LP
1594 if (k > n) {
1595 if (direction == DIRECTION_UP) {
1596 i = n;
1597 subtract_one = true;
1598 goto found;
1599 }
1600
cec736d2 1601 return 0;
cbdca852 1602 }
cec736d2 1603
de190aef
LP
1604 last_p = lp;
1605
1606 n -= k;
1607 t += k;
1608 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1609 }
1610
1611 return 0;
de190aef
LP
1612
1613found:
1614 if (subtract_one && t == 0 && i == 0)
1615 return 0;
1616
a4bcff5b
LP
1617 /* Let's cache this item for the next invocation */
1618 chain_cache_put(f->chain_cache, ci, first, a, array->entry_array.items[0], t);
1619
de190aef
LP
1620 if (subtract_one && i == 0)
1621 p = last_p;
1622 else if (subtract_one)
1623 p = le64toh(array->entry_array.items[i-1]);
1624 else
1625 p = le64toh(array->entry_array.items[i]);
1626
1627 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1628 if (r < 0)
1629 return r;
1630
1631 if (ret)
1632 *ret = o;
1633
1634 if (offset)
1635 *offset = p;
1636
1637 if (idx)
cbdca852 1638 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1639
1640 return 1;
cec736d2
LP
1641}
1642
de190aef
LP
1643static int generic_array_bisect_plus_one(JournalFile *f,
1644 uint64_t extra,
1645 uint64_t first,
1646 uint64_t n,
1647 uint64_t needle,
1648 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1649 direction_t direction,
1650 Object **ret,
1651 uint64_t *offset,
1652 uint64_t *idx) {
1653
cec736d2 1654 int r;
cbdca852
LP
1655 bool step_back = false;
1656 Object *o;
cec736d2
LP
1657
1658 assert(f);
de190aef 1659 assert(test_object);
cec736d2 1660
de190aef
LP
1661 if (n <= 0)
1662 return 0;
cec736d2 1663
de190aef
LP
1664 /* This bisects the array in object 'first', but first checks
1665 * an extra */
de190aef
LP
1666 r = test_object(f, extra, needle);
1667 if (r < 0)
1668 return r;
a536e261
LP
1669
1670 if (r == TEST_FOUND)
1671 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1672
cbdca852
LP
1673 /* if we are looking with DIRECTION_UP then we need to first
1674 see if in the actual array there is a matching entry, and
1675 return the last one of that. But if there isn't any we need
1676 to return this one. Hence remember this, and return it
1677 below. */
1678 if (r == TEST_LEFT)
1679 step_back = direction == DIRECTION_UP;
de190aef 1680
cbdca852
LP
1681 if (r == TEST_RIGHT) {
1682 if (direction == DIRECTION_DOWN)
1683 goto found;
1684 else
1685 return 0;
a536e261 1686 }
cec736d2 1687
de190aef
LP
1688 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1689
cbdca852
LP
1690 if (r == 0 && step_back)
1691 goto found;
1692
ecf68b1d 1693 if (r > 0 && idx)
de190aef
LP
1694 (*idx) ++;
1695
1696 return r;
cbdca852
LP
1697
1698found:
1699 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1700 if (r < 0)
1701 return r;
1702
1703 if (ret)
1704 *ret = o;
1705
1706 if (offset)
1707 *offset = extra;
1708
1709 if (idx)
1710 *idx = 0;
1711
1712 return 1;
1713}
1714
44a6b1b6 1715_pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
cbdca852
LP
1716 assert(f);
1717 assert(p > 0);
1718
1719 if (p == needle)
1720 return TEST_FOUND;
1721 else if (p < needle)
1722 return TEST_LEFT;
1723 else
1724 return TEST_RIGHT;
1725}
1726
1727int journal_file_move_to_entry_by_offset(
1728 JournalFile *f,
1729 uint64_t p,
1730 direction_t direction,
1731 Object **ret,
1732 uint64_t *offset) {
1733
1734 return generic_array_bisect(f,
1735 le64toh(f->header->entry_array_offset),
1736 le64toh(f->header->n_entries),
1737 p,
1738 test_object_offset,
1739 direction,
1740 ret, offset, NULL);
de190aef
LP
1741}
1742
cbdca852 1743
de190aef
LP
1744static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1745 Object *o;
1746 int r;
1747
1748 assert(f);
1749 assert(p > 0);
1750
1751 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1752 if (r < 0)
1753 return r;
1754
de190aef
LP
1755 if (le64toh(o->entry.seqnum) == needle)
1756 return TEST_FOUND;
1757 else if (le64toh(o->entry.seqnum) < needle)
1758 return TEST_LEFT;
1759 else
1760 return TEST_RIGHT;
1761}
cec736d2 1762
de190aef
LP
1763int journal_file_move_to_entry_by_seqnum(
1764 JournalFile *f,
1765 uint64_t seqnum,
1766 direction_t direction,
1767 Object **ret,
1768 uint64_t *offset) {
1769
1770 return generic_array_bisect(f,
1771 le64toh(f->header->entry_array_offset),
1772 le64toh(f->header->n_entries),
1773 seqnum,
1774 test_object_seqnum,
1775 direction,
1776 ret, offset, NULL);
1777}
cec736d2 1778
de190aef
LP
1779static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1780 Object *o;
1781 int r;
1782
1783 assert(f);
1784 assert(p > 0);
1785
1786 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1787 if (r < 0)
1788 return r;
1789
1790 if (le64toh(o->entry.realtime) == needle)
1791 return TEST_FOUND;
1792 else if (le64toh(o->entry.realtime) < needle)
1793 return TEST_LEFT;
1794 else
1795 return TEST_RIGHT;
cec736d2
LP
1796}
1797
de190aef
LP
1798int journal_file_move_to_entry_by_realtime(
1799 JournalFile *f,
1800 uint64_t realtime,
1801 direction_t direction,
1802 Object **ret,
1803 uint64_t *offset) {
1804
1805 return generic_array_bisect(f,
1806 le64toh(f->header->entry_array_offset),
1807 le64toh(f->header->n_entries),
1808 realtime,
1809 test_object_realtime,
1810 direction,
1811 ret, offset, NULL);
1812}
1813
1814static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1815 Object *o;
1816 int r;
1817
1818 assert(f);
1819 assert(p > 0);
1820
1821 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1822 if (r < 0)
1823 return r;
1824
1825 if (le64toh(o->entry.monotonic) == needle)
1826 return TEST_FOUND;
1827 else if (le64toh(o->entry.monotonic) < needle)
1828 return TEST_LEFT;
1829 else
1830 return TEST_RIGHT;
1831}
1832
47838ab3
ZJS
1833static inline int find_data_object_by_boot_id(
1834 JournalFile *f,
1835 sd_id128_t boot_id,
1836 Object **o,
1837 uint64_t *b) {
1838 char t[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
1839
1840 sd_id128_to_string(boot_id, t + 9);
1841 return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
1842}
1843
de190aef
LP
1844int journal_file_move_to_entry_by_monotonic(
1845 JournalFile *f,
1846 sd_id128_t boot_id,
1847 uint64_t monotonic,
1848 direction_t direction,
1849 Object **ret,
1850 uint64_t *offset) {
1851
de190aef
LP
1852 Object *o;
1853 int r;
1854
cbdca852 1855 assert(f);
de190aef 1856
47838ab3 1857 r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
de190aef
LP
1858 if (r < 0)
1859 return r;
cbdca852 1860 if (r == 0)
de190aef
LP
1861 return -ENOENT;
1862
1863 return generic_array_bisect_plus_one(f,
1864 le64toh(o->data.entry_offset),
1865 le64toh(o->data.entry_array_offset),
1866 le64toh(o->data.n_entries),
1867 monotonic,
1868 test_object_monotonic,
1869 direction,
1870 ret, offset, NULL);
1871}
1872
de190aef
LP
1873int journal_file_next_entry(
1874 JournalFile *f,
1875 Object *o, uint64_t p,
1876 direction_t direction,
1877 Object **ret, uint64_t *offset) {
1878
1879 uint64_t i, n;
cec736d2
LP
1880 int r;
1881
1882 assert(f);
de190aef
LP
1883 assert(p > 0 || !o);
1884
1885 n = le64toh(f->header->n_entries);
1886 if (n <= 0)
1887 return 0;
cec736d2
LP
1888
1889 if (!o)
de190aef 1890 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1891 else {
de190aef 1892 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1893 return -EINVAL;
1894
de190aef
LP
1895 r = generic_array_bisect(f,
1896 le64toh(f->header->entry_array_offset),
1897 le64toh(f->header->n_entries),
1898 p,
1899 test_object_offset,
1900 DIRECTION_DOWN,
1901 NULL, NULL,
1902 &i);
1903 if (r <= 0)
1904 return r;
1905
1906 if (direction == DIRECTION_DOWN) {
1907 if (i >= n - 1)
1908 return 0;
1909
1910 i++;
1911 } else {
1912 if (i <= 0)
1913 return 0;
1914
1915 i--;
1916 }
cec736d2
LP
1917 }
1918
de190aef
LP
1919 /* And jump to it */
1920 return generic_array_get(f,
1921 le64toh(f->header->entry_array_offset),
1922 i,
1923 ret, offset);
1924}
cec736d2 1925
de190aef
LP
1926int journal_file_skip_entry(
1927 JournalFile *f,
1928 Object *o, uint64_t p,
1929 int64_t skip,
1930 Object **ret, uint64_t *offset) {
1931
1932 uint64_t i, n;
1933 int r;
1934
1935 assert(f);
1936 assert(o);
1937 assert(p > 0);
1938
1939 if (o->object.type != OBJECT_ENTRY)
1940 return -EINVAL;
1941
1942 r = generic_array_bisect(f,
1943 le64toh(f->header->entry_array_offset),
1944 le64toh(f->header->n_entries),
1945 p,
1946 test_object_offset,
1947 DIRECTION_DOWN,
1948 NULL, NULL,
1949 &i);
1950 if (r <= 0)
cec736d2
LP
1951 return r;
1952
de190aef
LP
1953 /* Calculate new index */
1954 if (skip < 0) {
1955 if ((uint64_t) -skip >= i)
1956 i = 0;
1957 else
1958 i = i - (uint64_t) -skip;
1959 } else
1960 i += (uint64_t) skip;
cec736d2 1961
de190aef
LP
1962 n = le64toh(f->header->n_entries);
1963 if (n <= 0)
1964 return -EBADMSG;
cec736d2 1965
de190aef
LP
1966 if (i >= n)
1967 i = n-1;
1968
1969 return generic_array_get(f,
1970 le64toh(f->header->entry_array_offset),
1971 i,
1972 ret, offset);
cec736d2
LP
1973}
1974
de190aef
LP
1975int journal_file_next_entry_for_data(
1976 JournalFile *f,
1977 Object *o, uint64_t p,
1978 uint64_t data_offset,
1979 direction_t direction,
1980 Object **ret, uint64_t *offset) {
1981
1982 uint64_t n, i;
cec736d2 1983 int r;
de190aef 1984 Object *d;
cec736d2
LP
1985
1986 assert(f);
de190aef 1987 assert(p > 0 || !o);
cec736d2 1988
de190aef 1989 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1990 if (r < 0)
de190aef 1991 return r;
cec736d2 1992
de190aef
LP
1993 n = le64toh(d->data.n_entries);
1994 if (n <= 0)
1995 return n;
cec736d2 1996
de190aef
LP
1997 if (!o)
1998 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1999 else {
2000 if (o->object.type != OBJECT_ENTRY)
2001 return -EINVAL;
cec736d2 2002
de190aef
LP
2003 r = generic_array_bisect_plus_one(f,
2004 le64toh(d->data.entry_offset),
2005 le64toh(d->data.entry_array_offset),
2006 le64toh(d->data.n_entries),
2007 p,
2008 test_object_offset,
2009 DIRECTION_DOWN,
2010 NULL, NULL,
2011 &i);
2012
2013 if (r <= 0)
cec736d2
LP
2014 return r;
2015
de190aef
LP
2016 if (direction == DIRECTION_DOWN) {
2017 if (i >= n - 1)
2018 return 0;
cec736d2 2019
de190aef
LP
2020 i++;
2021 } else {
2022 if (i <= 0)
2023 return 0;
cec736d2 2024
de190aef
LP
2025 i--;
2026 }
cec736d2 2027
de190aef 2028 }
cec736d2 2029
de190aef
LP
2030 return generic_array_get_plus_one(f,
2031 le64toh(d->data.entry_offset),
2032 le64toh(d->data.entry_array_offset),
2033 i,
2034 ret, offset);
2035}
cec736d2 2036
cbdca852
LP
2037int journal_file_move_to_entry_by_offset_for_data(
2038 JournalFile *f,
2039 uint64_t data_offset,
2040 uint64_t p,
2041 direction_t direction,
2042 Object **ret, uint64_t *offset) {
2043
2044 int r;
2045 Object *d;
2046
2047 assert(f);
2048
2049 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2050 if (r < 0)
2051 return r;
2052
2053 return generic_array_bisect_plus_one(f,
2054 le64toh(d->data.entry_offset),
2055 le64toh(d->data.entry_array_offset),
2056 le64toh(d->data.n_entries),
2057 p,
2058 test_object_offset,
2059 direction,
2060 ret, offset, NULL);
2061}
2062
2063int journal_file_move_to_entry_by_monotonic_for_data(
2064 JournalFile *f,
2065 uint64_t data_offset,
2066 sd_id128_t boot_id,
2067 uint64_t monotonic,
2068 direction_t direction,
2069 Object **ret, uint64_t *offset) {
2070
cbdca852
LP
2071 Object *o, *d;
2072 int r;
2073 uint64_t b, z;
2074
2075 assert(f);
2076
2077 /* First, seek by time */
47838ab3 2078 r = find_data_object_by_boot_id(f, boot_id, &o, &b);
cbdca852
LP
2079 if (r < 0)
2080 return r;
2081 if (r == 0)
2082 return -ENOENT;
2083
2084 r = generic_array_bisect_plus_one(f,
2085 le64toh(o->data.entry_offset),
2086 le64toh(o->data.entry_array_offset),
2087 le64toh(o->data.n_entries),
2088 monotonic,
2089 test_object_monotonic,
2090 direction,
2091 NULL, &z, NULL);
2092 if (r <= 0)
2093 return r;
2094
2095 /* And now, continue seeking until we find an entry that
2096 * exists in both bisection arrays */
2097
2098 for (;;) {
2099 Object *qo;
2100 uint64_t p, q;
2101
2102 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2103 if (r < 0)
2104 return r;
2105
2106 r = generic_array_bisect_plus_one(f,
2107 le64toh(d->data.entry_offset),
2108 le64toh(d->data.entry_array_offset),
2109 le64toh(d->data.n_entries),
2110 z,
2111 test_object_offset,
2112 direction,
2113 NULL, &p, NULL);
2114 if (r <= 0)
2115 return r;
2116
2117 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
2118 if (r < 0)
2119 return r;
2120
2121 r = generic_array_bisect_plus_one(f,
2122 le64toh(o->data.entry_offset),
2123 le64toh(o->data.entry_array_offset),
2124 le64toh(o->data.n_entries),
2125 p,
2126 test_object_offset,
2127 direction,
2128 &qo, &q, NULL);
2129
2130 if (r <= 0)
2131 return r;
2132
2133 if (p == q) {
2134 if (ret)
2135 *ret = qo;
2136 if (offset)
2137 *offset = q;
2138
2139 return 1;
2140 }
2141
2142 z = q;
2143 }
2144
2145 return 0;
2146}
2147
de190aef
LP
2148int journal_file_move_to_entry_by_seqnum_for_data(
2149 JournalFile *f,
2150 uint64_t data_offset,
2151 uint64_t seqnum,
2152 direction_t direction,
2153 Object **ret, uint64_t *offset) {
cec736d2 2154
de190aef
LP
2155 Object *d;
2156 int r;
cec736d2 2157
91a31dde
LP
2158 assert(f);
2159
de190aef 2160 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 2161 if (r < 0)
de190aef 2162 return r;
cec736d2 2163
de190aef
LP
2164 return generic_array_bisect_plus_one(f,
2165 le64toh(d->data.entry_offset),
2166 le64toh(d->data.entry_array_offset),
2167 le64toh(d->data.n_entries),
2168 seqnum,
2169 test_object_seqnum,
2170 direction,
2171 ret, offset, NULL);
2172}
cec736d2 2173
de190aef
LP
2174int journal_file_move_to_entry_by_realtime_for_data(
2175 JournalFile *f,
2176 uint64_t data_offset,
2177 uint64_t realtime,
2178 direction_t direction,
2179 Object **ret, uint64_t *offset) {
2180
2181 Object *d;
2182 int r;
2183
91a31dde
LP
2184 assert(f);
2185
de190aef 2186 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 2187 if (r < 0)
de190aef
LP
2188 return r;
2189
2190 return generic_array_bisect_plus_one(f,
2191 le64toh(d->data.entry_offset),
2192 le64toh(d->data.entry_array_offset),
2193 le64toh(d->data.n_entries),
2194 realtime,
2195 test_object_realtime,
2196 direction,
2197 ret, offset, NULL);
cec736d2
LP
2198}
2199
0284adc6 2200void journal_file_dump(JournalFile *f) {
7560fffc 2201 Object *o;
7560fffc 2202 int r;
0284adc6 2203 uint64_t p;
7560fffc
LP
2204
2205 assert(f);
2206
0284adc6 2207 journal_file_print_header(f);
7560fffc 2208
0284adc6
LP
2209 p = le64toh(f->header->header_size);
2210 while (p != 0) {
2211 r = journal_file_move_to_object(f, -1, p, &o);
2212 if (r < 0)
2213 goto fail;
7560fffc 2214
0284adc6 2215 switch (o->object.type) {
d98cc1f2 2216
0284adc6
LP
2217 case OBJECT_UNUSED:
2218 printf("Type: OBJECT_UNUSED\n");
2219 break;
d98cc1f2 2220
0284adc6
LP
2221 case OBJECT_DATA:
2222 printf("Type: OBJECT_DATA\n");
2223 break;
7560fffc 2224
3c1668da
LP
2225 case OBJECT_FIELD:
2226 printf("Type: OBJECT_FIELD\n");
2227 break;
2228
0284adc6 2229 case OBJECT_ENTRY:
f7fab8a5 2230 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
0284adc6
LP
2231 (unsigned long long) le64toh(o->entry.seqnum),
2232 (unsigned long long) le64toh(o->entry.monotonic),
2233 (unsigned long long) le64toh(o->entry.realtime));
2234 break;
7560fffc 2235
0284adc6
LP
2236 case OBJECT_FIELD_HASH_TABLE:
2237 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2238 break;
7560fffc 2239
0284adc6
LP
2240 case OBJECT_DATA_HASH_TABLE:
2241 printf("Type: OBJECT_DATA_HASH_TABLE\n");
2242 break;
7560fffc 2243
0284adc6
LP
2244 case OBJECT_ENTRY_ARRAY:
2245 printf("Type: OBJECT_ENTRY_ARRAY\n");
2246 break;
7560fffc 2247
0284adc6 2248 case OBJECT_TAG:
f7fab8a5
LP
2249 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
2250 (unsigned long long) le64toh(o->tag.seqnum),
2251 (unsigned long long) le64toh(o->tag.epoch));
0284adc6 2252 break;
3c1668da
LP
2253
2254 default:
2255 printf("Type: unknown (%u)\n", o->object.type);
2256 break;
0284adc6 2257 }
7560fffc 2258
0284adc6
LP
2259 if (o->object.flags & OBJECT_COMPRESSED)
2260 printf("Flags: COMPRESSED\n");
7560fffc 2261
0284adc6
LP
2262 if (p == le64toh(f->header->tail_object_offset))
2263 p = 0;
2264 else
2265 p = p + ALIGN64(le64toh(o->object.size));
2266 }
7560fffc 2267
0284adc6
LP
2268 return;
2269fail:
2270 log_error("File corrupt");
7560fffc
LP
2271}
2272
0284adc6
LP
2273void journal_file_print_header(JournalFile *f) {
2274 char a[33], b[33], c[33];
2275 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
a1a03e30
LP
2276 struct stat st;
2277 char bytes[FORMAT_BYTES_MAX];
7560fffc
LP
2278
2279 assert(f);
7560fffc 2280
0284adc6
LP
2281 printf("File Path: %s\n"
2282 "File ID: %s\n"
2283 "Machine ID: %s\n"
2284 "Boot ID: %s\n"
2285 "Sequential Number ID: %s\n"
2286 "State: %s\n"
2287 "Compatible Flags:%s%s\n"
2288 "Incompatible Flags:%s%s\n"
2289 "Header size: %llu\n"
2290 "Arena size: %llu\n"
2291 "Data Hash Table Size: %llu\n"
2292 "Field Hash Table Size: %llu\n"
0284adc6
LP
2293 "Rotate Suggested: %s\n"
2294 "Head Sequential Number: %llu\n"
2295 "Tail Sequential Number: %llu\n"
2296 "Head Realtime Timestamp: %s\n"
3223f44f
LP
2297 "Tail Realtime Timestamp: %s\n"
2298 "Objects: %llu\n"
2299 "Entry Objects: %llu\n",
0284adc6
LP
2300 f->path,
2301 sd_id128_to_string(f->header->file_id, a),
2302 sd_id128_to_string(f->header->machine_id, b),
2303 sd_id128_to_string(f->header->boot_id, c),
2304 sd_id128_to_string(f->header->seqnum_id, c),
3223f44f
LP
2305 f->header->state == STATE_OFFLINE ? "OFFLINE" :
2306 f->header->state == STATE_ONLINE ? "ONLINE" :
2307 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
8088cbd3
LP
2308 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
2309 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
2310 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
2311 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
0284adc6
LP
2312 (unsigned long long) le64toh(f->header->header_size),
2313 (unsigned long long) le64toh(f->header->arena_size),
2314 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2315 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
fb0951b0 2316 yes_no(journal_file_rotate_suggested(f, 0)),
0284adc6
LP
2317 (unsigned long long) le64toh(f->header->head_entry_seqnum),
2318 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
2319 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
3223f44f
LP
2320 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
2321 (unsigned long long) le64toh(f->header->n_objects),
2322 (unsigned long long) le64toh(f->header->n_entries));
7560fffc 2323
0284adc6
LP
2324 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2325 printf("Data Objects: %llu\n"
2326 "Data Hash Table Fill: %.1f%%\n",
2327 (unsigned long long) le64toh(f->header->n_data),
2328 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 2329
0284adc6
LP
2330 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2331 printf("Field Objects: %llu\n"
2332 "Field Hash Table Fill: %.1f%%\n",
2333 (unsigned long long) le64toh(f->header->n_fields),
2334 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
2335
2336 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
2337 printf("Tag Objects: %llu\n",
2338 (unsigned long long) le64toh(f->header->n_tags));
2339 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
2340 printf("Entry Array Objects: %llu\n",
2341 (unsigned long long) le64toh(f->header->n_entry_arrays));
a1a03e30
LP
2342
2343 if (fstat(f->fd, &st) >= 0)
2344 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
7560fffc
LP
2345}
2346
0284adc6
LP
2347int journal_file_open(
2348 const char *fname,
2349 int flags,
2350 mode_t mode,
2351 bool compress,
baed47c3 2352 bool seal,
0284adc6
LP
2353 JournalMetrics *metrics,
2354 MMapCache *mmap_cache,
2355 JournalFile *template,
2356 JournalFile **ret) {
7560fffc 2357
0284adc6
LP
2358 JournalFile *f;
2359 int r;
2360 bool newly_created = false;
7560fffc 2361
0284adc6 2362 assert(fname);
0559d3a5 2363 assert(ret);
7560fffc 2364
0284adc6
LP
2365 if ((flags & O_ACCMODE) != O_RDONLY &&
2366 (flags & O_ACCMODE) != O_RDWR)
2367 return -EINVAL;
7560fffc 2368
a0108012
LP
2369 if (!endswith(fname, ".journal") &&
2370 !endswith(fname, ".journal~"))
0284adc6 2371 return -EINVAL;
7560fffc 2372
0284adc6
LP
2373 f = new0(JournalFile, 1);
2374 if (!f)
2375 return -ENOMEM;
7560fffc 2376
0284adc6
LP
2377 f->fd = -1;
2378 f->mode = mode;
7560fffc 2379
0284adc6
LP
2380 f->flags = flags;
2381 f->prot = prot_from_flags(flags);
2382 f->writable = (flags & O_ACCMODE) != O_RDONLY;
48b61739 2383#ifdef HAVE_XZ
0284adc6 2384 f->compress = compress;
48b61739 2385#endif
49a32d43 2386#ifdef HAVE_GCRYPT
baed47c3 2387 f->seal = seal;
49a32d43 2388#endif
7560fffc 2389
0284adc6
LP
2390 if (mmap_cache)
2391 f->mmap = mmap_cache_ref(mmap_cache);
2392 else {
84168d80 2393 f->mmap = mmap_cache_new();
0284adc6
LP
2394 if (!f->mmap) {
2395 r = -ENOMEM;
2396 goto fail;
2397 }
2398 }
7560fffc 2399
0284adc6
LP
2400 f->path = strdup(fname);
2401 if (!f->path) {
2402 r = -ENOMEM;
2403 goto fail;
2404 }
7560fffc 2405
a4bcff5b
LP
2406 f->chain_cache = hashmap_new(uint64_hash_func, uint64_compare_func);
2407 if (!f->chain_cache) {
2408 r = -ENOMEM;
2409 goto fail;
2410 }
2411
0284adc6
LP
2412 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2413 if (f->fd < 0) {
2414 r = -errno;
2415 goto fail;
7560fffc 2416 }
7560fffc 2417
0284adc6
LP
2418 if (fstat(f->fd, &f->last_stat) < 0) {
2419 r = -errno;
2420 goto fail;
2421 }
7560fffc 2422
0284adc6 2423 if (f->last_stat.st_size == 0 && f->writable) {
fb0951b0
LP
2424#ifdef HAVE_XATTR
2425 uint64_t crtime;
2426
2427 /* Let's attach the creation time to the journal file,
2428 * so that the vacuuming code knows the age of this
2429 * file even if the file might end up corrupted one
2430 * day... Ideally we'd just use the creation time many
2431 * file systems maintain for each file, but there is
2432 * currently no usable API to query this, hence let's
2433 * emulate this via extended attributes. If extended
2434 * attributes are not supported we'll just skip this,
2435 * and rely solely on mtime/atime/ctime of the file.*/
2436
2437 crtime = htole64((uint64_t) now(CLOCK_REALTIME));
2438 fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
2439#endif
7560fffc 2440
feb12d3e 2441#ifdef HAVE_GCRYPT
0284adc6 2442 /* Try to load the FSPRG state, and if we can't, then
baed47c3 2443 * just don't do sealing */
49a32d43
LP
2444 if (f->seal) {
2445 r = journal_file_fss_load(f);
2446 if (r < 0)
2447 f->seal = false;
2448 }
feb12d3e 2449#endif
7560fffc 2450
0284adc6
LP
2451 r = journal_file_init_header(f, template);
2452 if (r < 0)
2453 goto fail;
7560fffc 2454
0284adc6
LP
2455 if (fstat(f->fd, &f->last_stat) < 0) {
2456 r = -errno;
2457 goto fail;
2458 }
fb0951b0
LP
2459
2460 newly_created = true;
0284adc6 2461 }
7560fffc 2462
0284adc6
LP
2463 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2464 r = -EIO;
2465 goto fail;
2466 }
7560fffc 2467
0284adc6
LP
2468 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2469 if (f->header == MAP_FAILED) {
2470 f->header = NULL;
2471 r = -errno;
2472 goto fail;
2473 }
7560fffc 2474
0284adc6
LP
2475 if (!newly_created) {
2476 r = journal_file_verify_header(f);
2477 if (r < 0)
2478 goto fail;
2479 }
7560fffc 2480
feb12d3e 2481#ifdef HAVE_GCRYPT
0284adc6 2482 if (!newly_created && f->writable) {
baed47c3 2483 r = journal_file_fss_load(f);
0284adc6
LP
2484 if (r < 0)
2485 goto fail;
2486 }
feb12d3e 2487#endif
cec736d2
LP
2488
2489 if (f->writable) {
4a92baf3
LP
2490 if (metrics) {
2491 journal_default_metrics(metrics, f->fd);
2492 f->metrics = *metrics;
2493 } else if (template)
2494 f->metrics = template->metrics;
2495
cec736d2
LP
2496 r = journal_file_refresh_header(f);
2497 if (r < 0)
2498 goto fail;
2499 }
2500
feb12d3e 2501#ifdef HAVE_GCRYPT
baed47c3 2502 r = journal_file_hmac_setup(f);
14d10188
LP
2503 if (r < 0)
2504 goto fail;
feb12d3e 2505#endif
14d10188 2506
cec736d2 2507 if (newly_created) {
de190aef 2508 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2509 if (r < 0)
2510 goto fail;
2511
de190aef 2512 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2513 if (r < 0)
2514 goto fail;
7560fffc 2515
feb12d3e 2516#ifdef HAVE_GCRYPT
7560fffc
LP
2517 r = journal_file_append_first_tag(f);
2518 if (r < 0)
2519 goto fail;
feb12d3e 2520#endif
cec736d2
LP
2521 }
2522
de190aef 2523 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2524 if (r < 0)
2525 goto fail;
2526
de190aef 2527 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2528 if (r < 0)
2529 goto fail;
2530
0559d3a5 2531 *ret = f;
cec736d2
LP
2532 return 0;
2533
2534fail:
2535 journal_file_close(f);
2536
2537 return r;
2538}
0ac38b70 2539
baed47c3 2540int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
0ac38b70
LP
2541 char *p;
2542 size_t l;
2543 JournalFile *old_file, *new_file = NULL;
2544 int r;
2545
2546 assert(f);
2547 assert(*f);
2548
2549 old_file = *f;
2550
2551 if (!old_file->writable)
2552 return -EINVAL;
2553
2554 if (!endswith(old_file->path, ".journal"))
2555 return -EINVAL;
2556
2557 l = strlen(old_file->path);
2558
9447a7f1 2559 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2560 if (!p)
2561 return -ENOMEM;
2562
2563 memcpy(p, old_file->path, l - 8);
2564 p[l-8] = '@';
2565 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2566 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2567 "-%016llx-%016llx.journal",
fb0951b0
LP
2568 (unsigned long long) le64toh((*f)->header->head_entry_seqnum),
2569 (unsigned long long) le64toh((*f)->header->head_entry_realtime));
0ac38b70
LP
2570
2571 r = rename(old_file->path, p);
2572 free(p);
2573
2574 if (r < 0)
2575 return -errno;
2576
ccdbaf91 2577 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2578
baed47c3 2579 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2580 journal_file_close(old_file);
2581
2582 *f = new_file;
2583 return r;
2584}
2585
9447a7f1
LP
2586int journal_file_open_reliably(
2587 const char *fname,
2588 int flags,
2589 mode_t mode,
7560fffc 2590 bool compress,
baed47c3 2591 bool seal,
4a92baf3 2592 JournalMetrics *metrics,
27370278 2593 MMapCache *mmap_cache,
9447a7f1
LP
2594 JournalFile *template,
2595 JournalFile **ret) {
2596
2597 int r;
2598 size_t l;
2599 char *p;
2600
baed47c3 2601 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2602 metrics, mmap_cache, template, ret);
0071d9f1
LP
2603 if (r != -EBADMSG && /* corrupted */
2604 r != -ENODATA && /* truncated */
2605 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2606 r != -EPROTONOSUPPORT && /* incompatible feature */
2607 r != -EBUSY && /* unclean shutdown */
2608 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2609 return r;
2610
2611 if ((flags & O_ACCMODE) == O_RDONLY)
2612 return r;
2613
2614 if (!(flags & O_CREAT))
2615 return r;
2616
7560fffc
LP
2617 if (!endswith(fname, ".journal"))
2618 return r;
2619
5c70eab4
LP
2620 /* The file is corrupted. Rotate it away and try it again (but only once) */
2621
9447a7f1
LP
2622 l = strlen(fname);
2623 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2624 (int) (l-8), fname,
2625 (unsigned long long) now(CLOCK_REALTIME),
2626 random_ull()) < 0)
2627 return -ENOMEM;
2628
2629 r = rename(fname, p);
2630 free(p);
2631 if (r < 0)
2632 return -errno;
2633
a1a1898f 2634 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2635
baed47c3 2636 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2637 metrics, mmap_cache, template, ret);
9447a7f1
LP
2638}
2639
cf244689
LP
2640int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2641 uint64_t i, n;
2642 uint64_t q, xor_hash = 0;
2643 int r;
2644 EntryItem *items;
2645 dual_timestamp ts;
2646
2647 assert(from);
2648 assert(to);
2649 assert(o);
2650 assert(p);
2651
2652 if (!to->writable)
2653 return -EPERM;
2654
2655 ts.monotonic = le64toh(o->entry.monotonic);
2656 ts.realtime = le64toh(o->entry.realtime);
2657
2658 if (to->tail_entry_monotonic_valid &&
2659 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2660 return -EINVAL;
2661
cf244689
LP
2662 n = journal_file_entry_n_items(o);
2663 items = alloca(sizeof(EntryItem) * n);
2664
2665 for (i = 0; i < n; i++) {
4fd052ae
FC
2666 uint64_t l, h;
2667 le64_t le_hash;
cf244689
LP
2668 size_t t;
2669 void *data;
2670 Object *u;
2671
2672 q = le64toh(o->entry.items[i].object_offset);
2673 le_hash = o->entry.items[i].hash;
2674
2675 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2676 if (r < 0)
2677 return r;
2678
2679 if (le_hash != o->data.hash)
2680 return -EBADMSG;
2681
2682 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2683 t = (size_t) l;
2684
2685 /* We hit the limit on 32bit machines */
2686 if ((uint64_t) t != l)
2687 return -E2BIG;
2688
2689 if (o->object.flags & OBJECT_COMPRESSED) {
2690#ifdef HAVE_XZ
2691 uint64_t rsize;
2692
93b73b06 2693 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0))
cf244689
LP
2694 return -EBADMSG;
2695
2696 data = from->compress_buffer;
2697 l = rsize;
2698#else
2699 return -EPROTONOSUPPORT;
2700#endif
2701 } else
2702 data = o->data.payload;
2703
2704 r = journal_file_append_data(to, data, l, &u, &h);
2705 if (r < 0)
2706 return r;
2707
2708 xor_hash ^= le64toh(u->data.hash);
2709 items[i].object_offset = htole64(h);
2710 items[i].hash = u->data.hash;
2711
2712 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2713 if (r < 0)
2714 return r;
2715 }
2716
2717 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2718}
babfc091
LP
2719
2720void journal_default_metrics(JournalMetrics *m, int fd) {
2721 uint64_t fs_size = 0;
2722 struct statvfs ss;
a7bc2c2a 2723 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2724
2725 assert(m);
2726 assert(fd >= 0);
2727
2728 if (fstatvfs(fd, &ss) >= 0)
2729 fs_size = ss.f_frsize * ss.f_blocks;
2730
2731 if (m->max_use == (uint64_t) -1) {
2732
2733 if (fs_size > 0) {
2734 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2735
2736 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2737 m->max_use = DEFAULT_MAX_USE_UPPER;
2738
2739 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2740 m->max_use = DEFAULT_MAX_USE_LOWER;
2741 } else
2742 m->max_use = DEFAULT_MAX_USE_LOWER;
2743 } else {
2744 m->max_use = PAGE_ALIGN(m->max_use);
2745
2746 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2747 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2748 }
2749
2750 if (m->max_size == (uint64_t) -1) {
2751 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2752
2753 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2754 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2755 } else
2756 m->max_size = PAGE_ALIGN(m->max_size);
2757
2758 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2759 m->max_size = JOURNAL_FILE_SIZE_MIN;
2760
2761 if (m->max_size*2 > m->max_use)
2762 m->max_use = m->max_size*2;
2763
2764 if (m->min_size == (uint64_t) -1)
2765 m->min_size = JOURNAL_FILE_SIZE_MIN;
2766 else {
2767 m->min_size = PAGE_ALIGN(m->min_size);
2768
2769 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2770 m->min_size = JOURNAL_FILE_SIZE_MIN;
2771
2772 if (m->min_size > m->max_size)
2773 m->max_size = m->min_size;
2774 }
2775
2776 if (m->keep_free == (uint64_t) -1) {
2777
2778 if (fs_size > 0) {
8621b110 2779 m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
babfc091
LP
2780
2781 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2782 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2783
2784 } else
2785 m->keep_free = DEFAULT_KEEP_FREE;
2786 }
2787
2b43f939
LP
2788 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2789 format_bytes(a, sizeof(a), m->max_use),
2790 format_bytes(b, sizeof(b), m->max_size),
2791 format_bytes(c, sizeof(c), m->min_size),
2792 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2793}
08984293
LP
2794
2795int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2796 assert(f);
2797 assert(from || to);
2798
2799 if (from) {
162566a4
LP
2800 if (f->header->head_entry_realtime == 0)
2801 return -ENOENT;
08984293 2802
162566a4 2803 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2804 }
2805
2806 if (to) {
162566a4
LP
2807 if (f->header->tail_entry_realtime == 0)
2808 return -ENOENT;
08984293 2809
162566a4 2810 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2811 }
2812
2813 return 1;
2814}
2815
2816int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
08984293
LP
2817 Object *o;
2818 uint64_t p;
2819 int r;
2820
2821 assert(f);
2822 assert(from || to);
2823
47838ab3 2824 r = find_data_object_by_boot_id(f, boot_id, &o, &p);
08984293
LP
2825 if (r <= 0)
2826 return r;
2827
2828 if (le64toh(o->data.n_entries) <= 0)
2829 return 0;
2830
2831 if (from) {
2832 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2833 if (r < 0)
2834 return r;
2835
2836 *from = le64toh(o->entry.monotonic);
2837 }
2838
2839 if (to) {
2840 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2841 if (r < 0)
2842 return r;
2843
2844 r = generic_array_get_plus_one(f,
2845 le64toh(o->data.entry_offset),
2846 le64toh(o->data.entry_array_offset),
2847 le64toh(o->data.n_entries)-1,
2848 &o, NULL);
2849 if (r <= 0)
2850 return r;
2851
2852 *to = le64toh(o->entry.monotonic);
2853 }
2854
2855 return 1;
2856}
dca6219e 2857
fb0951b0 2858bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
dca6219e
LP
2859 assert(f);
2860
2861 /* If we gained new header fields we gained new features,
2862 * hence suggest a rotation */
361f9cbc
LP
2863 if (le64toh(f->header->header_size) < sizeof(Header)) {
2864 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2865 return true;
361f9cbc 2866 }
dca6219e
LP
2867
2868 /* Let's check if the hash tables grew over a certain fill
2869 * level (75%, borrowing this value from Java's hash table
2870 * implementation), and if so suggest a rotation. To calculate
2871 * the fill level we need the n_data field, which only exists
2872 * in newer versions. */
2873
2874 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
2875 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2876 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2877 f->path,
2878 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2879 (unsigned long long) le64toh(f->header->n_data),
2880 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2881 (unsigned long long) (f->last_stat.st_size),
2882 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 2883 return true;
361f9cbc 2884 }
dca6219e
LP
2885
2886 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
2887 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2888 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2889 f->path,
2890 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2891 (unsigned long long) le64toh(f->header->n_fields),
2892 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 2893 return true;
361f9cbc 2894 }
dca6219e 2895
0598fd4a
LP
2896 /* Are the data objects properly indexed by field objects? */
2897 if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
2898 JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
2899 le64toh(f->header->n_data) > 0 &&
2900 le64toh(f->header->n_fields) == 0)
2901 return true;
2902
fb0951b0
LP
2903 if (max_file_usec > 0) {
2904 usec_t t, h;
2905
2906 h = le64toh(f->header->head_entry_realtime);
2907 t = now(CLOCK_REALTIME);
2908
2909 if (h > 0 && t > h + max_file_usec)
2910 return true;
2911 }
2912
dca6219e
LP
2913 return false;
2914}