]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
analyze: fix plot issues when using gummiboot
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
fb0951b0
LP
30#ifdef HAVE_XATTR
31#include <attr/xattr.h>
32#endif
33
cec736d2
LP
34#include "journal-def.h"
35#include "journal-file.h"
0284adc6 36#include "journal-authenticate.h"
cec736d2 37#include "lookup3.h"
807e17f0 38#include "compress.h"
7560fffc 39#include "fsprg.h"
cec736d2 40
4a92baf3
LP
/* Default sizes, in bytes, of the data and field hash tables */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Payloads shorter than this many bytes are stored without attempting
 * compression */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (4ULL*1024ULL*1024ULL)           /* 4 MiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the file system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */

/* n_data was the first entry we added after the initial file format design */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))

/* How many entries to keep in the entry array chain cache at max */
#define CHAIN_CACHE_MAX 20

/* How much to increase the journal file size at once each time we allocate something new. */
#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL)              /* 8 MiB */
73
9588bc32 74static int journal_file_set_online(JournalFile *f) {
26687bf8
OS
75 assert(f);
76
77 if (!f->writable)
78 return -EPERM;
79
80 if (!(f->fd >= 0 && f->header))
81 return -EINVAL;
82
83 switch(f->header->state) {
84 case STATE_ONLINE:
85 return 0;
86
87 case STATE_OFFLINE:
88 f->header->state = STATE_ONLINE;
89 fsync(f->fd);
90 return 0;
91
92 default:
93 return -EINVAL;
94 }
95}
96
97int journal_file_set_offline(JournalFile *f) {
98 assert(f);
99
100 if (!f->writable)
101 return -EPERM;
102
103 if (!(f->fd >= 0 && f->header))
104 return -EINVAL;
105
106 if (f->header->state != STATE_ONLINE)
107 return 0;
108
109 fsync(f->fd);
110
111 f->header->state = STATE_OFFLINE;
112
113 fsync(f->fd);
114
115 return 0;
116}
117
cec736d2 118void journal_file_close(JournalFile *f) {
de190aef 119 assert(f);
cec736d2 120
feb12d3e 121#ifdef HAVE_GCRYPT
b0af6f41 122 /* Write the final tag */
c586dbf1 123 if (f->seal && f->writable)
b0af6f41 124 journal_file_append_tag(f);
feb12d3e 125#endif
b0af6f41 126
7560fffc 127 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
128 if (f->mmap && f->fd >= 0)
129 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc 130
26687bf8 131 journal_file_set_offline(f);
cec736d2 132
26687bf8 133 if (f->header)
d384c7a8 134 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
cec736d2 135
0ac38b70
LP
136 if (f->fd >= 0)
137 close_nointr_nofail(f->fd);
138
cec736d2 139 free(f->path);
807e17f0 140
16e9f408
LP
141 if (f->mmap)
142 mmap_cache_unref(f->mmap);
143
a4bcff5b
LP
144 hashmap_free_free(f->chain_cache);
145
807e17f0
LP
146#ifdef HAVE_XZ
147 free(f->compress_buffer);
148#endif
149
7560fffc 150#ifdef HAVE_GCRYPT
baed47c3
LP
151 if (f->fss_file)
152 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
153 else if (f->fsprg_state)
154 free(f->fsprg_state);
155
156 free(f->fsprg_seed);
7560fffc
LP
157
158 if (f->hmac)
159 gcry_md_close(f->hmac);
160#endif
161
cec736d2
LP
162 free(f);
163}
164
0ac38b70 165static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
166 Header h;
167 ssize_t k;
168 int r;
169
170 assert(f);
171
172 zero(h);
7560fffc 173 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 174 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 175
7560fffc
LP
176 h.incompatible_flags =
177 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
178
179 h.compatible_flags =
baed47c3 180 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 181
cec736d2
LP
182 r = sd_id128_randomize(&h.file_id);
183 if (r < 0)
184 return r;
185
0ac38b70
LP
186 if (template) {
187 h.seqnum_id = template->header->seqnum_id;
beec0085 188 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
189 } else
190 h.seqnum_id = h.file_id;
cec736d2
LP
191
192 k = pwrite(f->fd, &h, sizeof(h), 0);
193 if (k < 0)
194 return -errno;
195
196 if (k != sizeof(h))
197 return -EIO;
198
199 return 0;
200}
201
202static int journal_file_refresh_header(JournalFile *f) {
203 int r;
de190aef 204 sd_id128_t boot_id;
cec736d2
LP
205
206 assert(f);
207
208 r = sd_id128_get_machine(&f->header->machine_id);
209 if (r < 0)
210 return r;
211
de190aef 212 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
213 if (r < 0)
214 return r;
215
de190aef
LP
216 if (sd_id128_equal(boot_id, f->header->boot_id))
217 f->tail_entry_monotonic_valid = true;
218
219 f->header->boot_id = boot_id;
220
26687bf8 221 journal_file_set_online(f);
b788cc23 222
7560fffc 223 /* Sync the online state to disk */
a676e665 224 fsync(f->fd);
b788cc23 225
cec736d2
LP
226 return 0;
227}
228
229static int journal_file_verify_header(JournalFile *f) {
230 assert(f);
231
7560fffc 232 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
233 return -EBADMSG;
234
7560fffc
LP
235 /* In both read and write mode we refuse to open files with
236 * incompatible flags we don't know */
807e17f0 237#ifdef HAVE_XZ
7560fffc 238 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
239 return -EPROTONOSUPPORT;
240#else
cec736d2
LP
241 if (f->header->incompatible_flags != 0)
242 return -EPROTONOSUPPORT;
807e17f0 243#endif
cec736d2 244
7560fffc
LP
245 /* When open for writing we refuse to open files with
246 * compatible flags, too */
247 if (f->writable) {
248#ifdef HAVE_GCRYPT
baed47c3 249 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
250 return -EPROTONOSUPPORT;
251#else
252 if (f->header->compatible_flags != 0)
253 return -EPROTONOSUPPORT;
254#endif
255 }
256
db11ac1a
LP
257 if (f->header->state >= _STATE_MAX)
258 return -EBADMSG;
259
dca6219e
LP
260 /* The first addition was n_data, so check that we are at least this large */
261 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
262 return -EBADMSG;
263
8088cbd3 264 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
265 return -EBADMSG;
266
db11ac1a
LP
267 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
268 return -ENODATA;
269
270 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
271 return -ENODATA;
272
7762e02b
LP
273 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
274 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
275 !VALID64(le64toh(f->header->tail_object_offset)) ||
276 !VALID64(le64toh(f->header->entry_array_offset)))
277 return -ENODATA;
278
279 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
280 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
281 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
282 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
cec736d2
LP
283 return -ENODATA;
284
285 if (f->writable) {
ccdbaf91 286 uint8_t state;
cec736d2
LP
287 sd_id128_t machine_id;
288 int r;
289
290 r = sd_id128_get_machine(&machine_id);
291 if (r < 0)
292 return r;
293
294 if (!sd_id128_equal(machine_id, f->header->machine_id))
295 return -EHOSTDOWN;
296
de190aef 297 state = f->header->state;
cec736d2 298
71fa6f00
LP
299 if (state == STATE_ONLINE) {
300 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
301 return -EBUSY;
302 } else if (state == STATE_ARCHIVED)
cec736d2 303 return -ESHUTDOWN;
71fa6f00
LP
304 else if (state != STATE_OFFLINE) {
305 log_debug("Journal file %s has unknown state %u.", f->path, state);
306 return -EBUSY;
307 }
cec736d2
LP
308 }
309
8088cbd3 310 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
c586dbf1 311
f1889c91 312 f->seal = JOURNAL_HEADER_SEALED(f->header);
7560fffc 313
cec736d2
LP
314 return 0;
315}
316
317static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
eda4b58b 318 uint64_t old_size, new_size;
fec2aa2f 319 int r;
cec736d2
LP
320
321 assert(f);
322
cec736d2 323 /* We assume that this file is not sparse, and we know that
38ac38b2 324 * for sure, since we always call posix_fallocate()
cec736d2
LP
325 * ourselves */
326
327 old_size =
23b0b2b2 328 le64toh(f->header->header_size) +
cec736d2
LP
329 le64toh(f->header->arena_size);
330
bc85bfee 331 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
332 if (new_size < le64toh(f->header->header_size))
333 new_size = le64toh(f->header->header_size);
bc85bfee
LP
334
335 if (new_size <= old_size)
cec736d2
LP
336 return 0;
337
a676e665 338 if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
bc85bfee 339 return -E2BIG;
cec736d2 340
a676e665 341 if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
cec736d2
LP
342 struct statvfs svfs;
343
344 if (fstatvfs(f->fd, &svfs) >= 0) {
345 uint64_t available;
346
347 available = svfs.f_bfree * svfs.f_bsize;
348
bc85bfee
LP
349 if (available >= f->metrics.keep_free)
350 available -= f->metrics.keep_free;
cec736d2
LP
351 else
352 available = 0;
353
354 if (new_size - old_size > available)
355 return -E2BIG;
356 }
357 }
358
eda4b58b
LP
359 /* Increase by larger blocks at once */
360 new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
361 if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
362 new_size = f->metrics.max_size;
363
bc85bfee
LP
364 /* Note that the glibc fallocate() fallback is very
365 inefficient, hence we try to minimize the allocation area
366 as we can. */
fec2aa2f
GV
367 r = posix_fallocate(f->fd, old_size, new_size - old_size);
368 if (r != 0)
369 return -r;
cec736d2 370
eda4b58b
LP
371 if (fstat(f->fd, &f->last_stat) < 0)
372 return -errno;
cec736d2 373
23b0b2b2 374 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
375
376 return 0;
377}
378
fcde2389 379static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
cec736d2 380 assert(f);
cec736d2
LP
381 assert(ret);
382
7762e02b
LP
383 if (size <= 0)
384 return -EINVAL;
385
2a59ea54 386 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
387 if (offset + size > (uint64_t) f->last_stat.st_size) {
388 /* Hmm, out of range? Let's refresh the fstat() data
389 * first, before we trust that check. */
390
391 if (fstat(f->fd, &f->last_stat) < 0 ||
392 offset + size > (uint64_t) f->last_stat.st_size)
393 return -EADDRNOTAVAIL;
394 }
395
fcde2389 396 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
cec736d2
LP
397}
398
16e9f408
LP
399static uint64_t minimum_header_size(Object *o) {
400
b8e891e6 401 static const uint64_t table[] = {
16e9f408
LP
402 [OBJECT_DATA] = sizeof(DataObject),
403 [OBJECT_FIELD] = sizeof(FieldObject),
404 [OBJECT_ENTRY] = sizeof(EntryObject),
405 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
406 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
407 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
408 [OBJECT_TAG] = sizeof(TagObject),
409 };
410
411 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
412 return sizeof(ObjectHeader);
413
414 return table[o->object.type];
415}
416
de190aef 417int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
418 int r;
419 void *t;
420 Object *o;
421 uint64_t s;
422
423 assert(f);
424 assert(ret);
425
db11ac1a
LP
426 /* Objects may only be located at multiple of 64 bit */
427 if (!VALID64(offset))
428 return -EFAULT;
429
16e9f408 430
ae97089d 431 r = journal_file_move_to(f, type_to_context(type), false, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
432 if (r < 0)
433 return r;
434
435 o = (Object*) t;
436 s = le64toh(o->object.size);
437
438 if (s < sizeof(ObjectHeader))
439 return -EBADMSG;
440
16e9f408
LP
441 if (o->object.type <= OBJECT_UNUSED)
442 return -EBADMSG;
443
444 if (s < minimum_header_size(o))
445 return -EBADMSG;
446
3c1668da 447 if (type > 0 && o->object.type != type)
cec736d2
LP
448 return -EBADMSG;
449
450 if (s > sizeof(ObjectHeader)) {
fcde2389 451 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
cec736d2
LP
452 if (r < 0)
453 return r;
454
455 o = (Object*) t;
456 }
457
cec736d2
LP
458 *ret = o;
459 return 0;
460}
461
d98cc1f2 462static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
463 uint64_t r;
464
465 assert(f);
466
beec0085 467 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
468
469 if (seqnum) {
de190aef 470 /* If an external seqnum counter was passed, we update
c2373f84
LP
471 * both the local and the external one, and set it to
472 * the maximum of both */
473
474 if (*seqnum + 1 > r)
475 r = *seqnum + 1;
476
477 *seqnum = r;
478 }
479
beec0085 480 f->header->tail_entry_seqnum = htole64(r);
cec736d2 481
beec0085
LP
482 if (f->header->head_entry_seqnum == 0)
483 f->header->head_entry_seqnum = htole64(r);
de190aef 484
cec736d2
LP
485 return r;
486}
487
0284adc6 488int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
489 int r;
490 uint64_t p;
491 Object *tail, *o;
492 void *t;
493
494 assert(f);
16e9f408 495 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
496 assert(size >= sizeof(ObjectHeader));
497 assert(offset);
498 assert(ret);
499
26687bf8
OS
500 r = journal_file_set_online(f);
501 if (r < 0)
502 return r;
503
cec736d2 504 p = le64toh(f->header->tail_object_offset);
cec736d2 505 if (p == 0)
23b0b2b2 506 p = le64toh(f->header->header_size);
cec736d2 507 else {
de190aef 508 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
509 if (r < 0)
510 return r;
511
512 p += ALIGN64(le64toh(tail->object.size));
513 }
514
515 r = journal_file_allocate(f, p, size);
516 if (r < 0)
517 return r;
518
fcde2389 519 r = journal_file_move_to(f, type, false, p, size, &t);
cec736d2
LP
520 if (r < 0)
521 return r;
522
523 o = (Object*) t;
524
525 zero(o->object);
de190aef 526 o->object.type = type;
cec736d2
LP
527 o->object.size = htole64(size);
528
529 f->header->tail_object_offset = htole64(p);
cec736d2
LP
530 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
531
532 *ret = o;
533 *offset = p;
534
535 return 0;
536}
537
de190aef 538static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
539 uint64_t s, p;
540 Object *o;
541 int r;
542
543 assert(f);
544
dfabe643 545 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
546 journal file and we want to make sure we never get beyond
547 75% fill level. Calculate the hash table size for the
548 maximum file size based on these metrics. */
549
dfabe643 550 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
551 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
552 s = DEFAULT_DATA_HASH_TABLE_SIZE;
553
507f22bd 554 log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
4a92baf3 555
de190aef
LP
556 r = journal_file_append_object(f,
557 OBJECT_DATA_HASH_TABLE,
558 offsetof(Object, hash_table.items) + s,
559 &o, &p);
cec736d2
LP
560 if (r < 0)
561 return r;
562
de190aef 563 memset(o->hash_table.items, 0, s);
cec736d2 564
de190aef
LP
565 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
566 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
567
568 return 0;
569}
570
de190aef 571static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
572 uint64_t s, p;
573 Object *o;
574 int r;
575
576 assert(f);
577
3c1668da
LP
578 /* We use a fixed size hash table for the fields as this
579 * number should grow very slowly only */
580
de190aef
LP
581 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
582 r = journal_file_append_object(f,
583 OBJECT_FIELD_HASH_TABLE,
584 offsetof(Object, hash_table.items) + s,
585 &o, &p);
cec736d2
LP
586 if (r < 0)
587 return r;
588
de190aef 589 memset(o->hash_table.items, 0, s);
cec736d2 590
de190aef
LP
591 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
592 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
593
594 return 0;
595}
596
de190aef 597static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
598 uint64_t s, p;
599 void *t;
600 int r;
601
602 assert(f);
603
de190aef
LP
604 p = le64toh(f->header->data_hash_table_offset);
605 s = le64toh(f->header->data_hash_table_size);
cec736d2 606
de190aef 607 r = journal_file_move_to(f,
16e9f408 608 OBJECT_DATA_HASH_TABLE,
fcde2389 609 true,
de190aef
LP
610 p, s,
611 &t);
cec736d2
LP
612 if (r < 0)
613 return r;
614
de190aef 615 f->data_hash_table = t;
cec736d2
LP
616 return 0;
617}
618
de190aef 619static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
620 uint64_t s, p;
621 void *t;
622 int r;
623
624 assert(f);
625
de190aef
LP
626 p = le64toh(f->header->field_hash_table_offset);
627 s = le64toh(f->header->field_hash_table_size);
cec736d2 628
de190aef 629 r = journal_file_move_to(f,
16e9f408 630 OBJECT_FIELD_HASH_TABLE,
fcde2389 631 true,
de190aef
LP
632 p, s,
633 &t);
cec736d2
LP
634 if (r < 0)
635 return r;
636
de190aef 637 f->field_hash_table = t;
cec736d2
LP
638 return 0;
639}
640
3c1668da
LP
641static int journal_file_link_field(
642 JournalFile *f,
643 Object *o,
644 uint64_t offset,
645 uint64_t hash) {
646
647 uint64_t p, h;
648 int r;
649
650 assert(f);
651 assert(o);
652 assert(offset > 0);
653
654 if (o->object.type != OBJECT_FIELD)
655 return -EINVAL;
656
657 /* This might alter the window we are looking at */
658
659 o->field.next_hash_offset = o->field.head_data_offset = 0;
660
661 h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
662 p = le64toh(f->field_hash_table[h].tail_hash_offset);
663 if (p == 0)
664 f->field_hash_table[h].head_hash_offset = htole64(offset);
665 else {
666 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
667 if (r < 0)
668 return r;
669
670 o->field.next_hash_offset = htole64(offset);
671 }
672
673 f->field_hash_table[h].tail_hash_offset = htole64(offset);
674
675 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
676 f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
677
678 return 0;
679}
680
681static int journal_file_link_data(
682 JournalFile *f,
683 Object *o,
684 uint64_t offset,
685 uint64_t hash) {
686
de190aef 687 uint64_t p, h;
cec736d2
LP
688 int r;
689
690 assert(f);
691 assert(o);
692 assert(offset > 0);
b588975f
LP
693
694 if (o->object.type != OBJECT_DATA)
695 return -EINVAL;
cec736d2 696
48496df6
LP
697 /* This might alter the window we are looking at */
698
de190aef
LP
699 o->data.next_hash_offset = o->data.next_field_offset = 0;
700 o->data.entry_offset = o->data.entry_array_offset = 0;
701 o->data.n_entries = 0;
cec736d2 702
de190aef 703 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 704 p = le64toh(f->data_hash_table[h].tail_hash_offset);
3c1668da 705 if (p == 0)
cec736d2 706 /* Only entry in the hash table is easy */
de190aef 707 f->data_hash_table[h].head_hash_offset = htole64(offset);
3c1668da 708 else {
48496df6
LP
709 /* Move back to the previous data object, to patch in
710 * pointer */
cec736d2 711
de190aef 712 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
713 if (r < 0)
714 return r;
715
de190aef 716 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
717 }
718
de190aef 719 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 720
dca6219e
LP
721 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
722 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
723
cec736d2
LP
724 return 0;
725}
726
3c1668da
LP
727int journal_file_find_field_object_with_hash(
728 JournalFile *f,
729 const void *field, uint64_t size, uint64_t hash,
730 Object **ret, uint64_t *offset) {
731
732 uint64_t p, osize, h;
733 int r;
734
735 assert(f);
736 assert(field && size > 0);
737
738 osize = offsetof(Object, field.payload) + size;
739
740 if (f->header->field_hash_table_size == 0)
741 return -EBADMSG;
742
743 h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
744 p = le64toh(f->field_hash_table[h].head_hash_offset);
745
746 while (p > 0) {
747 Object *o;
748
749 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
750 if (r < 0)
751 return r;
752
753 if (le64toh(o->field.hash) == hash &&
754 le64toh(o->object.size) == osize &&
755 memcmp(o->field.payload, field, size) == 0) {
756
757 if (ret)
758 *ret = o;
759 if (offset)
760 *offset = p;
761
762 return 1;
763 }
764
765 p = le64toh(o->field.next_hash_offset);
766 }
767
768 return 0;
769}
770
771int journal_file_find_field_object(
772 JournalFile *f,
773 const void *field, uint64_t size,
774 Object **ret, uint64_t *offset) {
775
776 uint64_t hash;
777
778 assert(f);
779 assert(field && size > 0);
780
781 hash = hash64(field, size);
782
783 return journal_file_find_field_object_with_hash(f,
784 field, size, hash,
785 ret, offset);
786}
787
de190aef
LP
788int journal_file_find_data_object_with_hash(
789 JournalFile *f,
790 const void *data, uint64_t size, uint64_t hash,
791 Object **ret, uint64_t *offset) {
48496df6 792
de190aef 793 uint64_t p, osize, h;
cec736d2
LP
794 int r;
795
796 assert(f);
797 assert(data || size == 0);
798
799 osize = offsetof(Object, data.payload) + size;
800
bc85bfee
LP
801 if (f->header->data_hash_table_size == 0)
802 return -EBADMSG;
803
de190aef
LP
804 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
805 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 806
de190aef
LP
807 while (p > 0) {
808 Object *o;
cec736d2 809
de190aef 810 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
811 if (r < 0)
812 return r;
813
807e17f0 814 if (le64toh(o->data.hash) != hash)
85a131e8 815 goto next;
807e17f0
LP
816
817 if (o->object.flags & OBJECT_COMPRESSED) {
818#ifdef HAVE_XZ
b785c858 819 uint64_t l, rsize;
cec736d2 820
807e17f0
LP
821 l = le64toh(o->object.size);
822 if (l <= offsetof(Object, data.payload))
cec736d2
LP
823 return -EBADMSG;
824
807e17f0
LP
825 l -= offsetof(Object, data.payload);
826
93b73b06 827 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0))
807e17f0
LP
828 return -EBADMSG;
829
b785c858 830 if (rsize == size &&
807e17f0
LP
831 memcmp(f->compress_buffer, data, size) == 0) {
832
833 if (ret)
834 *ret = o;
835
836 if (offset)
837 *offset = p;
838
839 return 1;
840 }
841#else
842 return -EPROTONOSUPPORT;
843#endif
844
845 } else if (le64toh(o->object.size) == osize &&
846 memcmp(o->data.payload, data, size) == 0) {
847
cec736d2
LP
848 if (ret)
849 *ret = o;
850
851 if (offset)
852 *offset = p;
853
de190aef 854 return 1;
cec736d2
LP
855 }
856
85a131e8 857 next:
cec736d2
LP
858 p = le64toh(o->data.next_hash_offset);
859 }
860
de190aef
LP
861 return 0;
862}
863
864int journal_file_find_data_object(
865 JournalFile *f,
866 const void *data, uint64_t size,
867 Object **ret, uint64_t *offset) {
868
869 uint64_t hash;
870
871 assert(f);
872 assert(data || size == 0);
873
874 hash = hash64(data, size);
875
876 return journal_file_find_data_object_with_hash(f,
877 data, size, hash,
878 ret, offset);
879}
880
3c1668da
LP
881static int journal_file_append_field(
882 JournalFile *f,
883 const void *field, uint64_t size,
884 Object **ret, uint64_t *offset) {
885
886 uint64_t hash, p;
887 uint64_t osize;
888 Object *o;
889 int r;
890
891 assert(f);
892 assert(field && size > 0);
893
894 hash = hash64(field, size);
895
896 r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
897 if (r < 0)
898 return r;
899 else if (r > 0) {
900
901 if (ret)
902 *ret = o;
903
904 if (offset)
905 *offset = p;
906
907 return 0;
908 }
909
910 osize = offsetof(Object, field.payload) + size;
911 r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
8c92d4bb
LP
912 if (r < 0)
913 return r;
3c1668da
LP
914
915 o->field.hash = htole64(hash);
916 memcpy(o->field.payload, field, size);
917
918 r = journal_file_link_field(f, o, p, hash);
919 if (r < 0)
920 return r;
921
922 /* The linking might have altered the window, so let's
923 * refresh our pointer */
924 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
925 if (r < 0)
926 return r;
927
928#ifdef HAVE_GCRYPT
929 r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
930 if (r < 0)
931 return r;
932#endif
933
934 if (ret)
935 *ret = o;
936
937 if (offset)
938 *offset = p;
939
940 return 0;
941}
942
48496df6
LP
943static int journal_file_append_data(
944 JournalFile *f,
945 const void *data, uint64_t size,
946 Object **ret, uint64_t *offset) {
947
de190aef
LP
948 uint64_t hash, p;
949 uint64_t osize;
950 Object *o;
951 int r;
807e17f0 952 bool compressed = false;
3c1668da 953 const void *eq;
de190aef
LP
954
955 assert(f);
956 assert(data || size == 0);
957
958 hash = hash64(data, size);
959
960 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
961 if (r < 0)
962 return r;
963 else if (r > 0) {
964
965 if (ret)
966 *ret = o;
967
968 if (offset)
969 *offset = p;
970
971 return 0;
972 }
973
974 osize = offsetof(Object, data.payload) + size;
975 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
976 if (r < 0)
977 return r;
978
cec736d2 979 o->data.hash = htole64(hash);
807e17f0
LP
980
981#ifdef HAVE_XZ
982 if (f->compress &&
983 size >= COMPRESSION_SIZE_THRESHOLD) {
984 uint64_t rsize;
985
986 compressed = compress_blob(data, size, o->data.payload, &rsize);
987
988 if (compressed) {
989 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
990 o->object.flags |= OBJECT_COMPRESSED;
991
507f22bd 992 log_debug("Compressed data object %"PRIu64" -> %"PRIu64, size, rsize);
807e17f0
LP
993 }
994 }
995#endif
996
64825d3c 997 if (!compressed && size > 0)
807e17f0 998 memcpy(o->data.payload, data, size);
cec736d2 999
de190aef 1000 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
1001 if (r < 0)
1002 return r;
1003
48496df6
LP
1004 /* The linking might have altered the window, so let's
1005 * refresh our pointer */
1006 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1007 if (r < 0)
1008 return r;
1009
08c6f819
SL
1010 if (!data)
1011 eq = NULL;
1012 else
1013 eq = memchr(data, '=', size);
3c1668da
LP
1014 if (eq && eq > data) {
1015 uint64_t fp;
1016 Object *fo;
1017
1018 /* Create field object ... */
1019 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
1020 if (r < 0)
1021 return r;
1022
1023 /* ... and link it in. */
1024 o->data.next_field_offset = fo->field.head_data_offset;
1025 fo->field.head_data_offset = le64toh(p);
1026 }
1027
5996c7c2
LP
1028#ifdef HAVE_GCRYPT
1029 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
1030 if (r < 0)
1031 return r;
1032#endif
1033
cec736d2
LP
1034 if (ret)
1035 *ret = o;
1036
1037 if (offset)
de190aef 1038 *offset = p;
cec736d2
LP
1039
1040 return 0;
1041}
1042
1043uint64_t journal_file_entry_n_items(Object *o) {
1044 assert(o);
b588975f
LP
1045
1046 if (o->object.type != OBJECT_ENTRY)
1047 return 0;
cec736d2
LP
1048
1049 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
1050}
1051
0284adc6 1052uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 1053 assert(o);
b588975f
LP
1054
1055 if (o->object.type != OBJECT_ENTRY_ARRAY)
1056 return 0;
de190aef
LP
1057
1058 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
1059}
1060
fb9a24b6
LP
1061uint64_t journal_file_hash_table_n_items(Object *o) {
1062 assert(o);
b588975f
LP
1063
1064 if (o->object.type != OBJECT_DATA_HASH_TABLE &&
1065 o->object.type != OBJECT_FIELD_HASH_TABLE)
1066 return 0;
fb9a24b6
LP
1067
1068 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
1069}
1070
de190aef 1071static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
1072 le64_t *first,
1073 le64_t *idx,
de190aef 1074 uint64_t p) {
cec736d2 1075 int r;
de190aef
LP
1076 uint64_t n = 0, ap = 0, q, i, a, hidx;
1077 Object *o;
1078
cec736d2 1079 assert(f);
de190aef
LP
1080 assert(first);
1081 assert(idx);
1082 assert(p > 0);
cec736d2 1083
de190aef
LP
1084 a = le64toh(*first);
1085 i = hidx = le64toh(*idx);
1086 while (a > 0) {
1087
1088 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1089 if (r < 0)
1090 return r;
cec736d2 1091
de190aef
LP
1092 n = journal_file_entry_array_n_items(o);
1093 if (i < n) {
1094 o->entry_array.items[i] = htole64(p);
1095 *idx = htole64(hidx + 1);
1096 return 0;
1097 }
cec736d2 1098
de190aef
LP
1099 i -= n;
1100 ap = a;
1101 a = le64toh(o->entry_array.next_entry_array_offset);
1102 }
1103
1104 if (hidx > n)
1105 n = (hidx+1) * 2;
1106 else
1107 n = n * 2;
1108
1109 if (n < 4)
1110 n = 4;
1111
1112 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
1113 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
1114 &o, &q);
cec736d2
LP
1115 if (r < 0)
1116 return r;
1117
feb12d3e 1118#ifdef HAVE_GCRYPT
5996c7c2 1119 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
b0af6f41
LP
1120 if (r < 0)
1121 return r;
feb12d3e 1122#endif
b0af6f41 1123
de190aef 1124 o->entry_array.items[i] = htole64(p);
cec736d2 1125
de190aef 1126 if (ap == 0)
7be3aa17 1127 *first = htole64(q);
cec736d2 1128 else {
de190aef 1129 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
1130 if (r < 0)
1131 return r;
1132
de190aef
LP
1133 o->entry_array.next_entry_array_offset = htole64(q);
1134 }
cec736d2 1135
2dee23eb
LP
1136 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1137 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
1138
de190aef
LP
1139 *idx = htole64(hidx + 1);
1140
1141 return 0;
1142}
cec736d2 1143
de190aef 1144static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
1145 le64_t *extra,
1146 le64_t *first,
1147 le64_t *idx,
de190aef
LP
1148 uint64_t p) {
1149
1150 int r;
1151
1152 assert(f);
1153 assert(extra);
1154 assert(first);
1155 assert(idx);
1156 assert(p > 0);
1157
1158 if (*idx == 0)
1159 *extra = htole64(p);
1160 else {
4fd052ae 1161 le64_t i;
de190aef 1162
7be3aa17 1163 i = htole64(le64toh(*idx) - 1);
de190aef
LP
1164 r = link_entry_into_array(f, first, &i, p);
1165 if (r < 0)
1166 return r;
cec736d2
LP
1167 }
1168
de190aef
LP
1169 *idx = htole64(le64toh(*idx) + 1);
1170 return 0;
1171}
1172
1173static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
1174 uint64_t p;
1175 int r;
1176 assert(f);
1177 assert(o);
1178 assert(offset > 0);
1179
1180 p = le64toh(o->entry.items[i].object_offset);
1181 if (p == 0)
1182 return -EINVAL;
1183
1184 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
1185 if (r < 0)
1186 return r;
1187
de190aef
LP
1188 return link_entry_into_array_plus_one(f,
1189 &o->data.entry_offset,
1190 &o->data.entry_array_offset,
1191 &o->data.n_entries,
1192 offset);
cec736d2
LP
1193}
1194
1195static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 1196 uint64_t n, i;
cec736d2
LP
1197 int r;
1198
1199 assert(f);
1200 assert(o);
1201 assert(offset > 0);
b588975f
LP
1202
1203 if (o->object.type != OBJECT_ENTRY)
1204 return -EINVAL;
cec736d2 1205
b788cc23
LP
1206 __sync_synchronize();
1207
cec736d2 1208 /* Link up the entry itself */
de190aef
LP
1209 r = link_entry_into_array(f,
1210 &f->header->entry_array_offset,
1211 &f->header->n_entries,
1212 offset);
1213 if (r < 0)
1214 return r;
cec736d2 1215
507f22bd 1216 /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
cec736d2 1217
de190aef 1218 if (f->header->head_entry_realtime == 0)
0ac38b70 1219 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 1220
0ac38b70 1221 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
1222 f->header->tail_entry_monotonic = o->entry.monotonic;
1223
1224 f->tail_entry_monotonic_valid = true;
cec736d2
LP
1225
1226 /* Link up the items */
1227 n = journal_file_entry_n_items(o);
1228 for (i = 0; i < n; i++) {
1229 r = journal_file_link_entry_item(f, o, offset, i);
1230 if (r < 0)
1231 return r;
1232 }
1233
cec736d2
LP
1234 return 0;
1235}
1236
1237static int journal_file_append_entry_internal(
1238 JournalFile *f,
1239 const dual_timestamp *ts,
1240 uint64_t xor_hash,
1241 const EntryItem items[], unsigned n_items,
de190aef 1242 uint64_t *seqnum,
cec736d2
LP
1243 Object **ret, uint64_t *offset) {
1244 uint64_t np;
1245 uint64_t osize;
1246 Object *o;
1247 int r;
1248
1249 assert(f);
1250 assert(items || n_items == 0);
de190aef 1251 assert(ts);
cec736d2
LP
1252
1253 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1254
de190aef 1255 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
1256 if (r < 0)
1257 return r;
1258
d98cc1f2 1259 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 1260 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
1261 o->entry.realtime = htole64(ts->realtime);
1262 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
1263 o->entry.xor_hash = htole64(xor_hash);
1264 o->entry.boot_id = f->header->boot_id;
1265
feb12d3e 1266#ifdef HAVE_GCRYPT
5996c7c2 1267 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
b0af6f41
LP
1268 if (r < 0)
1269 return r;
feb12d3e 1270#endif
b0af6f41 1271
cec736d2
LP
1272 r = journal_file_link_entry(f, o, np);
1273 if (r < 0)
1274 return r;
1275
1276 if (ret)
1277 *ret = o;
1278
1279 if (offset)
1280 *offset = np;
1281
1282 return 0;
1283}
1284
cf244689 1285void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1286 assert(f);
1287
1288 /* inotify() does not receive IN_MODIFY events from file
1289 * accesses done via mmap(). After each access we hence
1290 * trigger IN_MODIFY by truncating the journal file to its
1291 * current size which triggers IN_MODIFY. */
1292
bc85bfee
LP
1293 __sync_synchronize();
1294
50f20cfd 1295 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
c5315881 1296 log_error("Failed to truncate file to its own size: %m");
50f20cfd
LP
1297}
1298
1f2da9ec
LP
1299static int entry_item_cmp(const void *_a, const void *_b) {
1300 const EntryItem *a = _a, *b = _b;
1301
1302 if (le64toh(a->object_offset) < le64toh(b->object_offset))
1303 return -1;
1304 if (le64toh(a->object_offset) > le64toh(b->object_offset))
1305 return 1;
1306 return 0;
1307}
1308
de190aef 1309int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1310 unsigned i;
1311 EntryItem *items;
1312 int r;
1313 uint64_t xor_hash = 0;
de190aef 1314 struct dual_timestamp _ts;
cec736d2
LP
1315
1316 assert(f);
1317 assert(iovec || n_iovec == 0);
1318
de190aef
LP
1319 if (!ts) {
1320 dual_timestamp_get(&_ts);
1321 ts = &_ts;
1322 }
1323
1324 if (f->tail_entry_monotonic_valid &&
1325 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1326 return -EINVAL;
1327
feb12d3e 1328#ifdef HAVE_GCRYPT
7560fffc
LP
1329 r = journal_file_maybe_append_tag(f, ts->realtime);
1330 if (r < 0)
1331 return r;
feb12d3e 1332#endif
7560fffc 1333
64825d3c 1334 /* alloca() can't take 0, hence let's allocate at least one */
9607d947 1335 items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));
cec736d2
LP
1336
1337 for (i = 0; i < n_iovec; i++) {
1338 uint64_t p;
1339 Object *o;
1340
1341 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1342 if (r < 0)
cf244689 1343 return r;
cec736d2
LP
1344
1345 xor_hash ^= le64toh(o->data.hash);
1346 items[i].object_offset = htole64(p);
de7b95cd 1347 items[i].hash = o->data.hash;
cec736d2
LP
1348 }
1349
1f2da9ec
LP
1350 /* Order by the position on disk, in order to improve seek
1351 * times for rotating media. */
7ff7394d 1352 qsort_safe(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1f2da9ec 1353
de190aef 1354 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1355
50f20cfd
LP
1356 journal_file_post_change(f);
1357
cec736d2
LP
1358 return r;
1359}
1360
a4bcff5b
LP
/* One cached position within an entry array chain. */
typedef struct ChainCacheItem {
        /* Offset of the array the chain starts with; also used as the
         * lookup key of the cache. */
        uint64_t first;

        /* Offset of the array that was cached. */
        uint64_t array;

        /* The first item stored in the cached array. */
        uint64_t begin;

        /* Total number of items in all arrays of the chain that precede
         * the cached one. */
        uint64_t total;

        /* The index looked at last, used to improve locality when
         * bisecting. */
        uint64_t last_index;
} ChainCacheItem;
1368
1369static void chain_cache_put(
1370 Hashmap *h,
1371 ChainCacheItem *ci,
1372 uint64_t first,
1373 uint64_t array,
1374 uint64_t begin,
f268980d
LP
1375 uint64_t total,
1376 uint64_t last_index) {
a4bcff5b
LP
1377
1378 if (!ci) {
34741aa3
LP
1379 /* If the chain item to cache for this chain is the
1380 * first one it's not worth caching anything */
1381 if (array == first)
1382 return;
1383
a4bcff5b
LP
1384 if (hashmap_size(h) >= CHAIN_CACHE_MAX)
1385 ci = hashmap_steal_first(h);
1386 else {
1387 ci = new(ChainCacheItem, 1);
1388 if (!ci)
1389 return;
1390 }
1391
1392 ci->first = first;
1393
1394 if (hashmap_put(h, &ci->first, ci) < 0) {
1395 free(ci);
1396 return;
1397 }
1398 } else
1399 assert(ci->first == first);
1400
1401 ci->array = array;
1402 ci->begin = begin;
1403 ci->total = total;
f268980d 1404 ci->last_index = last_index;
a4bcff5b
LP
1405}
1406
f268980d
LP
1407static int generic_array_get(
1408 JournalFile *f,
1409 uint64_t first,
1410 uint64_t i,
1411 Object **ret, uint64_t *offset) {
de190aef 1412
cec736d2 1413 Object *o;
a4bcff5b 1414 uint64_t p = 0, a, t = 0;
cec736d2 1415 int r;
a4bcff5b 1416 ChainCacheItem *ci;
cec736d2
LP
1417
1418 assert(f);
1419
de190aef 1420 a = first;
a4bcff5b
LP
1421
1422 /* Try the chain cache first */
1423 ci = hashmap_get(f->chain_cache, &first);
1424 if (ci && i > ci->total) {
1425 a = ci->array;
1426 i -= ci->total;
1427 t = ci->total;
1428 }
1429
de190aef 1430 while (a > 0) {
a4bcff5b 1431 uint64_t k;
cec736d2 1432
de190aef
LP
1433 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1434 if (r < 0)
1435 return r;
cec736d2 1436
a4bcff5b
LP
1437 k = journal_file_entry_array_n_items(o);
1438 if (i < k) {
de190aef 1439 p = le64toh(o->entry_array.items[i]);
a4bcff5b 1440 goto found;
cec736d2
LP
1441 }
1442
a4bcff5b
LP
1443 i -= k;
1444 t += k;
de190aef
LP
1445 a = le64toh(o->entry_array.next_entry_array_offset);
1446 }
1447
a4bcff5b
LP
1448 return 0;
1449
1450found:
1451 /* Let's cache this item for the next invocation */
af13a6b0 1452 chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);
de190aef
LP
1453
1454 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1455 if (r < 0)
1456 return r;
1457
1458 if (ret)
1459 *ret = o;
1460
1461 if (offset)
1462 *offset = p;
1463
1464 return 1;
1465}
1466
f268980d
LP
1467static int generic_array_get_plus_one(
1468 JournalFile *f,
1469 uint64_t extra,
1470 uint64_t first,
1471 uint64_t i,
1472 Object **ret, uint64_t *offset) {
de190aef
LP
1473
1474 Object *o;
1475
1476 assert(f);
1477
1478 if (i == 0) {
1479 int r;
1480
1481 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1482 if (r < 0)
1483 return r;
1484
de190aef
LP
1485 if (ret)
1486 *ret = o;
cec736d2 1487
de190aef
LP
1488 if (offset)
1489 *offset = extra;
cec736d2 1490
de190aef 1491 return 1;
cec736d2
LP
1492 }
1493
de190aef
LP
1494 return generic_array_get(f, first, i-1, ret, offset);
1495}
cec736d2 1496
de190aef
LP
/* Results of the test_object() callbacks used for bisection: the tested
 * object matches the needle, or the needle is to be looked for to the left
 * resp. right of it. */
enum {
        TEST_FOUND = 0,
        TEST_LEFT  = 1,
        TEST_RIGHT = 2
};
cec736d2 1502
f268980d
LP
1503static int generic_array_bisect(
1504 JournalFile *f,
1505 uint64_t first,
1506 uint64_t n,
1507 uint64_t needle,
1508 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1509 direction_t direction,
1510 Object **ret,
1511 uint64_t *offset,
1512 uint64_t *idx) {
1513
1514 uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
de190aef
LP
1515 bool subtract_one = false;
1516 Object *o, *array = NULL;
1517 int r;
a4bcff5b 1518 ChainCacheItem *ci;
cec736d2 1519
de190aef
LP
1520 assert(f);
1521 assert(test_object);
cec736d2 1522
a4bcff5b 1523 /* Start with the first array in the chain */
de190aef 1524 a = first;
a4bcff5b
LP
1525
1526 ci = hashmap_get(f->chain_cache, &first);
1527 if (ci && n > ci->total) {
1528 /* Ah, we have iterated this bisection array chain
1529 * previously! Let's see if we can skip ahead in the
1530 * chain, as far as the last time. But we can't jump
1531 * backwards in the chain, so let's check that
1532 * first. */
1533
1534 r = test_object(f, ci->begin, needle);
1535 if (r < 0)
1536 return r;
1537
1538 if (r == TEST_LEFT) {
f268980d 1539 /* OK, what we are looking for is right of the
a4bcff5b
LP
1540 * begin of this EntryArray, so let's jump
1541 * straight to previously cached array in the
1542 * chain */
1543
1544 a = ci->array;
1545 n -= ci->total;
1546 t = ci->total;
f268980d 1547 last_index = ci->last_index;
a4bcff5b
LP
1548 }
1549 }
1550
de190aef
LP
1551 while (a > 0) {
1552 uint64_t left, right, k, lp;
1553
1554 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1555 if (r < 0)
1556 return r;
1557
de190aef
LP
1558 k = journal_file_entry_array_n_items(array);
1559 right = MIN(k, n);
1560 if (right <= 0)
1561 return 0;
cec736d2 1562
de190aef
LP
1563 i = right - 1;
1564 lp = p = le64toh(array->entry_array.items[i]);
1565 if (p <= 0)
1566 return -EBADMSG;
cec736d2 1567
de190aef
LP
1568 r = test_object(f, p, needle);
1569 if (r < 0)
1570 return r;
cec736d2 1571
de190aef
LP
1572 if (r == TEST_FOUND)
1573 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1574
1575 if (r == TEST_RIGHT) {
1576 left = 0;
1577 right -= 1;
f268980d
LP
1578
1579 if (last_index != (uint64_t) -1) {
1580 assert(last_index <= right);
1581
1582 /* If we cached the last index we
1583 * looked at, let's try to not to jump
1584 * too wildly around and see if we can
1585 * limit the range to look at early to
1586 * the immediate neighbors of the last
1587 * index we looked at. */
1588
1589 if (last_index > 0) {
1590 uint64_t x = last_index - 1;
1591
1592 p = le64toh(array->entry_array.items[x]);
1593 if (p <= 0)
1594 return -EBADMSG;
1595
1596 r = test_object(f, p, needle);
1597 if (r < 0)
1598 return r;
1599
1600 if (r == TEST_FOUND)
1601 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1602
1603 if (r == TEST_RIGHT)
1604 right = x;
1605 else
1606 left = x + 1;
1607 }
1608
1609 if (last_index < right) {
1610 uint64_t y = last_index + 1;
1611
1612 p = le64toh(array->entry_array.items[y]);
1613 if (p <= 0)
1614 return -EBADMSG;
1615
1616 r = test_object(f, p, needle);
1617 if (r < 0)
1618 return r;
1619
1620 if (r == TEST_FOUND)
1621 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1622
1623 if (r == TEST_RIGHT)
1624 right = y;
1625 else
1626 left = y + 1;
1627 }
f268980d
LP
1628 }
1629
de190aef
LP
1630 for (;;) {
1631 if (left == right) {
1632 if (direction == DIRECTION_UP)
1633 subtract_one = true;
1634
1635 i = left;
1636 goto found;
1637 }
1638
1639 assert(left < right);
de190aef 1640 i = (left + right) / 2;
f268980d 1641
de190aef
LP
1642 p = le64toh(array->entry_array.items[i]);
1643 if (p <= 0)
1644 return -EBADMSG;
1645
1646 r = test_object(f, p, needle);
1647 if (r < 0)
1648 return r;
cec736d2 1649
de190aef
LP
1650 if (r == TEST_FOUND)
1651 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1652
1653 if (r == TEST_RIGHT)
1654 right = i;
1655 else
1656 left = i + 1;
1657 }
1658 }
1659
cbdca852
LP
1660 if (k > n) {
1661 if (direction == DIRECTION_UP) {
1662 i = n;
1663 subtract_one = true;
1664 goto found;
1665 }
1666
cec736d2 1667 return 0;
cbdca852 1668 }
cec736d2 1669
de190aef
LP
1670 last_p = lp;
1671
1672 n -= k;
1673 t += k;
f268980d 1674 last_index = (uint64_t) -1;
de190aef 1675 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1676 }
1677
1678 return 0;
de190aef
LP
1679
1680found:
1681 if (subtract_one && t == 0 && i == 0)
1682 return 0;
1683
a4bcff5b 1684 /* Let's cache this item for the next invocation */
af13a6b0 1685 chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
a4bcff5b 1686
de190aef
LP
1687 if (subtract_one && i == 0)
1688 p = last_p;
1689 else if (subtract_one)
1690 p = le64toh(array->entry_array.items[i-1]);
1691 else
1692 p = le64toh(array->entry_array.items[i]);
1693
1694 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1695 if (r < 0)
1696 return r;
1697
1698 if (ret)
1699 *ret = o;
1700
1701 if (offset)
1702 *offset = p;
1703
1704 if (idx)
cbdca852 1705 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1706
1707 return 1;
cec736d2
LP
1708}
1709
f268980d
LP
1710
1711static int generic_array_bisect_plus_one(
1712 JournalFile *f,
1713 uint64_t extra,
1714 uint64_t first,
1715 uint64_t n,
1716 uint64_t needle,
1717 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1718 direction_t direction,
1719 Object **ret,
1720 uint64_t *offset,
1721 uint64_t *idx) {
de190aef 1722
cec736d2 1723 int r;
cbdca852
LP
1724 bool step_back = false;
1725 Object *o;
cec736d2
LP
1726
1727 assert(f);
de190aef 1728 assert(test_object);
cec736d2 1729
de190aef
LP
1730 if (n <= 0)
1731 return 0;
cec736d2 1732
de190aef
LP
1733 /* This bisects the array in object 'first', but first checks
1734 * an extra */
de190aef
LP
1735 r = test_object(f, extra, needle);
1736 if (r < 0)
1737 return r;
a536e261
LP
1738
1739 if (r == TEST_FOUND)
1740 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1741
cbdca852
LP
1742 /* if we are looking with DIRECTION_UP then we need to first
1743 see if in the actual array there is a matching entry, and
1744 return the last one of that. But if there isn't any we need
1745 to return this one. Hence remember this, and return it
1746 below. */
1747 if (r == TEST_LEFT)
1748 step_back = direction == DIRECTION_UP;
de190aef 1749
cbdca852
LP
1750 if (r == TEST_RIGHT) {
1751 if (direction == DIRECTION_DOWN)
1752 goto found;
1753 else
1754 return 0;
a536e261 1755 }
cec736d2 1756
de190aef
LP
1757 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1758
cbdca852
LP
1759 if (r == 0 && step_back)
1760 goto found;
1761
ecf68b1d 1762 if (r > 0 && idx)
de190aef
LP
1763 (*idx) ++;
1764
1765 return r;
cbdca852
LP
1766
1767found:
1768 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1769 if (r < 0)
1770 return r;
1771
1772 if (ret)
1773 *ret = o;
1774
1775 if (offset)
1776 *offset = extra;
1777
1778 if (idx)
1779 *idx = 0;
1780
1781 return 1;
1782}
1783
44a6b1b6 1784_pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
cbdca852
LP
1785 assert(f);
1786 assert(p > 0);
1787
1788 if (p == needle)
1789 return TEST_FOUND;
1790 else if (p < needle)
1791 return TEST_LEFT;
1792 else
1793 return TEST_RIGHT;
1794}
1795
1796int journal_file_move_to_entry_by_offset(
1797 JournalFile *f,
1798 uint64_t p,
1799 direction_t direction,
1800 Object **ret,
1801 uint64_t *offset) {
1802
1803 return generic_array_bisect(f,
1804 le64toh(f->header->entry_array_offset),
1805 le64toh(f->header->n_entries),
1806 p,
1807 test_object_offset,
1808 direction,
1809 ret, offset, NULL);
de190aef
LP
1810}
1811
cbdca852 1812
de190aef
LP
1813static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1814 Object *o;
1815 int r;
1816
1817 assert(f);
1818 assert(p > 0);
1819
1820 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1821 if (r < 0)
1822 return r;
1823
de190aef
LP
1824 if (le64toh(o->entry.seqnum) == needle)
1825 return TEST_FOUND;
1826 else if (le64toh(o->entry.seqnum) < needle)
1827 return TEST_LEFT;
1828 else
1829 return TEST_RIGHT;
1830}
cec736d2 1831
de190aef
LP
1832int journal_file_move_to_entry_by_seqnum(
1833 JournalFile *f,
1834 uint64_t seqnum,
1835 direction_t direction,
1836 Object **ret,
1837 uint64_t *offset) {
1838
1839 return generic_array_bisect(f,
1840 le64toh(f->header->entry_array_offset),
1841 le64toh(f->header->n_entries),
1842 seqnum,
1843 test_object_seqnum,
1844 direction,
1845 ret, offset, NULL);
1846}
cec736d2 1847
de190aef
LP
1848static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1849 Object *o;
1850 int r;
1851
1852 assert(f);
1853 assert(p > 0);
1854
1855 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1856 if (r < 0)
1857 return r;
1858
1859 if (le64toh(o->entry.realtime) == needle)
1860 return TEST_FOUND;
1861 else if (le64toh(o->entry.realtime) < needle)
1862 return TEST_LEFT;
1863 else
1864 return TEST_RIGHT;
cec736d2
LP
1865}
1866
de190aef
LP
1867int journal_file_move_to_entry_by_realtime(
1868 JournalFile *f,
1869 uint64_t realtime,
1870 direction_t direction,
1871 Object **ret,
1872 uint64_t *offset) {
1873
1874 return generic_array_bisect(f,
1875 le64toh(f->header->entry_array_offset),
1876 le64toh(f->header->n_entries),
1877 realtime,
1878 test_object_realtime,
1879 direction,
1880 ret, offset, NULL);
1881}
1882
1883static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1884 Object *o;
1885 int r;
1886
1887 assert(f);
1888 assert(p > 0);
1889
1890 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1891 if (r < 0)
1892 return r;
1893
1894 if (le64toh(o->entry.monotonic) == needle)
1895 return TEST_FOUND;
1896 else if (le64toh(o->entry.monotonic) < needle)
1897 return TEST_LEFT;
1898 else
1899 return TEST_RIGHT;
1900}
1901
47838ab3
ZJS
1902static inline int find_data_object_by_boot_id(
1903 JournalFile *f,
1904 sd_id128_t boot_id,
1905 Object **o,
1906 uint64_t *b) {
1907 char t[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
1908
1909 sd_id128_to_string(boot_id, t + 9);
1910 return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
1911}
1912
de190aef
LP
1913int journal_file_move_to_entry_by_monotonic(
1914 JournalFile *f,
1915 sd_id128_t boot_id,
1916 uint64_t monotonic,
1917 direction_t direction,
1918 Object **ret,
1919 uint64_t *offset) {
1920
de190aef
LP
1921 Object *o;
1922 int r;
1923
cbdca852 1924 assert(f);
de190aef 1925
47838ab3 1926 r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
de190aef
LP
1927 if (r < 0)
1928 return r;
cbdca852 1929 if (r == 0)
de190aef
LP
1930 return -ENOENT;
1931
1932 return generic_array_bisect_plus_one(f,
1933 le64toh(o->data.entry_offset),
1934 le64toh(o->data.entry_array_offset),
1935 le64toh(o->data.n_entries),
1936 monotonic,
1937 test_object_monotonic,
1938 direction,
1939 ret, offset, NULL);
1940}
1941
de190aef
LP
1942int journal_file_next_entry(
1943 JournalFile *f,
1944 Object *o, uint64_t p,
1945 direction_t direction,
1946 Object **ret, uint64_t *offset) {
1947
1948 uint64_t i, n;
cec736d2
LP
1949 int r;
1950
1951 assert(f);
de190aef
LP
1952 assert(p > 0 || !o);
1953
1954 n = le64toh(f->header->n_entries);
1955 if (n <= 0)
1956 return 0;
cec736d2
LP
1957
1958 if (!o)
de190aef 1959 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1960 else {
de190aef 1961 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1962 return -EINVAL;
1963
de190aef
LP
1964 r = generic_array_bisect(f,
1965 le64toh(f->header->entry_array_offset),
1966 le64toh(f->header->n_entries),
1967 p,
1968 test_object_offset,
1969 DIRECTION_DOWN,
1970 NULL, NULL,
1971 &i);
1972 if (r <= 0)
1973 return r;
1974
1975 if (direction == DIRECTION_DOWN) {
1976 if (i >= n - 1)
1977 return 0;
1978
1979 i++;
1980 } else {
1981 if (i <= 0)
1982 return 0;
1983
1984 i--;
1985 }
cec736d2
LP
1986 }
1987
de190aef
LP
1988 /* And jump to it */
1989 return generic_array_get(f,
1990 le64toh(f->header->entry_array_offset),
1991 i,
1992 ret, offset);
1993}
cec736d2 1994
de190aef
LP
1995int journal_file_skip_entry(
1996 JournalFile *f,
1997 Object *o, uint64_t p,
1998 int64_t skip,
1999 Object **ret, uint64_t *offset) {
2000
2001 uint64_t i, n;
2002 int r;
2003
2004 assert(f);
2005 assert(o);
2006 assert(p > 0);
2007
2008 if (o->object.type != OBJECT_ENTRY)
2009 return -EINVAL;
2010
2011 r = generic_array_bisect(f,
2012 le64toh(f->header->entry_array_offset),
2013 le64toh(f->header->n_entries),
2014 p,
2015 test_object_offset,
2016 DIRECTION_DOWN,
2017 NULL, NULL,
2018 &i);
2019 if (r <= 0)
cec736d2
LP
2020 return r;
2021
de190aef
LP
2022 /* Calculate new index */
2023 if (skip < 0) {
2024 if ((uint64_t) -skip >= i)
2025 i = 0;
2026 else
2027 i = i - (uint64_t) -skip;
2028 } else
2029 i += (uint64_t) skip;
cec736d2 2030
de190aef
LP
2031 n = le64toh(f->header->n_entries);
2032 if (n <= 0)
2033 return -EBADMSG;
cec736d2 2034
de190aef
LP
2035 if (i >= n)
2036 i = n-1;
2037
2038 return generic_array_get(f,
2039 le64toh(f->header->entry_array_offset),
2040 i,
2041 ret, offset);
cec736d2
LP
2042}
2043
de190aef
LP
2044int journal_file_next_entry_for_data(
2045 JournalFile *f,
2046 Object *o, uint64_t p,
2047 uint64_t data_offset,
2048 direction_t direction,
2049 Object **ret, uint64_t *offset) {
2050
2051 uint64_t n, i;
cec736d2 2052 int r;
de190aef 2053 Object *d;
cec736d2
LP
2054
2055 assert(f);
de190aef 2056 assert(p > 0 || !o);
cec736d2 2057
de190aef 2058 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 2059 if (r < 0)
de190aef 2060 return r;
cec736d2 2061
de190aef
LP
2062 n = le64toh(d->data.n_entries);
2063 if (n <= 0)
2064 return n;
cec736d2 2065
de190aef
LP
2066 if (!o)
2067 i = direction == DIRECTION_DOWN ? 0 : n - 1;
2068 else {
2069 if (o->object.type != OBJECT_ENTRY)
2070 return -EINVAL;
cec736d2 2071
de190aef
LP
2072 r = generic_array_bisect_plus_one(f,
2073 le64toh(d->data.entry_offset),
2074 le64toh(d->data.entry_array_offset),
2075 le64toh(d->data.n_entries),
2076 p,
2077 test_object_offset,
2078 DIRECTION_DOWN,
2079 NULL, NULL,
2080 &i);
2081
2082 if (r <= 0)
cec736d2
LP
2083 return r;
2084
de190aef
LP
2085 if (direction == DIRECTION_DOWN) {
2086 if (i >= n - 1)
2087 return 0;
cec736d2 2088
de190aef
LP
2089 i++;
2090 } else {
2091 if (i <= 0)
2092 return 0;
cec736d2 2093
de190aef
LP
2094 i--;
2095 }
cec736d2 2096
de190aef 2097 }
cec736d2 2098
de190aef
LP
2099 return generic_array_get_plus_one(f,
2100 le64toh(d->data.entry_offset),
2101 le64toh(d->data.entry_array_offset),
2102 i,
2103 ret, offset);
2104}
cec736d2 2105
cbdca852
LP
2106int journal_file_move_to_entry_by_offset_for_data(
2107 JournalFile *f,
2108 uint64_t data_offset,
2109 uint64_t p,
2110 direction_t direction,
2111 Object **ret, uint64_t *offset) {
2112
2113 int r;
2114 Object *d;
2115
2116 assert(f);
2117
2118 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2119 if (r < 0)
2120 return r;
2121
2122 return generic_array_bisect_plus_one(f,
2123 le64toh(d->data.entry_offset),
2124 le64toh(d->data.entry_array_offset),
2125 le64toh(d->data.n_entries),
2126 p,
2127 test_object_offset,
2128 direction,
2129 ret, offset, NULL);
2130}
2131
2132int journal_file_move_to_entry_by_monotonic_for_data(
2133 JournalFile *f,
2134 uint64_t data_offset,
2135 sd_id128_t boot_id,
2136 uint64_t monotonic,
2137 direction_t direction,
2138 Object **ret, uint64_t *offset) {
2139
cbdca852
LP
2140 Object *o, *d;
2141 int r;
2142 uint64_t b, z;
2143
2144 assert(f);
2145
2146 /* First, seek by time */
47838ab3 2147 r = find_data_object_by_boot_id(f, boot_id, &o, &b);
cbdca852
LP
2148 if (r < 0)
2149 return r;
2150 if (r == 0)
2151 return -ENOENT;
2152
2153 r = generic_array_bisect_plus_one(f,
2154 le64toh(o->data.entry_offset),
2155 le64toh(o->data.entry_array_offset),
2156 le64toh(o->data.n_entries),
2157 monotonic,
2158 test_object_monotonic,
2159 direction,
2160 NULL, &z, NULL);
2161 if (r <= 0)
2162 return r;
2163
2164 /* And now, continue seeking until we find an entry that
2165 * exists in both bisection arrays */
2166
2167 for (;;) {
2168 Object *qo;
2169 uint64_t p, q;
2170
2171 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2172 if (r < 0)
2173 return r;
2174
2175 r = generic_array_bisect_plus_one(f,
2176 le64toh(d->data.entry_offset),
2177 le64toh(d->data.entry_array_offset),
2178 le64toh(d->data.n_entries),
2179 z,
2180 test_object_offset,
2181 direction,
2182 NULL, &p, NULL);
2183 if (r <= 0)
2184 return r;
2185
2186 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
2187 if (r < 0)
2188 return r;
2189
2190 r = generic_array_bisect_plus_one(f,
2191 le64toh(o->data.entry_offset),
2192 le64toh(o->data.entry_array_offset),
2193 le64toh(o->data.n_entries),
2194 p,
2195 test_object_offset,
2196 direction,
2197 &qo, &q, NULL);
2198
2199 if (r <= 0)
2200 return r;
2201
2202 if (p == q) {
2203 if (ret)
2204 *ret = qo;
2205 if (offset)
2206 *offset = q;
2207
2208 return 1;
2209 }
2210
2211 z = q;
2212 }
2213
2214 return 0;
2215}
2216
de190aef
LP
2217int journal_file_move_to_entry_by_seqnum_for_data(
2218 JournalFile *f,
2219 uint64_t data_offset,
2220 uint64_t seqnum,
2221 direction_t direction,
2222 Object **ret, uint64_t *offset) {
cec736d2 2223
de190aef
LP
2224 Object *d;
2225 int r;
cec736d2 2226
91a31dde
LP
2227 assert(f);
2228
de190aef 2229 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 2230 if (r < 0)
de190aef 2231 return r;
cec736d2 2232
de190aef
LP
2233 return generic_array_bisect_plus_one(f,
2234 le64toh(d->data.entry_offset),
2235 le64toh(d->data.entry_array_offset),
2236 le64toh(d->data.n_entries),
2237 seqnum,
2238 test_object_seqnum,
2239 direction,
2240 ret, offset, NULL);
2241}
cec736d2 2242
de190aef
LP
2243int journal_file_move_to_entry_by_realtime_for_data(
2244 JournalFile *f,
2245 uint64_t data_offset,
2246 uint64_t realtime,
2247 direction_t direction,
2248 Object **ret, uint64_t *offset) {
2249
2250 Object *d;
2251 int r;
2252
91a31dde
LP
2253 assert(f);
2254
de190aef 2255 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 2256 if (r < 0)
de190aef
LP
2257 return r;
2258
2259 return generic_array_bisect_plus_one(f,
2260 le64toh(d->data.entry_offset),
2261 le64toh(d->data.entry_array_offset),
2262 le64toh(d->data.n_entries),
2263 realtime,
2264 test_object_realtime,
2265 direction,
2266 ret, offset, NULL);
cec736d2
LP
2267}
2268
0284adc6 2269void journal_file_dump(JournalFile *f) {
7560fffc 2270 Object *o;
7560fffc 2271 int r;
0284adc6 2272 uint64_t p;
7560fffc
LP
2273
2274 assert(f);
2275
0284adc6 2276 journal_file_print_header(f);
7560fffc 2277
0284adc6
LP
2278 p = le64toh(f->header->header_size);
2279 while (p != 0) {
2280 r = journal_file_move_to_object(f, -1, p, &o);
2281 if (r < 0)
2282 goto fail;
7560fffc 2283
0284adc6 2284 switch (o->object.type) {
d98cc1f2 2285
0284adc6
LP
2286 case OBJECT_UNUSED:
2287 printf("Type: OBJECT_UNUSED\n");
2288 break;
d98cc1f2 2289
0284adc6
LP
2290 case OBJECT_DATA:
2291 printf("Type: OBJECT_DATA\n");
2292 break;
7560fffc 2293
3c1668da
LP
2294 case OBJECT_FIELD:
2295 printf("Type: OBJECT_FIELD\n");
2296 break;
2297
0284adc6 2298 case OBJECT_ENTRY:
507f22bd
ZJS
2299 printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
2300 le64toh(o->entry.seqnum),
2301 le64toh(o->entry.monotonic),
2302 le64toh(o->entry.realtime));
0284adc6 2303 break;
7560fffc 2304
0284adc6
LP
2305 case OBJECT_FIELD_HASH_TABLE:
2306 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2307 break;
7560fffc 2308
0284adc6
LP
2309 case OBJECT_DATA_HASH_TABLE:
2310 printf("Type: OBJECT_DATA_HASH_TABLE\n");
2311 break;
7560fffc 2312
0284adc6
LP
2313 case OBJECT_ENTRY_ARRAY:
2314 printf("Type: OBJECT_ENTRY_ARRAY\n");
2315 break;
7560fffc 2316
0284adc6 2317 case OBJECT_TAG:
507f22bd
ZJS
2318 printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
2319 le64toh(o->tag.seqnum),
2320 le64toh(o->tag.epoch));
0284adc6 2321 break;
3c1668da
LP
2322
2323 default:
2324 printf("Type: unknown (%u)\n", o->object.type);
2325 break;
0284adc6 2326 }
7560fffc 2327
0284adc6
LP
2328 if (o->object.flags & OBJECT_COMPRESSED)
2329 printf("Flags: COMPRESSED\n");
7560fffc 2330
0284adc6
LP
2331 if (p == le64toh(f->header->tail_object_offset))
2332 p = 0;
2333 else
2334 p = p + ALIGN64(le64toh(o->object.size));
2335 }
7560fffc 2336
0284adc6
LP
2337 return;
2338fail:
2339 log_error("File corrupt");
7560fffc
LP
2340}
2341
718fe4b1
ZJS
2342static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
2343 const char *x;
2344
2345 x = format_timestamp(buf, l, t);
2346 if (x)
2347 return x;
2348 return " --- ";
2349}
2350
0284adc6 2351void journal_file_print_header(JournalFile *f) {
2765b7bb 2352 char a[33], b[33], c[33], d[33];
ed375beb 2353 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
a1a03e30
LP
2354 struct stat st;
2355 char bytes[FORMAT_BYTES_MAX];
7560fffc
LP
2356
2357 assert(f);
7560fffc 2358
0284adc6
LP
2359 printf("File Path: %s\n"
2360 "File ID: %s\n"
2361 "Machine ID: %s\n"
2362 "Boot ID: %s\n"
2363 "Sequential Number ID: %s\n"
2364 "State: %s\n"
2365 "Compatible Flags:%s%s\n"
2366 "Incompatible Flags:%s%s\n"
507f22bd
ZJS
2367 "Header size: %"PRIu64"\n"
2368 "Arena size: %"PRIu64"\n"
2369 "Data Hash Table Size: %"PRIu64"\n"
2370 "Field Hash Table Size: %"PRIu64"\n"
0284adc6 2371 "Rotate Suggested: %s\n"
507f22bd
ZJS
2372 "Head Sequential Number: %"PRIu64"\n"
2373 "Tail Sequential Number: %"PRIu64"\n"
0284adc6 2374 "Head Realtime Timestamp: %s\n"
3223f44f 2375 "Tail Realtime Timestamp: %s\n"
ed375beb 2376 "Tail Monotonic Timestamp: %s\n"
507f22bd
ZJS
2377 "Objects: %"PRIu64"\n"
2378 "Entry Objects: %"PRIu64"\n",
0284adc6
LP
2379 f->path,
2380 sd_id128_to_string(f->header->file_id, a),
2381 sd_id128_to_string(f->header->machine_id, b),
2382 sd_id128_to_string(f->header->boot_id, c),
2765b7bb 2383 sd_id128_to_string(f->header->seqnum_id, d),
3223f44f
LP
2384 f->header->state == STATE_OFFLINE ? "OFFLINE" :
2385 f->header->state == STATE_ONLINE ? "ONLINE" :
2386 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
8088cbd3
LP
2387 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
2388 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
2389 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
2390 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
507f22bd
ZJS
2391 le64toh(f->header->header_size),
2392 le64toh(f->header->arena_size),
2393 le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2394 le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
fb0951b0 2395 yes_no(journal_file_rotate_suggested(f, 0)),
507f22bd
ZJS
2396 le64toh(f->header->head_entry_seqnum),
2397 le64toh(f->header->tail_entry_seqnum),
718fe4b1
ZJS
2398 format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2399 format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
ed375beb 2400 format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC),
507f22bd
ZJS
2401 le64toh(f->header->n_objects),
2402 le64toh(f->header->n_entries));
7560fffc 2403
0284adc6 2404 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
507f22bd 2405 printf("Data Objects: %"PRIu64"\n"
0284adc6 2406 "Data Hash Table Fill: %.1f%%\n",
507f22bd 2407 le64toh(f->header->n_data),
0284adc6 2408 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 2409
0284adc6 2410 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
507f22bd 2411 printf("Field Objects: %"PRIu64"\n"
0284adc6 2412 "Field Hash Table Fill: %.1f%%\n",
507f22bd 2413 le64toh(f->header->n_fields),
0284adc6 2414 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
2415
2416 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
507f22bd
ZJS
2417 printf("Tag Objects: %"PRIu64"\n",
2418 le64toh(f->header->n_tags));
3223f44f 2419 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
507f22bd
ZJS
2420 printf("Entry Array Objects: %"PRIu64"\n",
2421 le64toh(f->header->n_entry_arrays));
a1a03e30
LP
2422
2423 if (fstat(f->fd, &st) >= 0)
2424 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
7560fffc
LP
2425}
2426
0284adc6
LP
2427int journal_file_open(
2428 const char *fname,
2429 int flags,
2430 mode_t mode,
2431 bool compress,
baed47c3 2432 bool seal,
0284adc6
LP
2433 JournalMetrics *metrics,
2434 MMapCache *mmap_cache,
2435 JournalFile *template,
2436 JournalFile **ret) {
7560fffc 2437
0284adc6
LP
2438 JournalFile *f;
2439 int r;
2440 bool newly_created = false;
7560fffc 2441
0284adc6 2442 assert(fname);
0559d3a5 2443 assert(ret);
7560fffc 2444
0284adc6
LP
2445 if ((flags & O_ACCMODE) != O_RDONLY &&
2446 (flags & O_ACCMODE) != O_RDWR)
2447 return -EINVAL;
7560fffc 2448
a0108012
LP
2449 if (!endswith(fname, ".journal") &&
2450 !endswith(fname, ".journal~"))
0284adc6 2451 return -EINVAL;
7560fffc 2452
0284adc6
LP
2453 f = new0(JournalFile, 1);
2454 if (!f)
2455 return -ENOMEM;
7560fffc 2456
0284adc6
LP
2457 f->fd = -1;
2458 f->mode = mode;
7560fffc 2459
0284adc6
LP
2460 f->flags = flags;
2461 f->prot = prot_from_flags(flags);
2462 f->writable = (flags & O_ACCMODE) != O_RDONLY;
48b61739 2463#ifdef HAVE_XZ
0284adc6 2464 f->compress = compress;
48b61739 2465#endif
49a32d43 2466#ifdef HAVE_GCRYPT
baed47c3 2467 f->seal = seal;
49a32d43 2468#endif
7560fffc 2469
0284adc6
LP
2470 if (mmap_cache)
2471 f->mmap = mmap_cache_ref(mmap_cache);
2472 else {
84168d80 2473 f->mmap = mmap_cache_new();
0284adc6
LP
2474 if (!f->mmap) {
2475 r = -ENOMEM;
2476 goto fail;
2477 }
2478 }
7560fffc 2479
0284adc6
LP
2480 f->path = strdup(fname);
2481 if (!f->path) {
2482 r = -ENOMEM;
2483 goto fail;
2484 }
7560fffc 2485
a4bcff5b
LP
2486 f->chain_cache = hashmap_new(uint64_hash_func, uint64_compare_func);
2487 if (!f->chain_cache) {
2488 r = -ENOMEM;
2489 goto fail;
2490 }
2491
0284adc6
LP
2492 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2493 if (f->fd < 0) {
2494 r = -errno;
2495 goto fail;
7560fffc 2496 }
7560fffc 2497
0284adc6
LP
2498 if (fstat(f->fd, &f->last_stat) < 0) {
2499 r = -errno;
2500 goto fail;
2501 }
7560fffc 2502
0284adc6 2503 if (f->last_stat.st_size == 0 && f->writable) {
fb0951b0
LP
2504#ifdef HAVE_XATTR
2505 uint64_t crtime;
2506
2507 /* Let's attach the creation time to the journal file,
2508 * so that the vacuuming code knows the age of this
2509 * file even if the file might end up corrupted one
2510 * day... Ideally we'd just use the creation time many
2511 * file systems maintain for each file, but there is
2512 * currently no usable API to query this, hence let's
2513 * emulate this via extended attributes. If extended
2514 * attributes are not supported we'll just skip this,
2515 * and rely solely on mtime/atime/ctime of the file.*/
2516
2517 crtime = htole64((uint64_t) now(CLOCK_REALTIME));
2518 fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
2519#endif
7560fffc 2520
feb12d3e 2521#ifdef HAVE_GCRYPT
0284adc6 2522 /* Try to load the FSPRG state, and if we can't, then
baed47c3 2523 * just don't do sealing */
49a32d43
LP
2524 if (f->seal) {
2525 r = journal_file_fss_load(f);
2526 if (r < 0)
2527 f->seal = false;
2528 }
feb12d3e 2529#endif
7560fffc 2530
0284adc6
LP
2531 r = journal_file_init_header(f, template);
2532 if (r < 0)
2533 goto fail;
7560fffc 2534
0284adc6
LP
2535 if (fstat(f->fd, &f->last_stat) < 0) {
2536 r = -errno;
2537 goto fail;
2538 }
fb0951b0
LP
2539
2540 newly_created = true;
0284adc6 2541 }
7560fffc 2542
0284adc6
LP
2543 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2544 r = -EIO;
2545 goto fail;
2546 }
7560fffc 2547
0284adc6
LP
2548 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2549 if (f->header == MAP_FAILED) {
2550 f->header = NULL;
2551 r = -errno;
2552 goto fail;
2553 }
7560fffc 2554
0284adc6
LP
2555 if (!newly_created) {
2556 r = journal_file_verify_header(f);
2557 if (r < 0)
2558 goto fail;
2559 }
7560fffc 2560
feb12d3e 2561#ifdef HAVE_GCRYPT
0284adc6 2562 if (!newly_created && f->writable) {
baed47c3 2563 r = journal_file_fss_load(f);
0284adc6
LP
2564 if (r < 0)
2565 goto fail;
2566 }
feb12d3e 2567#endif
cec736d2
LP
2568
2569 if (f->writable) {
4a92baf3
LP
2570 if (metrics) {
2571 journal_default_metrics(metrics, f->fd);
2572 f->metrics = *metrics;
2573 } else if (template)
2574 f->metrics = template->metrics;
2575
cec736d2
LP
2576 r = journal_file_refresh_header(f);
2577 if (r < 0)
2578 goto fail;
2579 }
2580
feb12d3e 2581#ifdef HAVE_GCRYPT
baed47c3 2582 r = journal_file_hmac_setup(f);
14d10188
LP
2583 if (r < 0)
2584 goto fail;
feb12d3e 2585#endif
14d10188 2586
cec736d2 2587 if (newly_created) {
de190aef 2588 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2589 if (r < 0)
2590 goto fail;
2591
de190aef 2592 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2593 if (r < 0)
2594 goto fail;
7560fffc 2595
feb12d3e 2596#ifdef HAVE_GCRYPT
7560fffc
LP
2597 r = journal_file_append_first_tag(f);
2598 if (r < 0)
2599 goto fail;
feb12d3e 2600#endif
cec736d2
LP
2601 }
2602
de190aef 2603 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2604 if (r < 0)
2605 goto fail;
2606
de190aef 2607 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2608 if (r < 0)
2609 goto fail;
2610
0559d3a5 2611 *ret = f;
cec736d2
LP
2612 return 0;
2613
2614fail:
2615 journal_file_close(f);
2616
2617 return r;
2618}
0ac38b70 2619
baed47c3 2620int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
57535f47 2621 _cleanup_free_ char *p = NULL;
0ac38b70
LP
2622 size_t l;
2623 JournalFile *old_file, *new_file = NULL;
2624 int r;
2625
2626 assert(f);
2627 assert(*f);
2628
2629 old_file = *f;
2630
2631 if (!old_file->writable)
2632 return -EINVAL;
2633
2634 if (!endswith(old_file->path, ".journal"))
2635 return -EINVAL;
2636
2637 l = strlen(old_file->path);
57535f47
ZJS
2638 r = asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
2639 (int) l - 8, old_file->path,
2640 SD_ID128_FORMAT_VAL(old_file->header->seqnum_id),
2641 le64toh((*f)->header->head_entry_seqnum),
2642 le64toh((*f)->header->head_entry_realtime));
2643 if (r < 0)
0ac38b70
LP
2644 return -ENOMEM;
2645
0ac38b70 2646 r = rename(old_file->path, p);
0ac38b70
LP
2647 if (r < 0)
2648 return -errno;
2649
ccdbaf91 2650 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2651
baed47c3 2652 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2653 journal_file_close(old_file);
2654
2655 *f = new_file;
2656 return r;
2657}
2658
9447a7f1
LP
2659int journal_file_open_reliably(
2660 const char *fname,
2661 int flags,
2662 mode_t mode,
7560fffc 2663 bool compress,
baed47c3 2664 bool seal,
4a92baf3 2665 JournalMetrics *metrics,
27370278 2666 MMapCache *mmap_cache,
9447a7f1
LP
2667 JournalFile *template,
2668 JournalFile **ret) {
2669
2670 int r;
2671 size_t l;
ed375beb 2672 _cleanup_free_ char *p = NULL;
9447a7f1 2673
baed47c3 2674 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2675 metrics, mmap_cache, template, ret);
0071d9f1
LP
2676 if (r != -EBADMSG && /* corrupted */
2677 r != -ENODATA && /* truncated */
2678 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2679 r != -EPROTONOSUPPORT && /* incompatible feature */
2680 r != -EBUSY && /* unclean shutdown */
2681 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2682 return r;
2683
2684 if ((flags & O_ACCMODE) == O_RDONLY)
2685 return r;
2686
2687 if (!(flags & O_CREAT))
2688 return r;
2689
7560fffc
LP
2690 if (!endswith(fname, ".journal"))
2691 return r;
2692
5c70eab4
LP
2693 /* The file is corrupted. Rotate it away and try it again (but only once) */
2694
9447a7f1 2695 l = strlen(fname);
9bf3b535 2696 if (asprintf(&p, "%.*s@%016llx-%016" PRIx64 ".journal~",
57535f47 2697 (int) l - 8, fname,
9447a7f1 2698 (unsigned long long) now(CLOCK_REALTIME),
9bf3b535 2699 random_u64()) < 0)
9447a7f1
LP
2700 return -ENOMEM;
2701
2702 r = rename(fname, p);
9447a7f1
LP
2703 if (r < 0)
2704 return -errno;
2705
a1a1898f 2706 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2707
baed47c3 2708 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2709 metrics, mmap_cache, template, ret);
9447a7f1
LP
2710}
2711
cf244689
LP
2712int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2713 uint64_t i, n;
2714 uint64_t q, xor_hash = 0;
2715 int r;
2716 EntryItem *items;
2717 dual_timestamp ts;
2718
2719 assert(from);
2720 assert(to);
2721 assert(o);
2722 assert(p);
2723
2724 if (!to->writable)
2725 return -EPERM;
2726
2727 ts.monotonic = le64toh(o->entry.monotonic);
2728 ts.realtime = le64toh(o->entry.realtime);
2729
cf244689 2730 n = journal_file_entry_n_items(o);
4faa7004
TA
2731 /* alloca() can't take 0, hence let's allocate at least one */
2732 items = alloca(sizeof(EntryItem) * MAX(1u, n));
cf244689
LP
2733
2734 for (i = 0; i < n; i++) {
4fd052ae
FC
2735 uint64_t l, h;
2736 le64_t le_hash;
cf244689
LP
2737 size_t t;
2738 void *data;
2739 Object *u;
2740
2741 q = le64toh(o->entry.items[i].object_offset);
2742 le_hash = o->entry.items[i].hash;
2743
2744 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2745 if (r < 0)
2746 return r;
2747
2748 if (le_hash != o->data.hash)
2749 return -EBADMSG;
2750
2751 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2752 t = (size_t) l;
2753
2754 /* We hit the limit on 32bit machines */
2755 if ((uint64_t) t != l)
2756 return -E2BIG;
2757
2758 if (o->object.flags & OBJECT_COMPRESSED) {
2759#ifdef HAVE_XZ
2760 uint64_t rsize;
2761
93b73b06 2762 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0))
cf244689
LP
2763 return -EBADMSG;
2764
2765 data = from->compress_buffer;
2766 l = rsize;
2767#else
2768 return -EPROTONOSUPPORT;
2769#endif
2770 } else
2771 data = o->data.payload;
2772
2773 r = journal_file_append_data(to, data, l, &u, &h);
2774 if (r < 0)
2775 return r;
2776
2777 xor_hash ^= le64toh(u->data.hash);
2778 items[i].object_offset = htole64(h);
2779 items[i].hash = u->data.hash;
2780
2781 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2782 if (r < 0)
2783 return r;
2784 }
2785
2786 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2787}
babfc091
LP
2788
2789void journal_default_metrics(JournalMetrics *m, int fd) {
2790 uint64_t fs_size = 0;
2791 struct statvfs ss;
a7bc2c2a 2792 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2793
2794 assert(m);
2795 assert(fd >= 0);
2796
2797 if (fstatvfs(fd, &ss) >= 0)
2798 fs_size = ss.f_frsize * ss.f_blocks;
2799
2800 if (m->max_use == (uint64_t) -1) {
2801
2802 if (fs_size > 0) {
2803 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2804
2805 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2806 m->max_use = DEFAULT_MAX_USE_UPPER;
2807
2808 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2809 m->max_use = DEFAULT_MAX_USE_LOWER;
2810 } else
2811 m->max_use = DEFAULT_MAX_USE_LOWER;
2812 } else {
2813 m->max_use = PAGE_ALIGN(m->max_use);
2814
2815 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2816 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2817 }
2818
2819 if (m->max_size == (uint64_t) -1) {
2820 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2821
2822 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2823 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2824 } else
2825 m->max_size = PAGE_ALIGN(m->max_size);
2826
2827 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2828 m->max_size = JOURNAL_FILE_SIZE_MIN;
2829
2830 if (m->max_size*2 > m->max_use)
2831 m->max_use = m->max_size*2;
2832
2833 if (m->min_size == (uint64_t) -1)
2834 m->min_size = JOURNAL_FILE_SIZE_MIN;
2835 else {
2836 m->min_size = PAGE_ALIGN(m->min_size);
2837
2838 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2839 m->min_size = JOURNAL_FILE_SIZE_MIN;
2840
2841 if (m->min_size > m->max_size)
2842 m->max_size = m->min_size;
2843 }
2844
2845 if (m->keep_free == (uint64_t) -1) {
2846
2847 if (fs_size > 0) {
8621b110 2848 m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
babfc091
LP
2849
2850 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2851 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2852
2853 } else
2854 m->keep_free = DEFAULT_KEEP_FREE;
2855 }
2856
2b43f939
LP
2857 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2858 format_bytes(a, sizeof(a), m->max_use),
2859 format_bytes(b, sizeof(b), m->max_size),
2860 format_bytes(c, sizeof(c), m->min_size),
2861 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2862}
08984293
LP
2863
2864int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2865 assert(f);
2866 assert(from || to);
2867
2868 if (from) {
162566a4
LP
2869 if (f->header->head_entry_realtime == 0)
2870 return -ENOENT;
08984293 2871
162566a4 2872 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2873 }
2874
2875 if (to) {
162566a4
LP
2876 if (f->header->tail_entry_realtime == 0)
2877 return -ENOENT;
08984293 2878
162566a4 2879 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2880 }
2881
2882 return 1;
2883}
2884
2885int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
08984293
LP
2886 Object *o;
2887 uint64_t p;
2888 int r;
2889
2890 assert(f);
2891 assert(from || to);
2892
47838ab3 2893 r = find_data_object_by_boot_id(f, boot_id, &o, &p);
08984293
LP
2894 if (r <= 0)
2895 return r;
2896
2897 if (le64toh(o->data.n_entries) <= 0)
2898 return 0;
2899
2900 if (from) {
2901 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2902 if (r < 0)
2903 return r;
2904
2905 *from = le64toh(o->entry.monotonic);
2906 }
2907
2908 if (to) {
2909 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2910 if (r < 0)
2911 return r;
2912
2913 r = generic_array_get_plus_one(f,
2914 le64toh(o->data.entry_offset),
2915 le64toh(o->data.entry_array_offset),
2916 le64toh(o->data.n_entries)-1,
2917 &o, NULL);
2918 if (r <= 0)
2919 return r;
2920
2921 *to = le64toh(o->entry.monotonic);
2922 }
2923
2924 return 1;
2925}
dca6219e 2926
fb0951b0 2927bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
dca6219e
LP
2928 assert(f);
2929
2930 /* If we gained new header fields we gained new features,
2931 * hence suggest a rotation */
361f9cbc
LP
2932 if (le64toh(f->header->header_size) < sizeof(Header)) {
2933 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2934 return true;
361f9cbc 2935 }
dca6219e
LP
2936
2937 /* Let's check if the hash tables grew over a certain fill
2938 * level (75%, borrowing this value from Java's hash table
2939 * implementation), and if so suggest a rotation. To calculate
2940 * the fill level we need the n_data field, which only exists
2941 * in newer versions. */
2942
2943 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc 2944 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
507f22bd 2945 log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
361f9cbc
LP
2946 f->path,
2947 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
507f22bd
ZJS
2948 le64toh(f->header->n_data),
2949 le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2950 (unsigned long long) f->last_stat.st_size,
2951 f->last_stat.st_size / le64toh(f->header->n_data));
dca6219e 2952 return true;
361f9cbc 2953 }
dca6219e
LP
2954
2955 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc 2956 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
507f22bd 2957 log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
361f9cbc
LP
2958 f->path,
2959 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
507f22bd
ZJS
2960 le64toh(f->header->n_fields),
2961 le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
dca6219e 2962 return true;
361f9cbc 2963 }
dca6219e 2964
0598fd4a
LP
2965 /* Are the data objects properly indexed by field objects? */
2966 if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
2967 JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
2968 le64toh(f->header->n_data) > 0 &&
2969 le64toh(f->header->n_fields) == 0)
2970 return true;
2971
fb0951b0
LP
2972 if (max_file_usec > 0) {
2973 usec_t t, h;
2974
2975 h = le64toh(f->header->head_entry_realtime);
2976 t = now(CLOCK_REALTIME);
2977
2978 if (h > 0 && t > h + max_file_usec)
2979 return true;
2980 }
2981
dca6219e
LP
2982 return false;
2983}