]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
shared/install: use char** convention for strvs
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
fb0951b0
LP
30#ifdef HAVE_XATTR
31#include <attr/xattr.h>
32#endif
33
cec736d2
LP
34#include "journal-def.h"
35#include "journal-file.h"
0284adc6 36#include "journal-authenticate.h"
cec736d2 37#include "lookup3.h"
807e17f0 38#include "compress.h"
7560fffc 39#include "fsprg.h"
cec736d2 40
4a92baf3
LP
41#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
42#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
cec736d2 43
be19b7df 44#define COMPRESSION_SIZE_THRESHOLD (512ULL)
807e17f0 45
babfc091 46/* This is the minimum journal file size */
253f59df 47#define JOURNAL_FILE_SIZE_MIN (4ULL*1024ULL*1024ULL) /* 4 MiB */
babfc091
LP
48
49/* These are the lower and upper bounds if we deduce the max_use value
50 * from the file system size */
51#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
52#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
53
54/* This is the upper bound if we deduce max_size from max_use */
71100051 55#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
babfc091
LP
56
57/* This is the upper bound if we deduce the keep_free value from the
58 * file system size */
59#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
60
61/* This is the keep_free value when we can't determine the system
62 * size */
63#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
64
dca6219e
LP
65/* n_data was the first entry we added after the initial file format design */
66#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 67
a4bcff5b
LP
68/* How many entries to keep in the entry array chain cache at max */
69#define CHAIN_CACHE_MAX 20
70
a676e665
LP
71/* How much to increase the journal file size at once each time we allocate something new. */
72#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL) /* 8MB */
73
9588bc32 74static int journal_file_set_online(JournalFile *f) {
26687bf8
OS
75 assert(f);
76
77 if (!f->writable)
78 return -EPERM;
79
80 if (!(f->fd >= 0 && f->header))
81 return -EINVAL;
82
83 switch(f->header->state) {
84 case STATE_ONLINE:
85 return 0;
86
87 case STATE_OFFLINE:
88 f->header->state = STATE_ONLINE;
89 fsync(f->fd);
90 return 0;
91
92 default:
93 return -EINVAL;
94 }
95}
96
97int journal_file_set_offline(JournalFile *f) {
98 assert(f);
99
100 if (!f->writable)
101 return -EPERM;
102
103 if (!(f->fd >= 0 && f->header))
104 return -EINVAL;
105
106 if (f->header->state != STATE_ONLINE)
107 return 0;
108
109 fsync(f->fd);
110
111 f->header->state = STATE_OFFLINE;
112
113 fsync(f->fd);
114
115 return 0;
116}
117
cec736d2 118void journal_file_close(JournalFile *f) {
de190aef 119 assert(f);
cec736d2 120
feb12d3e 121#ifdef HAVE_GCRYPT
b0af6f41 122 /* Write the final tag */
c586dbf1 123 if (f->seal && f->writable)
b0af6f41 124 journal_file_append_tag(f);
feb12d3e 125#endif
b0af6f41 126
7560fffc 127 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
128 if (f->mmap && f->fd >= 0)
129 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc 130
26687bf8 131 journal_file_set_offline(f);
cec736d2 132
26687bf8 133 if (f->header)
d384c7a8 134 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
cec736d2 135
0ac38b70
LP
136 if (f->fd >= 0)
137 close_nointr_nofail(f->fd);
138
cec736d2 139 free(f->path);
807e17f0 140
16e9f408
LP
141 if (f->mmap)
142 mmap_cache_unref(f->mmap);
143
a4bcff5b
LP
144 hashmap_free_free(f->chain_cache);
145
807e17f0
LP
146#ifdef HAVE_XZ
147 free(f->compress_buffer);
148#endif
149
7560fffc 150#ifdef HAVE_GCRYPT
baed47c3
LP
151 if (f->fss_file)
152 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
153 else if (f->fsprg_state)
154 free(f->fsprg_state);
155
156 free(f->fsprg_seed);
7560fffc
LP
157
158 if (f->hmac)
159 gcry_md_close(f->hmac);
160#endif
161
cec736d2
LP
162 free(f);
163}
164
0ac38b70 165static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
166 Header h;
167 ssize_t k;
168 int r;
169
170 assert(f);
171
172 zero(h);
7560fffc 173 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 174 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 175
7560fffc
LP
176 h.incompatible_flags =
177 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
178
179 h.compatible_flags =
baed47c3 180 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 181
cec736d2
LP
182 r = sd_id128_randomize(&h.file_id);
183 if (r < 0)
184 return r;
185
0ac38b70
LP
186 if (template) {
187 h.seqnum_id = template->header->seqnum_id;
beec0085 188 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
189 } else
190 h.seqnum_id = h.file_id;
cec736d2
LP
191
192 k = pwrite(f->fd, &h, sizeof(h), 0);
193 if (k < 0)
194 return -errno;
195
196 if (k != sizeof(h))
197 return -EIO;
198
199 return 0;
200}
201
202static int journal_file_refresh_header(JournalFile *f) {
203 int r;
de190aef 204 sd_id128_t boot_id;
cec736d2
LP
205
206 assert(f);
207
208 r = sd_id128_get_machine(&f->header->machine_id);
209 if (r < 0)
210 return r;
211
de190aef 212 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
213 if (r < 0)
214 return r;
215
de190aef
LP
216 if (sd_id128_equal(boot_id, f->header->boot_id))
217 f->tail_entry_monotonic_valid = true;
218
219 f->header->boot_id = boot_id;
220
26687bf8 221 journal_file_set_online(f);
b788cc23 222
7560fffc 223 /* Sync the online state to disk */
a676e665 224 fsync(f->fd);
b788cc23 225
cec736d2
LP
226 return 0;
227}
228
229static int journal_file_verify_header(JournalFile *f) {
230 assert(f);
231
7560fffc 232 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
233 return -EBADMSG;
234
7560fffc
LP
235 /* In both read and write mode we refuse to open files with
236 * incompatible flags we don't know */
807e17f0 237#ifdef HAVE_XZ
7560fffc 238 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
239 return -EPROTONOSUPPORT;
240#else
cec736d2
LP
241 if (f->header->incompatible_flags != 0)
242 return -EPROTONOSUPPORT;
807e17f0 243#endif
cec736d2 244
7560fffc
LP
245 /* When open for writing we refuse to open files with
246 * compatible flags, too */
247 if (f->writable) {
248#ifdef HAVE_GCRYPT
baed47c3 249 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
250 return -EPROTONOSUPPORT;
251#else
252 if (f->header->compatible_flags != 0)
253 return -EPROTONOSUPPORT;
254#endif
255 }
256
db11ac1a
LP
257 if (f->header->state >= _STATE_MAX)
258 return -EBADMSG;
259
dca6219e
LP
260 /* The first addition was n_data, so check that we are at least this large */
261 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
262 return -EBADMSG;
263
8088cbd3 264 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
265 return -EBADMSG;
266
db11ac1a
LP
267 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
268 return -ENODATA;
269
270 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
271 return -ENODATA;
272
7762e02b
LP
273 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
274 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
275 !VALID64(le64toh(f->header->tail_object_offset)) ||
276 !VALID64(le64toh(f->header->entry_array_offset)))
277 return -ENODATA;
278
279 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
280 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
281 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
282 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
cec736d2
LP
283 return -ENODATA;
284
285 if (f->writable) {
ccdbaf91 286 uint8_t state;
cec736d2
LP
287 sd_id128_t machine_id;
288 int r;
289
290 r = sd_id128_get_machine(&machine_id);
291 if (r < 0)
292 return r;
293
294 if (!sd_id128_equal(machine_id, f->header->machine_id))
295 return -EHOSTDOWN;
296
de190aef 297 state = f->header->state;
cec736d2 298
71fa6f00
LP
299 if (state == STATE_ONLINE) {
300 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
301 return -EBUSY;
302 } else if (state == STATE_ARCHIVED)
cec736d2 303 return -ESHUTDOWN;
71fa6f00
LP
304 else if (state != STATE_OFFLINE) {
305 log_debug("Journal file %s has unknown state %u.", f->path, state);
306 return -EBUSY;
307 }
cec736d2
LP
308 }
309
8088cbd3 310 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
c586dbf1 311
f1889c91 312 f->seal = JOURNAL_HEADER_SEALED(f->header);
7560fffc 313
cec736d2
LP
314 return 0;
315}
316
317static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
eda4b58b 318 uint64_t old_size, new_size;
fec2aa2f 319 int r;
cec736d2
LP
320
321 assert(f);
322
cec736d2 323 /* We assume that this file is not sparse, and we know that
38ac38b2 324 * for sure, since we always call posix_fallocate()
cec736d2
LP
325 * ourselves */
326
327 old_size =
23b0b2b2 328 le64toh(f->header->header_size) +
cec736d2
LP
329 le64toh(f->header->arena_size);
330
bc85bfee 331 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
332 if (new_size < le64toh(f->header->header_size))
333 new_size = le64toh(f->header->header_size);
bc85bfee
LP
334
335 if (new_size <= old_size)
cec736d2
LP
336 return 0;
337
a676e665 338 if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
bc85bfee 339 return -E2BIG;
cec736d2 340
a676e665 341 if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
cec736d2
LP
342 struct statvfs svfs;
343
344 if (fstatvfs(f->fd, &svfs) >= 0) {
345 uint64_t available;
346
347 available = svfs.f_bfree * svfs.f_bsize;
348
bc85bfee
LP
349 if (available >= f->metrics.keep_free)
350 available -= f->metrics.keep_free;
cec736d2
LP
351 else
352 available = 0;
353
354 if (new_size - old_size > available)
355 return -E2BIG;
356 }
357 }
358
eda4b58b
LP
359 /* Increase by larger blocks at once */
360 new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
361 if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
362 new_size = f->metrics.max_size;
363
bc85bfee
LP
364 /* Note that the glibc fallocate() fallback is very
365 inefficient, hence we try to minimize the allocation area
366 as we can. */
fec2aa2f
GV
367 r = posix_fallocate(f->fd, old_size, new_size - old_size);
368 if (r != 0)
369 return -r;
cec736d2 370
eda4b58b
LP
371 if (fstat(f->fd, &f->last_stat) < 0)
372 return -errno;
cec736d2 373
23b0b2b2 374 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
375
376 return 0;
377}
378
fcde2389 379static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
cec736d2 380 assert(f);
cec736d2
LP
381 assert(ret);
382
7762e02b
LP
383 if (size <= 0)
384 return -EINVAL;
385
2a59ea54 386 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
387 if (offset + size > (uint64_t) f->last_stat.st_size) {
388 /* Hmm, out of range? Let's refresh the fstat() data
389 * first, before we trust that check. */
390
391 if (fstat(f->fd, &f->last_stat) < 0 ||
392 offset + size > (uint64_t) f->last_stat.st_size)
393 return -EADDRNOTAVAIL;
394 }
395
fcde2389 396 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
cec736d2
LP
397}
398
16e9f408
LP
399static uint64_t minimum_header_size(Object *o) {
400
b8e891e6 401 static const uint64_t table[] = {
16e9f408
LP
402 [OBJECT_DATA] = sizeof(DataObject),
403 [OBJECT_FIELD] = sizeof(FieldObject),
404 [OBJECT_ENTRY] = sizeof(EntryObject),
405 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
406 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
407 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
408 [OBJECT_TAG] = sizeof(TagObject),
409 };
410
411 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
412 return sizeof(ObjectHeader);
413
414 return table[o->object.type];
415}
416
de190aef 417int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
418 int r;
419 void *t;
420 Object *o;
421 uint64_t s;
16e9f408 422 unsigned context;
cec736d2
LP
423
424 assert(f);
425 assert(ret);
426
db11ac1a
LP
427 /* Objects may only be located at multiple of 64 bit */
428 if (!VALID64(offset))
429 return -EFAULT;
430
16e9f408
LP
431 /* One context for each type, plus one catch-all for the rest */
432 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
433
fcde2389 434 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
435 if (r < 0)
436 return r;
437
438 o = (Object*) t;
439 s = le64toh(o->object.size);
440
441 if (s < sizeof(ObjectHeader))
442 return -EBADMSG;
443
16e9f408
LP
444 if (o->object.type <= OBJECT_UNUSED)
445 return -EBADMSG;
446
447 if (s < minimum_header_size(o))
448 return -EBADMSG;
449
3c1668da 450 if (type > 0 && o->object.type != type)
cec736d2
LP
451 return -EBADMSG;
452
453 if (s > sizeof(ObjectHeader)) {
fcde2389 454 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
cec736d2
LP
455 if (r < 0)
456 return r;
457
458 o = (Object*) t;
459 }
460
cec736d2
LP
461 *ret = o;
462 return 0;
463}
464
d98cc1f2 465static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
466 uint64_t r;
467
468 assert(f);
469
beec0085 470 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
471
472 if (seqnum) {
de190aef 473 /* If an external seqnum counter was passed, we update
c2373f84
LP
474 * both the local and the external one, and set it to
475 * the maximum of both */
476
477 if (*seqnum + 1 > r)
478 r = *seqnum + 1;
479
480 *seqnum = r;
481 }
482
beec0085 483 f->header->tail_entry_seqnum = htole64(r);
cec736d2 484
beec0085
LP
485 if (f->header->head_entry_seqnum == 0)
486 f->header->head_entry_seqnum = htole64(r);
de190aef 487
cec736d2
LP
488 return r;
489}
490
0284adc6 491int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
492 int r;
493 uint64_t p;
494 Object *tail, *o;
495 void *t;
496
497 assert(f);
16e9f408 498 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
499 assert(size >= sizeof(ObjectHeader));
500 assert(offset);
501 assert(ret);
502
26687bf8
OS
503 r = journal_file_set_online(f);
504 if (r < 0)
505 return r;
506
cec736d2 507 p = le64toh(f->header->tail_object_offset);
cec736d2 508 if (p == 0)
23b0b2b2 509 p = le64toh(f->header->header_size);
cec736d2 510 else {
de190aef 511 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
512 if (r < 0)
513 return r;
514
515 p += ALIGN64(le64toh(tail->object.size));
516 }
517
518 r = journal_file_allocate(f, p, size);
519 if (r < 0)
520 return r;
521
fcde2389 522 r = journal_file_move_to(f, type, false, p, size, &t);
cec736d2
LP
523 if (r < 0)
524 return r;
525
526 o = (Object*) t;
527
528 zero(o->object);
de190aef 529 o->object.type = type;
cec736d2
LP
530 o->object.size = htole64(size);
531
532 f->header->tail_object_offset = htole64(p);
cec736d2
LP
533 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
534
535 *ret = o;
536 *offset = p;
537
538 return 0;
539}
540
de190aef 541static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
542 uint64_t s, p;
543 Object *o;
544 int r;
545
546 assert(f);
547
dfabe643 548 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
549 journal file and we want to make sure we never get beyond
550 75% fill level. Calculate the hash table size for the
551 maximum file size based on these metrics. */
552
dfabe643 553 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
554 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
555 s = DEFAULT_DATA_HASH_TABLE_SIZE;
556
507f22bd 557 log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
4a92baf3 558
de190aef
LP
559 r = journal_file_append_object(f,
560 OBJECT_DATA_HASH_TABLE,
561 offsetof(Object, hash_table.items) + s,
562 &o, &p);
cec736d2
LP
563 if (r < 0)
564 return r;
565
de190aef 566 memset(o->hash_table.items, 0, s);
cec736d2 567
de190aef
LP
568 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
569 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
570
571 return 0;
572}
573
de190aef 574static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
575 uint64_t s, p;
576 Object *o;
577 int r;
578
579 assert(f);
580
3c1668da
LP
581 /* We use a fixed size hash table for the fields as this
582 * number should grow very slowly only */
583
de190aef
LP
584 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
585 r = journal_file_append_object(f,
586 OBJECT_FIELD_HASH_TABLE,
587 offsetof(Object, hash_table.items) + s,
588 &o, &p);
cec736d2
LP
589 if (r < 0)
590 return r;
591
de190aef 592 memset(o->hash_table.items, 0, s);
cec736d2 593
de190aef
LP
594 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
595 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
596
597 return 0;
598}
599
de190aef 600static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
601 uint64_t s, p;
602 void *t;
603 int r;
604
605 assert(f);
606
de190aef
LP
607 p = le64toh(f->header->data_hash_table_offset);
608 s = le64toh(f->header->data_hash_table_size);
cec736d2 609
de190aef 610 r = journal_file_move_to(f,
16e9f408 611 OBJECT_DATA_HASH_TABLE,
fcde2389 612 true,
de190aef
LP
613 p, s,
614 &t);
cec736d2
LP
615 if (r < 0)
616 return r;
617
de190aef 618 f->data_hash_table = t;
cec736d2
LP
619 return 0;
620}
621
de190aef 622static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
623 uint64_t s, p;
624 void *t;
625 int r;
626
627 assert(f);
628
de190aef
LP
629 p = le64toh(f->header->field_hash_table_offset);
630 s = le64toh(f->header->field_hash_table_size);
cec736d2 631
de190aef 632 r = journal_file_move_to(f,
16e9f408 633 OBJECT_FIELD_HASH_TABLE,
fcde2389 634 true,
de190aef
LP
635 p, s,
636 &t);
cec736d2
LP
637 if (r < 0)
638 return r;
639
de190aef 640 f->field_hash_table = t;
cec736d2
LP
641 return 0;
642}
643
3c1668da
LP
644static int journal_file_link_field(
645 JournalFile *f,
646 Object *o,
647 uint64_t offset,
648 uint64_t hash) {
649
650 uint64_t p, h;
651 int r;
652
653 assert(f);
654 assert(o);
655 assert(offset > 0);
656
657 if (o->object.type != OBJECT_FIELD)
658 return -EINVAL;
659
660 /* This might alter the window we are looking at */
661
662 o->field.next_hash_offset = o->field.head_data_offset = 0;
663
664 h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
665 p = le64toh(f->field_hash_table[h].tail_hash_offset);
666 if (p == 0)
667 f->field_hash_table[h].head_hash_offset = htole64(offset);
668 else {
669 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
670 if (r < 0)
671 return r;
672
673 o->field.next_hash_offset = htole64(offset);
674 }
675
676 f->field_hash_table[h].tail_hash_offset = htole64(offset);
677
678 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
679 f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
680
681 return 0;
682}
683
684static int journal_file_link_data(
685 JournalFile *f,
686 Object *o,
687 uint64_t offset,
688 uint64_t hash) {
689
de190aef 690 uint64_t p, h;
cec736d2
LP
691 int r;
692
693 assert(f);
694 assert(o);
695 assert(offset > 0);
b588975f
LP
696
697 if (o->object.type != OBJECT_DATA)
698 return -EINVAL;
cec736d2 699
48496df6
LP
700 /* This might alter the window we are looking at */
701
de190aef
LP
702 o->data.next_hash_offset = o->data.next_field_offset = 0;
703 o->data.entry_offset = o->data.entry_array_offset = 0;
704 o->data.n_entries = 0;
cec736d2 705
de190aef 706 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 707 p = le64toh(f->data_hash_table[h].tail_hash_offset);
3c1668da 708 if (p == 0)
cec736d2 709 /* Only entry in the hash table is easy */
de190aef 710 f->data_hash_table[h].head_hash_offset = htole64(offset);
3c1668da 711 else {
48496df6
LP
712 /* Move back to the previous data object, to patch in
713 * pointer */
cec736d2 714
de190aef 715 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
716 if (r < 0)
717 return r;
718
de190aef 719 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
720 }
721
de190aef 722 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 723
dca6219e
LP
724 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
725 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
726
cec736d2
LP
727 return 0;
728}
729
3c1668da
LP
730int journal_file_find_field_object_with_hash(
731 JournalFile *f,
732 const void *field, uint64_t size, uint64_t hash,
733 Object **ret, uint64_t *offset) {
734
735 uint64_t p, osize, h;
736 int r;
737
738 assert(f);
739 assert(field && size > 0);
740
741 osize = offsetof(Object, field.payload) + size;
742
743 if (f->header->field_hash_table_size == 0)
744 return -EBADMSG;
745
746 h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
747 p = le64toh(f->field_hash_table[h].head_hash_offset);
748
749 while (p > 0) {
750 Object *o;
751
752 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
753 if (r < 0)
754 return r;
755
756 if (le64toh(o->field.hash) == hash &&
757 le64toh(o->object.size) == osize &&
758 memcmp(o->field.payload, field, size) == 0) {
759
760 if (ret)
761 *ret = o;
762 if (offset)
763 *offset = p;
764
765 return 1;
766 }
767
768 p = le64toh(o->field.next_hash_offset);
769 }
770
771 return 0;
772}
773
774int journal_file_find_field_object(
775 JournalFile *f,
776 const void *field, uint64_t size,
777 Object **ret, uint64_t *offset) {
778
779 uint64_t hash;
780
781 assert(f);
782 assert(field && size > 0);
783
784 hash = hash64(field, size);
785
786 return journal_file_find_field_object_with_hash(f,
787 field, size, hash,
788 ret, offset);
789}
790
de190aef
LP
791int journal_file_find_data_object_with_hash(
792 JournalFile *f,
793 const void *data, uint64_t size, uint64_t hash,
794 Object **ret, uint64_t *offset) {
48496df6 795
de190aef 796 uint64_t p, osize, h;
cec736d2
LP
797 int r;
798
799 assert(f);
800 assert(data || size == 0);
801
802 osize = offsetof(Object, data.payload) + size;
803
bc85bfee
LP
804 if (f->header->data_hash_table_size == 0)
805 return -EBADMSG;
806
de190aef
LP
807 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
808 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 809
de190aef
LP
810 while (p > 0) {
811 Object *o;
cec736d2 812
de190aef 813 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
814 if (r < 0)
815 return r;
816
807e17f0 817 if (le64toh(o->data.hash) != hash)
85a131e8 818 goto next;
807e17f0
LP
819
820 if (o->object.flags & OBJECT_COMPRESSED) {
821#ifdef HAVE_XZ
b785c858 822 uint64_t l, rsize;
cec736d2 823
807e17f0
LP
824 l = le64toh(o->object.size);
825 if (l <= offsetof(Object, data.payload))
cec736d2
LP
826 return -EBADMSG;
827
807e17f0
LP
828 l -= offsetof(Object, data.payload);
829
93b73b06 830 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0))
807e17f0
LP
831 return -EBADMSG;
832
b785c858 833 if (rsize == size &&
807e17f0
LP
834 memcmp(f->compress_buffer, data, size) == 0) {
835
836 if (ret)
837 *ret = o;
838
839 if (offset)
840 *offset = p;
841
842 return 1;
843 }
844#else
845 return -EPROTONOSUPPORT;
846#endif
847
848 } else if (le64toh(o->object.size) == osize &&
849 memcmp(o->data.payload, data, size) == 0) {
850
cec736d2
LP
851 if (ret)
852 *ret = o;
853
854 if (offset)
855 *offset = p;
856
de190aef 857 return 1;
cec736d2
LP
858 }
859
85a131e8 860 next:
cec736d2
LP
861 p = le64toh(o->data.next_hash_offset);
862 }
863
de190aef
LP
864 return 0;
865}
866
867int journal_file_find_data_object(
868 JournalFile *f,
869 const void *data, uint64_t size,
870 Object **ret, uint64_t *offset) {
871
872 uint64_t hash;
873
874 assert(f);
875 assert(data || size == 0);
876
877 hash = hash64(data, size);
878
879 return journal_file_find_data_object_with_hash(f,
880 data, size, hash,
881 ret, offset);
882}
883
3c1668da
LP
884static int journal_file_append_field(
885 JournalFile *f,
886 const void *field, uint64_t size,
887 Object **ret, uint64_t *offset) {
888
889 uint64_t hash, p;
890 uint64_t osize;
891 Object *o;
892 int r;
893
894 assert(f);
895 assert(field && size > 0);
896
897 hash = hash64(field, size);
898
899 r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
900 if (r < 0)
901 return r;
902 else if (r > 0) {
903
904 if (ret)
905 *ret = o;
906
907 if (offset)
908 *offset = p;
909
910 return 0;
911 }
912
913 osize = offsetof(Object, field.payload) + size;
914 r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
8c92d4bb
LP
915 if (r < 0)
916 return r;
3c1668da
LP
917
918 o->field.hash = htole64(hash);
919 memcpy(o->field.payload, field, size);
920
921 r = journal_file_link_field(f, o, p, hash);
922 if (r < 0)
923 return r;
924
925 /* The linking might have altered the window, so let's
926 * refresh our pointer */
927 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
928 if (r < 0)
929 return r;
930
931#ifdef HAVE_GCRYPT
932 r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
933 if (r < 0)
934 return r;
935#endif
936
937 if (ret)
938 *ret = o;
939
940 if (offset)
941 *offset = p;
942
943 return 0;
944}
945
48496df6
LP
946static int journal_file_append_data(
947 JournalFile *f,
948 const void *data, uint64_t size,
949 Object **ret, uint64_t *offset) {
950
de190aef
LP
951 uint64_t hash, p;
952 uint64_t osize;
953 Object *o;
954 int r;
807e17f0 955 bool compressed = false;
3c1668da 956 const void *eq;
de190aef
LP
957
958 assert(f);
959 assert(data || size == 0);
960
961 hash = hash64(data, size);
962
963 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
964 if (r < 0)
965 return r;
966 else if (r > 0) {
967
968 if (ret)
969 *ret = o;
970
971 if (offset)
972 *offset = p;
973
974 return 0;
975 }
976
977 osize = offsetof(Object, data.payload) + size;
978 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
979 if (r < 0)
980 return r;
981
cec736d2 982 o->data.hash = htole64(hash);
807e17f0
LP
983
984#ifdef HAVE_XZ
985 if (f->compress &&
986 size >= COMPRESSION_SIZE_THRESHOLD) {
987 uint64_t rsize;
988
989 compressed = compress_blob(data, size, o->data.payload, &rsize);
990
991 if (compressed) {
992 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
993 o->object.flags |= OBJECT_COMPRESSED;
994
507f22bd 995 log_debug("Compressed data object %"PRIu64" -> %"PRIu64, size, rsize);
807e17f0
LP
996 }
997 }
998#endif
999
64825d3c 1000 if (!compressed && size > 0)
807e17f0 1001 memcpy(o->data.payload, data, size);
cec736d2 1002
de190aef 1003 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
1004 if (r < 0)
1005 return r;
1006
48496df6
LP
1007 /* The linking might have altered the window, so let's
1008 * refresh our pointer */
1009 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1010 if (r < 0)
1011 return r;
1012
08c6f819
SL
1013 if (!data)
1014 eq = NULL;
1015 else
1016 eq = memchr(data, '=', size);
3c1668da
LP
1017 if (eq && eq > data) {
1018 uint64_t fp;
1019 Object *fo;
1020
1021 /* Create field object ... */
1022 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
1023 if (r < 0)
1024 return r;
1025
1026 /* ... and link it in. */
1027 o->data.next_field_offset = fo->field.head_data_offset;
1028 fo->field.head_data_offset = le64toh(p);
1029 }
1030
5996c7c2
LP
1031#ifdef HAVE_GCRYPT
1032 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
1033 if (r < 0)
1034 return r;
1035#endif
1036
cec736d2
LP
1037 if (ret)
1038 *ret = o;
1039
1040 if (offset)
de190aef 1041 *offset = p;
cec736d2
LP
1042
1043 return 0;
1044}
1045
1046uint64_t journal_file_entry_n_items(Object *o) {
1047 assert(o);
b588975f
LP
1048
1049 if (o->object.type != OBJECT_ENTRY)
1050 return 0;
cec736d2
LP
1051
1052 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
1053}
1054
0284adc6 1055uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 1056 assert(o);
b588975f
LP
1057
1058 if (o->object.type != OBJECT_ENTRY_ARRAY)
1059 return 0;
de190aef
LP
1060
1061 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
1062}
1063
fb9a24b6
LP
1064uint64_t journal_file_hash_table_n_items(Object *o) {
1065 assert(o);
b588975f
LP
1066
1067 if (o->object.type != OBJECT_DATA_HASH_TABLE &&
1068 o->object.type != OBJECT_FIELD_HASH_TABLE)
1069 return 0;
fb9a24b6
LP
1070
1071 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
1072}
1073
de190aef 1074static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
1075 le64_t *first,
1076 le64_t *idx,
de190aef 1077 uint64_t p) {
cec736d2 1078 int r;
de190aef
LP
1079 uint64_t n = 0, ap = 0, q, i, a, hidx;
1080 Object *o;
1081
cec736d2 1082 assert(f);
de190aef
LP
1083 assert(first);
1084 assert(idx);
1085 assert(p > 0);
cec736d2 1086
de190aef
LP
1087 a = le64toh(*first);
1088 i = hidx = le64toh(*idx);
1089 while (a > 0) {
1090
1091 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1092 if (r < 0)
1093 return r;
cec736d2 1094
de190aef
LP
1095 n = journal_file_entry_array_n_items(o);
1096 if (i < n) {
1097 o->entry_array.items[i] = htole64(p);
1098 *idx = htole64(hidx + 1);
1099 return 0;
1100 }
cec736d2 1101
de190aef
LP
1102 i -= n;
1103 ap = a;
1104 a = le64toh(o->entry_array.next_entry_array_offset);
1105 }
1106
1107 if (hidx > n)
1108 n = (hidx+1) * 2;
1109 else
1110 n = n * 2;
1111
1112 if (n < 4)
1113 n = 4;
1114
1115 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
1116 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
1117 &o, &q);
cec736d2
LP
1118 if (r < 0)
1119 return r;
1120
feb12d3e 1121#ifdef HAVE_GCRYPT
5996c7c2 1122 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
b0af6f41
LP
1123 if (r < 0)
1124 return r;
feb12d3e 1125#endif
b0af6f41 1126
de190aef 1127 o->entry_array.items[i] = htole64(p);
cec736d2 1128
de190aef 1129 if (ap == 0)
7be3aa17 1130 *first = htole64(q);
cec736d2 1131 else {
de190aef 1132 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
1133 if (r < 0)
1134 return r;
1135
de190aef
LP
1136 o->entry_array.next_entry_array_offset = htole64(q);
1137 }
cec736d2 1138
2dee23eb
LP
1139 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1140 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
1141
de190aef
LP
1142 *idx = htole64(hidx + 1);
1143
1144 return 0;
1145}
cec736d2 1146
de190aef 1147static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
1148 le64_t *extra,
1149 le64_t *first,
1150 le64_t *idx,
de190aef
LP
1151 uint64_t p) {
1152
1153 int r;
1154
1155 assert(f);
1156 assert(extra);
1157 assert(first);
1158 assert(idx);
1159 assert(p > 0);
1160
1161 if (*idx == 0)
1162 *extra = htole64(p);
1163 else {
4fd052ae 1164 le64_t i;
de190aef 1165
7be3aa17 1166 i = htole64(le64toh(*idx) - 1);
de190aef
LP
1167 r = link_entry_into_array(f, first, &i, p);
1168 if (r < 0)
1169 return r;
cec736d2
LP
1170 }
1171
de190aef
LP
1172 *idx = htole64(le64toh(*idx) + 1);
1173 return 0;
1174}
1175
1176static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
1177 uint64_t p;
1178 int r;
1179 assert(f);
1180 assert(o);
1181 assert(offset > 0);
1182
1183 p = le64toh(o->entry.items[i].object_offset);
1184 if (p == 0)
1185 return -EINVAL;
1186
1187 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
1188 if (r < 0)
1189 return r;
1190
de190aef
LP
1191 return link_entry_into_array_plus_one(f,
1192 &o->data.entry_offset,
1193 &o->data.entry_array_offset,
1194 &o->data.n_entries,
1195 offset);
cec736d2
LP
1196}
1197
1198static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 1199 uint64_t n, i;
cec736d2
LP
1200 int r;
1201
1202 assert(f);
1203 assert(o);
1204 assert(offset > 0);
b588975f
LP
1205
1206 if (o->object.type != OBJECT_ENTRY)
1207 return -EINVAL;
cec736d2 1208
b788cc23
LP
1209 __sync_synchronize();
1210
cec736d2 1211 /* Link up the entry itself */
de190aef
LP
1212 r = link_entry_into_array(f,
1213 &f->header->entry_array_offset,
1214 &f->header->n_entries,
1215 offset);
1216 if (r < 0)
1217 return r;
cec736d2 1218
507f22bd 1219 /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
cec736d2 1220
de190aef 1221 if (f->header->head_entry_realtime == 0)
0ac38b70 1222 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 1223
0ac38b70 1224 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
1225 f->header->tail_entry_monotonic = o->entry.monotonic;
1226
1227 f->tail_entry_monotonic_valid = true;
cec736d2
LP
1228
1229 /* Link up the items */
1230 n = journal_file_entry_n_items(o);
1231 for (i = 0; i < n; i++) {
1232 r = journal_file_link_entry_item(f, o, offset, i);
1233 if (r < 0)
1234 return r;
1235 }
1236
cec736d2
LP
1237 return 0;
1238}
1239
1240static int journal_file_append_entry_internal(
1241 JournalFile *f,
1242 const dual_timestamp *ts,
1243 uint64_t xor_hash,
1244 const EntryItem items[], unsigned n_items,
de190aef 1245 uint64_t *seqnum,
cec736d2
LP
1246 Object **ret, uint64_t *offset) {
1247 uint64_t np;
1248 uint64_t osize;
1249 Object *o;
1250 int r;
1251
1252 assert(f);
1253 assert(items || n_items == 0);
de190aef 1254 assert(ts);
cec736d2
LP
1255
1256 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1257
de190aef 1258 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
1259 if (r < 0)
1260 return r;
1261
d98cc1f2 1262 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 1263 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
1264 o->entry.realtime = htole64(ts->realtime);
1265 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
1266 o->entry.xor_hash = htole64(xor_hash);
1267 o->entry.boot_id = f->header->boot_id;
1268
feb12d3e 1269#ifdef HAVE_GCRYPT
5996c7c2 1270 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
b0af6f41
LP
1271 if (r < 0)
1272 return r;
feb12d3e 1273#endif
b0af6f41 1274
cec736d2
LP
1275 r = journal_file_link_entry(f, o, np);
1276 if (r < 0)
1277 return r;
1278
1279 if (ret)
1280 *ret = o;
1281
1282 if (offset)
1283 *offset = np;
1284
1285 return 0;
1286}
1287
cf244689 1288void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1289 assert(f);
1290
1291 /* inotify() does not receive IN_MODIFY events from file
1292 * accesses done via mmap(). After each access we hence
1293 * trigger IN_MODIFY by truncating the journal file to its
1294 * current size which triggers IN_MODIFY. */
1295
bc85bfee
LP
1296 __sync_synchronize();
1297
50f20cfd 1298 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
c5315881 1299 log_error("Failed to truncate file to its own size: %m");
50f20cfd
LP
1300}
1301
1f2da9ec
LP
1302static int entry_item_cmp(const void *_a, const void *_b) {
1303 const EntryItem *a = _a, *b = _b;
1304
1305 if (le64toh(a->object_offset) < le64toh(b->object_offset))
1306 return -1;
1307 if (le64toh(a->object_offset) > le64toh(b->object_offset))
1308 return 1;
1309 return 0;
1310}
1311
de190aef 1312int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1313 unsigned i;
1314 EntryItem *items;
1315 int r;
1316 uint64_t xor_hash = 0;
de190aef 1317 struct dual_timestamp _ts;
cec736d2
LP
1318
1319 assert(f);
1320 assert(iovec || n_iovec == 0);
1321
de190aef
LP
1322 if (!ts) {
1323 dual_timestamp_get(&_ts);
1324 ts = &_ts;
1325 }
1326
1327 if (f->tail_entry_monotonic_valid &&
1328 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1329 return -EINVAL;
1330
feb12d3e 1331#ifdef HAVE_GCRYPT
7560fffc
LP
1332 r = journal_file_maybe_append_tag(f, ts->realtime);
1333 if (r < 0)
1334 return r;
feb12d3e 1335#endif
7560fffc 1336
64825d3c 1337 /* alloca() can't take 0, hence let's allocate at least one */
9607d947 1338 items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));
cec736d2
LP
1339
1340 for (i = 0; i < n_iovec; i++) {
1341 uint64_t p;
1342 Object *o;
1343
1344 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1345 if (r < 0)
cf244689 1346 return r;
cec736d2
LP
1347
1348 xor_hash ^= le64toh(o->data.hash);
1349 items[i].object_offset = htole64(p);
de7b95cd 1350 items[i].hash = o->data.hash;
cec736d2
LP
1351 }
1352
1f2da9ec
LP
1353 /* Order by the position on disk, in order to improve seek
1354 * times for rotating media. */
7ff7394d 1355 qsort_safe(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1f2da9ec 1356
de190aef 1357 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1358
50f20cfd
LP
1359 journal_file_post_change(f);
1360
cec736d2
LP
1361 return r;
1362}
1363
a4bcff5b
LP
/* One cached position inside an entry array chain, keyed by the offset of the
 * chain's first array. */
struct ChainCacheItem {
        /* Offset of the array at the beginning of the chain (hashmap key) */
        uint64_t first;
        /* Offset of the cached array */
        uint64_t array;
        /* The first item in the cached array */
        uint64_t begin;
        /* Total number of items in all arrays before the cached one */
        uint64_t total;
        /* Last index looked at, to optimize locality when bisecting */
        uint64_t last_index;
};

typedef struct ChainCacheItem ChainCacheItem;
1371
1372static void chain_cache_put(
1373 Hashmap *h,
1374 ChainCacheItem *ci,
1375 uint64_t first,
1376 uint64_t array,
1377 uint64_t begin,
f268980d
LP
1378 uint64_t total,
1379 uint64_t last_index) {
a4bcff5b
LP
1380
1381 if (!ci) {
34741aa3
LP
1382 /* If the chain item to cache for this chain is the
1383 * first one it's not worth caching anything */
1384 if (array == first)
1385 return;
1386
a4bcff5b
LP
1387 if (hashmap_size(h) >= CHAIN_CACHE_MAX)
1388 ci = hashmap_steal_first(h);
1389 else {
1390 ci = new(ChainCacheItem, 1);
1391 if (!ci)
1392 return;
1393 }
1394
1395 ci->first = first;
1396
1397 if (hashmap_put(h, &ci->first, ci) < 0) {
1398 free(ci);
1399 return;
1400 }
1401 } else
1402 assert(ci->first == first);
1403
1404 ci->array = array;
1405 ci->begin = begin;
1406 ci->total = total;
f268980d 1407 ci->last_index = last_index;
a4bcff5b
LP
1408}
1409
f268980d
LP
1410static int generic_array_get(
1411 JournalFile *f,
1412 uint64_t first,
1413 uint64_t i,
1414 Object **ret, uint64_t *offset) {
de190aef 1415
cec736d2 1416 Object *o;
a4bcff5b 1417 uint64_t p = 0, a, t = 0;
cec736d2 1418 int r;
a4bcff5b 1419 ChainCacheItem *ci;
cec736d2
LP
1420
1421 assert(f);
1422
de190aef 1423 a = first;
a4bcff5b
LP
1424
1425 /* Try the chain cache first */
1426 ci = hashmap_get(f->chain_cache, &first);
1427 if (ci && i > ci->total) {
1428 a = ci->array;
1429 i -= ci->total;
1430 t = ci->total;
1431 }
1432
de190aef 1433 while (a > 0) {
a4bcff5b 1434 uint64_t k;
cec736d2 1435
de190aef
LP
1436 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1437 if (r < 0)
1438 return r;
cec736d2 1439
a4bcff5b
LP
1440 k = journal_file_entry_array_n_items(o);
1441 if (i < k) {
de190aef 1442 p = le64toh(o->entry_array.items[i]);
a4bcff5b 1443 goto found;
cec736d2
LP
1444 }
1445
a4bcff5b
LP
1446 i -= k;
1447 t += k;
de190aef
LP
1448 a = le64toh(o->entry_array.next_entry_array_offset);
1449 }
1450
a4bcff5b
LP
1451 return 0;
1452
1453found:
1454 /* Let's cache this item for the next invocation */
f268980d 1455 chain_cache_put(f->chain_cache, ci, first, a, o->entry_array.items[0], t, i);
de190aef
LP
1456
1457 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1458 if (r < 0)
1459 return r;
1460
1461 if (ret)
1462 *ret = o;
1463
1464 if (offset)
1465 *offset = p;
1466
1467 return 1;
1468}
1469
f268980d
LP
1470static int generic_array_get_plus_one(
1471 JournalFile *f,
1472 uint64_t extra,
1473 uint64_t first,
1474 uint64_t i,
1475 Object **ret, uint64_t *offset) {
de190aef
LP
1476
1477 Object *o;
1478
1479 assert(f);
1480
1481 if (i == 0) {
1482 int r;
1483
1484 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1485 if (r < 0)
1486 return r;
1487
de190aef
LP
1488 if (ret)
1489 *ret = o;
cec736d2 1490
de190aef
LP
1491 if (offset)
1492 *offset = extra;
cec736d2 1493
de190aef 1494 return 1;
cec736d2
LP
1495 }
1496
de190aef
LP
1497 return generic_array_get(f, first, i-1, ret, offset);
1498}
cec736d2 1499
de190aef
LP
/* Results returned by the test_object() callbacks used for bisection */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object compares lower than the needle */
        TEST_RIGHT = 2  /* the tested object compares higher than the needle */
};
cec736d2 1505
f268980d
LP
1506static int generic_array_bisect(
1507 JournalFile *f,
1508 uint64_t first,
1509 uint64_t n,
1510 uint64_t needle,
1511 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1512 direction_t direction,
1513 Object **ret,
1514 uint64_t *offset,
1515 uint64_t *idx) {
1516
1517 uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
de190aef
LP
1518 bool subtract_one = false;
1519 Object *o, *array = NULL;
1520 int r;
a4bcff5b 1521 ChainCacheItem *ci;
cec736d2 1522
de190aef
LP
1523 assert(f);
1524 assert(test_object);
cec736d2 1525
a4bcff5b 1526 /* Start with the first array in the chain */
de190aef 1527 a = first;
a4bcff5b
LP
1528
1529 ci = hashmap_get(f->chain_cache, &first);
1530 if (ci && n > ci->total) {
1531 /* Ah, we have iterated this bisection array chain
1532 * previously! Let's see if we can skip ahead in the
1533 * chain, as far as the last time. But we can't jump
1534 * backwards in the chain, so let's check that
1535 * first. */
1536
1537 r = test_object(f, ci->begin, needle);
1538 if (r < 0)
1539 return r;
1540
1541 if (r == TEST_LEFT) {
f268980d 1542 /* OK, what we are looking for is right of the
a4bcff5b
LP
1543 * begin of this EntryArray, so let's jump
1544 * straight to previously cached array in the
1545 * chain */
1546
1547 a = ci->array;
1548 n -= ci->total;
1549 t = ci->total;
f268980d 1550 last_index = ci->last_index;
a4bcff5b
LP
1551 }
1552 }
1553
de190aef
LP
1554 while (a > 0) {
1555 uint64_t left, right, k, lp;
1556
1557 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1558 if (r < 0)
1559 return r;
1560
de190aef
LP
1561 k = journal_file_entry_array_n_items(array);
1562 right = MIN(k, n);
1563 if (right <= 0)
1564 return 0;
cec736d2 1565
de190aef
LP
1566 i = right - 1;
1567 lp = p = le64toh(array->entry_array.items[i]);
1568 if (p <= 0)
1569 return -EBADMSG;
cec736d2 1570
de190aef
LP
1571 r = test_object(f, p, needle);
1572 if (r < 0)
1573 return r;
cec736d2 1574
de190aef
LP
1575 if (r == TEST_FOUND)
1576 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1577
1578 if (r == TEST_RIGHT) {
1579 left = 0;
1580 right -= 1;
f268980d
LP
1581
1582 if (last_index != (uint64_t) -1) {
1583 assert(last_index <= right);
1584
1585 /* If we cached the last index we
1586 * looked at, let's try to not to jump
1587 * too wildly around and see if we can
1588 * limit the range to look at early to
1589 * the immediate neighbors of the last
1590 * index we looked at. */
1591
1592 if (last_index > 0) {
1593 uint64_t x = last_index - 1;
1594
1595 p = le64toh(array->entry_array.items[x]);
1596 if (p <= 0)
1597 return -EBADMSG;
1598
1599 r = test_object(f, p, needle);
1600 if (r < 0)
1601 return r;
1602
1603 if (r == TEST_FOUND)
1604 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1605
1606 if (r == TEST_RIGHT)
1607 right = x;
1608 else
1609 left = x + 1;
1610 }
1611
1612 if (last_index < right) {
1613 uint64_t y = last_index + 1;
1614
1615 p = le64toh(array->entry_array.items[y]);
1616 if (p <= 0)
1617 return -EBADMSG;
1618
1619 r = test_object(f, p, needle);
1620 if (r < 0)
1621 return r;
1622
1623 if (r == TEST_FOUND)
1624 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1625
1626 if (r == TEST_RIGHT)
1627 right = y;
1628 else
1629 left = y + 1;
1630 }
f268980d
LP
1631 }
1632
de190aef
LP
1633 for (;;) {
1634 if (left == right) {
1635 if (direction == DIRECTION_UP)
1636 subtract_one = true;
1637
1638 i = left;
1639 goto found;
1640 }
1641
1642 assert(left < right);
de190aef 1643 i = (left + right) / 2;
f268980d 1644
de190aef
LP
1645 p = le64toh(array->entry_array.items[i]);
1646 if (p <= 0)
1647 return -EBADMSG;
1648
1649 r = test_object(f, p, needle);
1650 if (r < 0)
1651 return r;
cec736d2 1652
de190aef
LP
1653 if (r == TEST_FOUND)
1654 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1655
1656 if (r == TEST_RIGHT)
1657 right = i;
1658 else
1659 left = i + 1;
1660 }
1661 }
1662
cbdca852
LP
1663 if (k > n) {
1664 if (direction == DIRECTION_UP) {
1665 i = n;
1666 subtract_one = true;
1667 goto found;
1668 }
1669
cec736d2 1670 return 0;
cbdca852 1671 }
cec736d2 1672
de190aef
LP
1673 last_p = lp;
1674
1675 n -= k;
1676 t += k;
f268980d 1677 last_index = (uint64_t) -1;
de190aef 1678 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1679 }
1680
1681 return 0;
de190aef
LP
1682
1683found:
1684 if (subtract_one && t == 0 && i == 0)
1685 return 0;
1686
a4bcff5b 1687 /* Let's cache this item for the next invocation */
e5462cd8 1688 chain_cache_put(f->chain_cache, ci, first, a, array->entry_array.items[0], t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
a4bcff5b 1689
de190aef
LP
1690 if (subtract_one && i == 0)
1691 p = last_p;
1692 else if (subtract_one)
1693 p = le64toh(array->entry_array.items[i-1]);
1694 else
1695 p = le64toh(array->entry_array.items[i]);
1696
1697 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1698 if (r < 0)
1699 return r;
1700
1701 if (ret)
1702 *ret = o;
1703
1704 if (offset)
1705 *offset = p;
1706
1707 if (idx)
cbdca852 1708 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1709
1710 return 1;
cec736d2
LP
1711}
1712
f268980d
LP
1713
1714static int generic_array_bisect_plus_one(
1715 JournalFile *f,
1716 uint64_t extra,
1717 uint64_t first,
1718 uint64_t n,
1719 uint64_t needle,
1720 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1721 direction_t direction,
1722 Object **ret,
1723 uint64_t *offset,
1724 uint64_t *idx) {
de190aef 1725
cec736d2 1726 int r;
cbdca852
LP
1727 bool step_back = false;
1728 Object *o;
cec736d2
LP
1729
1730 assert(f);
de190aef 1731 assert(test_object);
cec736d2 1732
de190aef
LP
1733 if (n <= 0)
1734 return 0;
cec736d2 1735
de190aef
LP
1736 /* This bisects the array in object 'first', but first checks
1737 * an extra */
de190aef
LP
1738 r = test_object(f, extra, needle);
1739 if (r < 0)
1740 return r;
a536e261
LP
1741
1742 if (r == TEST_FOUND)
1743 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1744
cbdca852
LP
1745 /* if we are looking with DIRECTION_UP then we need to first
1746 see if in the actual array there is a matching entry, and
1747 return the last one of that. But if there isn't any we need
1748 to return this one. Hence remember this, and return it
1749 below. */
1750 if (r == TEST_LEFT)
1751 step_back = direction == DIRECTION_UP;
de190aef 1752
cbdca852
LP
1753 if (r == TEST_RIGHT) {
1754 if (direction == DIRECTION_DOWN)
1755 goto found;
1756 else
1757 return 0;
a536e261 1758 }
cec736d2 1759
de190aef
LP
1760 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1761
cbdca852
LP
1762 if (r == 0 && step_back)
1763 goto found;
1764
ecf68b1d 1765 if (r > 0 && idx)
de190aef
LP
1766 (*idx) ++;
1767
1768 return r;
cbdca852
LP
1769
1770found:
1771 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1772 if (r < 0)
1773 return r;
1774
1775 if (ret)
1776 *ret = o;
1777
1778 if (offset)
1779 *offset = extra;
1780
1781 if (idx)
1782 *idx = 0;
1783
1784 return 1;
1785}
1786
44a6b1b6 1787_pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
cbdca852
LP
1788 assert(f);
1789 assert(p > 0);
1790
1791 if (p == needle)
1792 return TEST_FOUND;
1793 else if (p < needle)
1794 return TEST_LEFT;
1795 else
1796 return TEST_RIGHT;
1797}
1798
1799int journal_file_move_to_entry_by_offset(
1800 JournalFile *f,
1801 uint64_t p,
1802 direction_t direction,
1803 Object **ret,
1804 uint64_t *offset) {
1805
1806 return generic_array_bisect(f,
1807 le64toh(f->header->entry_array_offset),
1808 le64toh(f->header->n_entries),
1809 p,
1810 test_object_offset,
1811 direction,
1812 ret, offset, NULL);
de190aef
LP
1813}
1814
cbdca852 1815
de190aef
LP
1816static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1817 Object *o;
1818 int r;
1819
1820 assert(f);
1821 assert(p > 0);
1822
1823 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1824 if (r < 0)
1825 return r;
1826
de190aef
LP
1827 if (le64toh(o->entry.seqnum) == needle)
1828 return TEST_FOUND;
1829 else if (le64toh(o->entry.seqnum) < needle)
1830 return TEST_LEFT;
1831 else
1832 return TEST_RIGHT;
1833}
cec736d2 1834
de190aef
LP
1835int journal_file_move_to_entry_by_seqnum(
1836 JournalFile *f,
1837 uint64_t seqnum,
1838 direction_t direction,
1839 Object **ret,
1840 uint64_t *offset) {
1841
1842 return generic_array_bisect(f,
1843 le64toh(f->header->entry_array_offset),
1844 le64toh(f->header->n_entries),
1845 seqnum,
1846 test_object_seqnum,
1847 direction,
1848 ret, offset, NULL);
1849}
cec736d2 1850
de190aef
LP
1851static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1852 Object *o;
1853 int r;
1854
1855 assert(f);
1856 assert(p > 0);
1857
1858 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1859 if (r < 0)
1860 return r;
1861
1862 if (le64toh(o->entry.realtime) == needle)
1863 return TEST_FOUND;
1864 else if (le64toh(o->entry.realtime) < needle)
1865 return TEST_LEFT;
1866 else
1867 return TEST_RIGHT;
cec736d2
LP
1868}
1869
de190aef
LP
1870int journal_file_move_to_entry_by_realtime(
1871 JournalFile *f,
1872 uint64_t realtime,
1873 direction_t direction,
1874 Object **ret,
1875 uint64_t *offset) {
1876
1877 return generic_array_bisect(f,
1878 le64toh(f->header->entry_array_offset),
1879 le64toh(f->header->n_entries),
1880 realtime,
1881 test_object_realtime,
1882 direction,
1883 ret, offset, NULL);
1884}
1885
1886static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1887 Object *o;
1888 int r;
1889
1890 assert(f);
1891 assert(p > 0);
1892
1893 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1894 if (r < 0)
1895 return r;
1896
1897 if (le64toh(o->entry.monotonic) == needle)
1898 return TEST_FOUND;
1899 else if (le64toh(o->entry.monotonic) < needle)
1900 return TEST_LEFT;
1901 else
1902 return TEST_RIGHT;
1903}
1904
47838ab3
ZJS
1905static inline int find_data_object_by_boot_id(
1906 JournalFile *f,
1907 sd_id128_t boot_id,
1908 Object **o,
1909 uint64_t *b) {
1910 char t[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
1911
1912 sd_id128_to_string(boot_id, t + 9);
1913 return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
1914}
1915
de190aef
LP
1916int journal_file_move_to_entry_by_monotonic(
1917 JournalFile *f,
1918 sd_id128_t boot_id,
1919 uint64_t monotonic,
1920 direction_t direction,
1921 Object **ret,
1922 uint64_t *offset) {
1923
de190aef
LP
1924 Object *o;
1925 int r;
1926
cbdca852 1927 assert(f);
de190aef 1928
47838ab3 1929 r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
de190aef
LP
1930 if (r < 0)
1931 return r;
cbdca852 1932 if (r == 0)
de190aef
LP
1933 return -ENOENT;
1934
1935 return generic_array_bisect_plus_one(f,
1936 le64toh(o->data.entry_offset),
1937 le64toh(o->data.entry_array_offset),
1938 le64toh(o->data.n_entries),
1939 monotonic,
1940 test_object_monotonic,
1941 direction,
1942 ret, offset, NULL);
1943}
1944
de190aef
LP
1945int journal_file_next_entry(
1946 JournalFile *f,
1947 Object *o, uint64_t p,
1948 direction_t direction,
1949 Object **ret, uint64_t *offset) {
1950
1951 uint64_t i, n;
cec736d2
LP
1952 int r;
1953
1954 assert(f);
de190aef
LP
1955 assert(p > 0 || !o);
1956
1957 n = le64toh(f->header->n_entries);
1958 if (n <= 0)
1959 return 0;
cec736d2
LP
1960
1961 if (!o)
de190aef 1962 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1963 else {
de190aef 1964 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1965 return -EINVAL;
1966
de190aef
LP
1967 r = generic_array_bisect(f,
1968 le64toh(f->header->entry_array_offset),
1969 le64toh(f->header->n_entries),
1970 p,
1971 test_object_offset,
1972 DIRECTION_DOWN,
1973 NULL, NULL,
1974 &i);
1975 if (r <= 0)
1976 return r;
1977
1978 if (direction == DIRECTION_DOWN) {
1979 if (i >= n - 1)
1980 return 0;
1981
1982 i++;
1983 } else {
1984 if (i <= 0)
1985 return 0;
1986
1987 i--;
1988 }
cec736d2
LP
1989 }
1990
de190aef
LP
1991 /* And jump to it */
1992 return generic_array_get(f,
1993 le64toh(f->header->entry_array_offset),
1994 i,
1995 ret, offset);
1996}
cec736d2 1997
de190aef
LP
1998int journal_file_skip_entry(
1999 JournalFile *f,
2000 Object *o, uint64_t p,
2001 int64_t skip,
2002 Object **ret, uint64_t *offset) {
2003
2004 uint64_t i, n;
2005 int r;
2006
2007 assert(f);
2008 assert(o);
2009 assert(p > 0);
2010
2011 if (o->object.type != OBJECT_ENTRY)
2012 return -EINVAL;
2013
2014 r = generic_array_bisect(f,
2015 le64toh(f->header->entry_array_offset),
2016 le64toh(f->header->n_entries),
2017 p,
2018 test_object_offset,
2019 DIRECTION_DOWN,
2020 NULL, NULL,
2021 &i);
2022 if (r <= 0)
cec736d2
LP
2023 return r;
2024
de190aef
LP
2025 /* Calculate new index */
2026 if (skip < 0) {
2027 if ((uint64_t) -skip >= i)
2028 i = 0;
2029 else
2030 i = i - (uint64_t) -skip;
2031 } else
2032 i += (uint64_t) skip;
cec736d2 2033
de190aef
LP
2034 n = le64toh(f->header->n_entries);
2035 if (n <= 0)
2036 return -EBADMSG;
cec736d2 2037
de190aef
LP
2038 if (i >= n)
2039 i = n-1;
2040
2041 return generic_array_get(f,
2042 le64toh(f->header->entry_array_offset),
2043 i,
2044 ret, offset);
cec736d2
LP
2045}
2046
de190aef
LP
2047int journal_file_next_entry_for_data(
2048 JournalFile *f,
2049 Object *o, uint64_t p,
2050 uint64_t data_offset,
2051 direction_t direction,
2052 Object **ret, uint64_t *offset) {
2053
2054 uint64_t n, i;
cec736d2 2055 int r;
de190aef 2056 Object *d;
cec736d2
LP
2057
2058 assert(f);
de190aef 2059 assert(p > 0 || !o);
cec736d2 2060
de190aef 2061 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 2062 if (r < 0)
de190aef 2063 return r;
cec736d2 2064
de190aef
LP
2065 n = le64toh(d->data.n_entries);
2066 if (n <= 0)
2067 return n;
cec736d2 2068
de190aef
LP
2069 if (!o)
2070 i = direction == DIRECTION_DOWN ? 0 : n - 1;
2071 else {
2072 if (o->object.type != OBJECT_ENTRY)
2073 return -EINVAL;
cec736d2 2074
de190aef
LP
2075 r = generic_array_bisect_plus_one(f,
2076 le64toh(d->data.entry_offset),
2077 le64toh(d->data.entry_array_offset),
2078 le64toh(d->data.n_entries),
2079 p,
2080 test_object_offset,
2081 DIRECTION_DOWN,
2082 NULL, NULL,
2083 &i);
2084
2085 if (r <= 0)
cec736d2
LP
2086 return r;
2087
de190aef
LP
2088 if (direction == DIRECTION_DOWN) {
2089 if (i >= n - 1)
2090 return 0;
cec736d2 2091
de190aef
LP
2092 i++;
2093 } else {
2094 if (i <= 0)
2095 return 0;
cec736d2 2096
de190aef
LP
2097 i--;
2098 }
cec736d2 2099
de190aef 2100 }
cec736d2 2101
de190aef
LP
2102 return generic_array_get_plus_one(f,
2103 le64toh(d->data.entry_offset),
2104 le64toh(d->data.entry_array_offset),
2105 i,
2106 ret, offset);
2107}
cec736d2 2108
cbdca852
LP
2109int journal_file_move_to_entry_by_offset_for_data(
2110 JournalFile *f,
2111 uint64_t data_offset,
2112 uint64_t p,
2113 direction_t direction,
2114 Object **ret, uint64_t *offset) {
2115
2116 int r;
2117 Object *d;
2118
2119 assert(f);
2120
2121 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2122 if (r < 0)
2123 return r;
2124
2125 return generic_array_bisect_plus_one(f,
2126 le64toh(d->data.entry_offset),
2127 le64toh(d->data.entry_array_offset),
2128 le64toh(d->data.n_entries),
2129 p,
2130 test_object_offset,
2131 direction,
2132 ret, offset, NULL);
2133}
2134
2135int journal_file_move_to_entry_by_monotonic_for_data(
2136 JournalFile *f,
2137 uint64_t data_offset,
2138 sd_id128_t boot_id,
2139 uint64_t monotonic,
2140 direction_t direction,
2141 Object **ret, uint64_t *offset) {
2142
cbdca852
LP
2143 Object *o, *d;
2144 int r;
2145 uint64_t b, z;
2146
2147 assert(f);
2148
2149 /* First, seek by time */
47838ab3 2150 r = find_data_object_by_boot_id(f, boot_id, &o, &b);
cbdca852
LP
2151 if (r < 0)
2152 return r;
2153 if (r == 0)
2154 return -ENOENT;
2155
2156 r = generic_array_bisect_plus_one(f,
2157 le64toh(o->data.entry_offset),
2158 le64toh(o->data.entry_array_offset),
2159 le64toh(o->data.n_entries),
2160 monotonic,
2161 test_object_monotonic,
2162 direction,
2163 NULL, &z, NULL);
2164 if (r <= 0)
2165 return r;
2166
2167 /* And now, continue seeking until we find an entry that
2168 * exists in both bisection arrays */
2169
2170 for (;;) {
2171 Object *qo;
2172 uint64_t p, q;
2173
2174 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2175 if (r < 0)
2176 return r;
2177
2178 r = generic_array_bisect_plus_one(f,
2179 le64toh(d->data.entry_offset),
2180 le64toh(d->data.entry_array_offset),
2181 le64toh(d->data.n_entries),
2182 z,
2183 test_object_offset,
2184 direction,
2185 NULL, &p, NULL);
2186 if (r <= 0)
2187 return r;
2188
2189 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
2190 if (r < 0)
2191 return r;
2192
2193 r = generic_array_bisect_plus_one(f,
2194 le64toh(o->data.entry_offset),
2195 le64toh(o->data.entry_array_offset),
2196 le64toh(o->data.n_entries),
2197 p,
2198 test_object_offset,
2199 direction,
2200 &qo, &q, NULL);
2201
2202 if (r <= 0)
2203 return r;
2204
2205 if (p == q) {
2206 if (ret)
2207 *ret = qo;
2208 if (offset)
2209 *offset = q;
2210
2211 return 1;
2212 }
2213
2214 z = q;
2215 }
2216
2217 return 0;
2218}
2219
de190aef
LP
2220int journal_file_move_to_entry_by_seqnum_for_data(
2221 JournalFile *f,
2222 uint64_t data_offset,
2223 uint64_t seqnum,
2224 direction_t direction,
2225 Object **ret, uint64_t *offset) {
cec736d2 2226
de190aef
LP
2227 Object *d;
2228 int r;
cec736d2 2229
91a31dde
LP
2230 assert(f);
2231
de190aef 2232 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 2233 if (r < 0)
de190aef 2234 return r;
cec736d2 2235
de190aef
LP
2236 return generic_array_bisect_plus_one(f,
2237 le64toh(d->data.entry_offset),
2238 le64toh(d->data.entry_array_offset),
2239 le64toh(d->data.n_entries),
2240 seqnum,
2241 test_object_seqnum,
2242 direction,
2243 ret, offset, NULL);
2244}
cec736d2 2245
de190aef
LP
2246int journal_file_move_to_entry_by_realtime_for_data(
2247 JournalFile *f,
2248 uint64_t data_offset,
2249 uint64_t realtime,
2250 direction_t direction,
2251 Object **ret, uint64_t *offset) {
2252
2253 Object *d;
2254 int r;
2255
91a31dde
LP
2256 assert(f);
2257
de190aef 2258 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 2259 if (r < 0)
de190aef
LP
2260 return r;
2261
2262 return generic_array_bisect_plus_one(f,
2263 le64toh(d->data.entry_offset),
2264 le64toh(d->data.entry_array_offset),
2265 le64toh(d->data.n_entries),
2266 realtime,
2267 test_object_realtime,
2268 direction,
2269 ret, offset, NULL);
cec736d2
LP
2270}
2271
0284adc6 2272void journal_file_dump(JournalFile *f) {
7560fffc 2273 Object *o;
7560fffc 2274 int r;
0284adc6 2275 uint64_t p;
7560fffc
LP
2276
2277 assert(f);
2278
0284adc6 2279 journal_file_print_header(f);
7560fffc 2280
0284adc6
LP
2281 p = le64toh(f->header->header_size);
2282 while (p != 0) {
2283 r = journal_file_move_to_object(f, -1, p, &o);
2284 if (r < 0)
2285 goto fail;
7560fffc 2286
0284adc6 2287 switch (o->object.type) {
d98cc1f2 2288
0284adc6
LP
2289 case OBJECT_UNUSED:
2290 printf("Type: OBJECT_UNUSED\n");
2291 break;
d98cc1f2 2292
0284adc6
LP
2293 case OBJECT_DATA:
2294 printf("Type: OBJECT_DATA\n");
2295 break;
7560fffc 2296
3c1668da
LP
2297 case OBJECT_FIELD:
2298 printf("Type: OBJECT_FIELD\n");
2299 break;
2300
0284adc6 2301 case OBJECT_ENTRY:
507f22bd
ZJS
2302 printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
2303 le64toh(o->entry.seqnum),
2304 le64toh(o->entry.monotonic),
2305 le64toh(o->entry.realtime));
0284adc6 2306 break;
7560fffc 2307
0284adc6
LP
2308 case OBJECT_FIELD_HASH_TABLE:
2309 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2310 break;
7560fffc 2311
0284adc6
LP
2312 case OBJECT_DATA_HASH_TABLE:
2313 printf("Type: OBJECT_DATA_HASH_TABLE\n");
2314 break;
7560fffc 2315
0284adc6
LP
2316 case OBJECT_ENTRY_ARRAY:
2317 printf("Type: OBJECT_ENTRY_ARRAY\n");
2318 break;
7560fffc 2319
0284adc6 2320 case OBJECT_TAG:
507f22bd
ZJS
2321 printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
2322 le64toh(o->tag.seqnum),
2323 le64toh(o->tag.epoch));
0284adc6 2324 break;
3c1668da
LP
2325
2326 default:
2327 printf("Type: unknown (%u)\n", o->object.type);
2328 break;
0284adc6 2329 }
7560fffc 2330
0284adc6
LP
2331 if (o->object.flags & OBJECT_COMPRESSED)
2332 printf("Flags: COMPRESSED\n");
7560fffc 2333
0284adc6
LP
2334 if (p == le64toh(f->header->tail_object_offset))
2335 p = 0;
2336 else
2337 p = p + ALIGN64(le64toh(o->object.size));
2338 }
7560fffc 2339
0284adc6
LP
2340 return;
2341fail:
2342 log_error("File corrupt");
7560fffc
LP
2343}
2344
718fe4b1
ZJS
2345static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
2346 const char *x;
2347
2348 x = format_timestamp(buf, l, t);
2349 if (x)
2350 return x;
2351 return " --- ";
2352}
2353
0284adc6 2354void journal_file_print_header(JournalFile *f) {
2765b7bb 2355 char a[33], b[33], c[33], d[33];
ed375beb 2356 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
a1a03e30
LP
2357 struct stat st;
2358 char bytes[FORMAT_BYTES_MAX];
7560fffc
LP
2359
2360 assert(f);
7560fffc 2361
0284adc6
LP
2362 printf("File Path: %s\n"
2363 "File ID: %s\n"
2364 "Machine ID: %s\n"
2365 "Boot ID: %s\n"
2366 "Sequential Number ID: %s\n"
2367 "State: %s\n"
2368 "Compatible Flags:%s%s\n"
2369 "Incompatible Flags:%s%s\n"
507f22bd
ZJS
2370 "Header size: %"PRIu64"\n"
2371 "Arena size: %"PRIu64"\n"
2372 "Data Hash Table Size: %"PRIu64"\n"
2373 "Field Hash Table Size: %"PRIu64"\n"
0284adc6 2374 "Rotate Suggested: %s\n"
507f22bd
ZJS
2375 "Head Sequential Number: %"PRIu64"\n"
2376 "Tail Sequential Number: %"PRIu64"\n"
0284adc6 2377 "Head Realtime Timestamp: %s\n"
3223f44f 2378 "Tail Realtime Timestamp: %s\n"
ed375beb 2379 "Tail Monotonic Timestamp: %s\n"
507f22bd
ZJS
2380 "Objects: %"PRIu64"\n"
2381 "Entry Objects: %"PRIu64"\n",
0284adc6
LP
2382 f->path,
2383 sd_id128_to_string(f->header->file_id, a),
2384 sd_id128_to_string(f->header->machine_id, b),
2385 sd_id128_to_string(f->header->boot_id, c),
2765b7bb 2386 sd_id128_to_string(f->header->seqnum_id, d),
3223f44f
LP
2387 f->header->state == STATE_OFFLINE ? "OFFLINE" :
2388 f->header->state == STATE_ONLINE ? "ONLINE" :
2389 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
8088cbd3
LP
2390 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
2391 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
2392 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
2393 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
507f22bd
ZJS
2394 le64toh(f->header->header_size),
2395 le64toh(f->header->arena_size),
2396 le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2397 le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
fb0951b0 2398 yes_no(journal_file_rotate_suggested(f, 0)),
507f22bd
ZJS
2399 le64toh(f->header->head_entry_seqnum),
2400 le64toh(f->header->tail_entry_seqnum),
718fe4b1
ZJS
2401 format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2402 format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
ed375beb 2403 format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC),
507f22bd
ZJS
2404 le64toh(f->header->n_objects),
2405 le64toh(f->header->n_entries));
7560fffc 2406
0284adc6 2407 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
507f22bd 2408 printf("Data Objects: %"PRIu64"\n"
0284adc6 2409 "Data Hash Table Fill: %.1f%%\n",
507f22bd 2410 le64toh(f->header->n_data),
0284adc6 2411 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 2412
0284adc6 2413 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
507f22bd 2414 printf("Field Objects: %"PRIu64"\n"
0284adc6 2415 "Field Hash Table Fill: %.1f%%\n",
507f22bd 2416 le64toh(f->header->n_fields),
0284adc6 2417 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
2418
2419 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
507f22bd
ZJS
2420 printf("Tag Objects: %"PRIu64"\n",
2421 le64toh(f->header->n_tags));
3223f44f 2422 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
507f22bd
ZJS
2423 printf("Entry Array Objects: %"PRIu64"\n",
2424 le64toh(f->header->n_entry_arrays));
a1a03e30
LP
2425
2426 if (fstat(f->fd, &st) >= 0)
2427 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
7560fffc
LP
2428}
2429
0284adc6
LP
2430int journal_file_open(
2431 const char *fname,
2432 int flags,
2433 mode_t mode,
2434 bool compress,
baed47c3 2435 bool seal,
0284adc6
LP
2436 JournalMetrics *metrics,
2437 MMapCache *mmap_cache,
2438 JournalFile *template,
2439 JournalFile **ret) {
7560fffc 2440
0284adc6
LP
2441 JournalFile *f;
2442 int r;
2443 bool newly_created = false;
7560fffc 2444
0284adc6 2445 assert(fname);
0559d3a5 2446 assert(ret);
7560fffc 2447
0284adc6
LP
2448 if ((flags & O_ACCMODE) != O_RDONLY &&
2449 (flags & O_ACCMODE) != O_RDWR)
2450 return -EINVAL;
7560fffc 2451
a0108012
LP
2452 if (!endswith(fname, ".journal") &&
2453 !endswith(fname, ".journal~"))
0284adc6 2454 return -EINVAL;
7560fffc 2455
0284adc6
LP
2456 f = new0(JournalFile, 1);
2457 if (!f)
2458 return -ENOMEM;
7560fffc 2459
0284adc6
LP
2460 f->fd = -1;
2461 f->mode = mode;
7560fffc 2462
0284adc6
LP
2463 f->flags = flags;
2464 f->prot = prot_from_flags(flags);
2465 f->writable = (flags & O_ACCMODE) != O_RDONLY;
48b61739 2466#ifdef HAVE_XZ
0284adc6 2467 f->compress = compress;
48b61739 2468#endif
49a32d43 2469#ifdef HAVE_GCRYPT
baed47c3 2470 f->seal = seal;
49a32d43 2471#endif
7560fffc 2472
0284adc6
LP
2473 if (mmap_cache)
2474 f->mmap = mmap_cache_ref(mmap_cache);
2475 else {
84168d80 2476 f->mmap = mmap_cache_new();
0284adc6
LP
2477 if (!f->mmap) {
2478 r = -ENOMEM;
2479 goto fail;
2480 }
2481 }
7560fffc 2482
0284adc6
LP
2483 f->path = strdup(fname);
2484 if (!f->path) {
2485 r = -ENOMEM;
2486 goto fail;
2487 }
7560fffc 2488
a4bcff5b
LP
2489 f->chain_cache = hashmap_new(uint64_hash_func, uint64_compare_func);
2490 if (!f->chain_cache) {
2491 r = -ENOMEM;
2492 goto fail;
2493 }
2494
0284adc6
LP
2495 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2496 if (f->fd < 0) {
2497 r = -errno;
2498 goto fail;
7560fffc 2499 }
7560fffc 2500
0284adc6
LP
2501 if (fstat(f->fd, &f->last_stat) < 0) {
2502 r = -errno;
2503 goto fail;
2504 }
7560fffc 2505
0284adc6 2506 if (f->last_stat.st_size == 0 && f->writable) {
fb0951b0
LP
2507#ifdef HAVE_XATTR
2508 uint64_t crtime;
2509
2510 /* Let's attach the creation time to the journal file,
2511 * so that the vacuuming code knows the age of this
2512 * file even if the file might end up corrupted one
2513 * day... Ideally we'd just use the creation time many
2514 * file systems maintain for each file, but there is
2515 * currently no usable API to query this, hence let's
2516 * emulate this via extended attributes. If extended
2517 * attributes are not supported we'll just skip this,
2518 * and rely solely on mtime/atime/ctime of the file.*/
2519
2520 crtime = htole64((uint64_t) now(CLOCK_REALTIME));
2521 fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
2522#endif
7560fffc 2523
feb12d3e 2524#ifdef HAVE_GCRYPT
0284adc6 2525 /* Try to load the FSPRG state, and if we can't, then
baed47c3 2526 * just don't do sealing */
49a32d43
LP
2527 if (f->seal) {
2528 r = journal_file_fss_load(f);
2529 if (r < 0)
2530 f->seal = false;
2531 }
feb12d3e 2532#endif
7560fffc 2533
0284adc6
LP
2534 r = journal_file_init_header(f, template);
2535 if (r < 0)
2536 goto fail;
7560fffc 2537
0284adc6
LP
2538 if (fstat(f->fd, &f->last_stat) < 0) {
2539 r = -errno;
2540 goto fail;
2541 }
fb0951b0
LP
2542
2543 newly_created = true;
0284adc6 2544 }
7560fffc 2545
0284adc6
LP
2546 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2547 r = -EIO;
2548 goto fail;
2549 }
7560fffc 2550
0284adc6
LP
2551 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2552 if (f->header == MAP_FAILED) {
2553 f->header = NULL;
2554 r = -errno;
2555 goto fail;
2556 }
7560fffc 2557
0284adc6
LP
2558 if (!newly_created) {
2559 r = journal_file_verify_header(f);
2560 if (r < 0)
2561 goto fail;
2562 }
7560fffc 2563
feb12d3e 2564#ifdef HAVE_GCRYPT
0284adc6 2565 if (!newly_created && f->writable) {
baed47c3 2566 r = journal_file_fss_load(f);
0284adc6
LP
2567 if (r < 0)
2568 goto fail;
2569 }
feb12d3e 2570#endif
cec736d2
LP
2571
2572 if (f->writable) {
4a92baf3
LP
2573 if (metrics) {
2574 journal_default_metrics(metrics, f->fd);
2575 f->metrics = *metrics;
2576 } else if (template)
2577 f->metrics = template->metrics;
2578
cec736d2
LP
2579 r = journal_file_refresh_header(f);
2580 if (r < 0)
2581 goto fail;
2582 }
2583
feb12d3e 2584#ifdef HAVE_GCRYPT
baed47c3 2585 r = journal_file_hmac_setup(f);
14d10188
LP
2586 if (r < 0)
2587 goto fail;
feb12d3e 2588#endif
14d10188 2589
cec736d2 2590 if (newly_created) {
de190aef 2591 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2592 if (r < 0)
2593 goto fail;
2594
de190aef 2595 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2596 if (r < 0)
2597 goto fail;
7560fffc 2598
feb12d3e 2599#ifdef HAVE_GCRYPT
7560fffc
LP
2600 r = journal_file_append_first_tag(f);
2601 if (r < 0)
2602 goto fail;
feb12d3e 2603#endif
cec736d2
LP
2604 }
2605
de190aef 2606 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2607 if (r < 0)
2608 goto fail;
2609
de190aef 2610 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2611 if (r < 0)
2612 goto fail;
2613
0559d3a5 2614 *ret = f;
cec736d2
LP
2615 return 0;
2616
2617fail:
2618 journal_file_close(f);
2619
2620 return r;
2621}
0ac38b70 2622
baed47c3 2623int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
57535f47 2624 _cleanup_free_ char *p = NULL;
0ac38b70
LP
2625 size_t l;
2626 JournalFile *old_file, *new_file = NULL;
2627 int r;
2628
2629 assert(f);
2630 assert(*f);
2631
2632 old_file = *f;
2633
2634 if (!old_file->writable)
2635 return -EINVAL;
2636
2637 if (!endswith(old_file->path, ".journal"))
2638 return -EINVAL;
2639
2640 l = strlen(old_file->path);
57535f47
ZJS
2641 r = asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
2642 (int) l - 8, old_file->path,
2643 SD_ID128_FORMAT_VAL(old_file->header->seqnum_id),
2644 le64toh((*f)->header->head_entry_seqnum),
2645 le64toh((*f)->header->head_entry_realtime));
2646 if (r < 0)
0ac38b70
LP
2647 return -ENOMEM;
2648
0ac38b70 2649 r = rename(old_file->path, p);
0ac38b70
LP
2650 if (r < 0)
2651 return -errno;
2652
ccdbaf91 2653 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2654
baed47c3 2655 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2656 journal_file_close(old_file);
2657
2658 *f = new_file;
2659 return r;
2660}
2661
9447a7f1
LP
2662int journal_file_open_reliably(
2663 const char *fname,
2664 int flags,
2665 mode_t mode,
7560fffc 2666 bool compress,
baed47c3 2667 bool seal,
4a92baf3 2668 JournalMetrics *metrics,
27370278 2669 MMapCache *mmap_cache,
9447a7f1
LP
2670 JournalFile *template,
2671 JournalFile **ret) {
2672
2673 int r;
2674 size_t l;
ed375beb 2675 _cleanup_free_ char *p = NULL;
9447a7f1 2676
baed47c3 2677 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2678 metrics, mmap_cache, template, ret);
0071d9f1
LP
2679 if (r != -EBADMSG && /* corrupted */
2680 r != -ENODATA && /* truncated */
2681 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2682 r != -EPROTONOSUPPORT && /* incompatible feature */
2683 r != -EBUSY && /* unclean shutdown */
2684 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2685 return r;
2686
2687 if ((flags & O_ACCMODE) == O_RDONLY)
2688 return r;
2689
2690 if (!(flags & O_CREAT))
2691 return r;
2692
7560fffc
LP
2693 if (!endswith(fname, ".journal"))
2694 return r;
2695
5c70eab4
LP
2696 /* The file is corrupted. Rotate it away and try it again (but only once) */
2697
9447a7f1 2698 l = strlen(fname);
9bf3b535 2699 if (asprintf(&p, "%.*s@%016llx-%016" PRIx64 ".journal~",
57535f47 2700 (int) l - 8, fname,
9447a7f1 2701 (unsigned long long) now(CLOCK_REALTIME),
9bf3b535 2702 random_u64()) < 0)
9447a7f1
LP
2703 return -ENOMEM;
2704
2705 r = rename(fname, p);
9447a7f1
LP
2706 if (r < 0)
2707 return -errno;
2708
a1a1898f 2709 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2710
baed47c3 2711 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2712 metrics, mmap_cache, template, ret);
9447a7f1
LP
2713}
2714
cf244689
LP
2715int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2716 uint64_t i, n;
2717 uint64_t q, xor_hash = 0;
2718 int r;
2719 EntryItem *items;
2720 dual_timestamp ts;
2721
2722 assert(from);
2723 assert(to);
2724 assert(o);
2725 assert(p);
2726
2727 if (!to->writable)
2728 return -EPERM;
2729
2730 ts.monotonic = le64toh(o->entry.monotonic);
2731 ts.realtime = le64toh(o->entry.realtime);
2732
cf244689 2733 n = journal_file_entry_n_items(o);
4faa7004
TA
2734 /* alloca() can't take 0, hence let's allocate at least one */
2735 items = alloca(sizeof(EntryItem) * MAX(1u, n));
cf244689
LP
2736
2737 for (i = 0; i < n; i++) {
4fd052ae
FC
2738 uint64_t l, h;
2739 le64_t le_hash;
cf244689
LP
2740 size_t t;
2741 void *data;
2742 Object *u;
2743
2744 q = le64toh(o->entry.items[i].object_offset);
2745 le_hash = o->entry.items[i].hash;
2746
2747 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2748 if (r < 0)
2749 return r;
2750
2751 if (le_hash != o->data.hash)
2752 return -EBADMSG;
2753
2754 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2755 t = (size_t) l;
2756
2757 /* We hit the limit on 32bit machines */
2758 if ((uint64_t) t != l)
2759 return -E2BIG;
2760
2761 if (o->object.flags & OBJECT_COMPRESSED) {
2762#ifdef HAVE_XZ
2763 uint64_t rsize;
2764
93b73b06 2765 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0))
cf244689
LP
2766 return -EBADMSG;
2767
2768 data = from->compress_buffer;
2769 l = rsize;
2770#else
2771 return -EPROTONOSUPPORT;
2772#endif
2773 } else
2774 data = o->data.payload;
2775
2776 r = journal_file_append_data(to, data, l, &u, &h);
2777 if (r < 0)
2778 return r;
2779
2780 xor_hash ^= le64toh(u->data.hash);
2781 items[i].object_offset = htole64(h);
2782 items[i].hash = u->data.hash;
2783
2784 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2785 if (r < 0)
2786 return r;
2787 }
2788
2789 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2790}
babfc091
LP
2791
2792void journal_default_metrics(JournalMetrics *m, int fd) {
2793 uint64_t fs_size = 0;
2794 struct statvfs ss;
a7bc2c2a 2795 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2796
2797 assert(m);
2798 assert(fd >= 0);
2799
2800 if (fstatvfs(fd, &ss) >= 0)
2801 fs_size = ss.f_frsize * ss.f_blocks;
2802
2803 if (m->max_use == (uint64_t) -1) {
2804
2805 if (fs_size > 0) {
2806 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2807
2808 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2809 m->max_use = DEFAULT_MAX_USE_UPPER;
2810
2811 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2812 m->max_use = DEFAULT_MAX_USE_LOWER;
2813 } else
2814 m->max_use = DEFAULT_MAX_USE_LOWER;
2815 } else {
2816 m->max_use = PAGE_ALIGN(m->max_use);
2817
2818 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2819 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2820 }
2821
2822 if (m->max_size == (uint64_t) -1) {
2823 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2824
2825 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2826 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2827 } else
2828 m->max_size = PAGE_ALIGN(m->max_size);
2829
2830 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2831 m->max_size = JOURNAL_FILE_SIZE_MIN;
2832
2833 if (m->max_size*2 > m->max_use)
2834 m->max_use = m->max_size*2;
2835
2836 if (m->min_size == (uint64_t) -1)
2837 m->min_size = JOURNAL_FILE_SIZE_MIN;
2838 else {
2839 m->min_size = PAGE_ALIGN(m->min_size);
2840
2841 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2842 m->min_size = JOURNAL_FILE_SIZE_MIN;
2843
2844 if (m->min_size > m->max_size)
2845 m->max_size = m->min_size;
2846 }
2847
2848 if (m->keep_free == (uint64_t) -1) {
2849
2850 if (fs_size > 0) {
8621b110 2851 m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
babfc091
LP
2852
2853 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2854 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2855
2856 } else
2857 m->keep_free = DEFAULT_KEEP_FREE;
2858 }
2859
2b43f939
LP
2860 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2861 format_bytes(a, sizeof(a), m->max_use),
2862 format_bytes(b, sizeof(b), m->max_size),
2863 format_bytes(c, sizeof(c), m->min_size),
2864 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2865}
08984293
LP
2866
2867int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2868 assert(f);
2869 assert(from || to);
2870
2871 if (from) {
162566a4
LP
2872 if (f->header->head_entry_realtime == 0)
2873 return -ENOENT;
08984293 2874
162566a4 2875 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2876 }
2877
2878 if (to) {
162566a4
LP
2879 if (f->header->tail_entry_realtime == 0)
2880 return -ENOENT;
08984293 2881
162566a4 2882 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2883 }
2884
2885 return 1;
2886}
2887
2888int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
08984293
LP
2889 Object *o;
2890 uint64_t p;
2891 int r;
2892
2893 assert(f);
2894 assert(from || to);
2895
47838ab3 2896 r = find_data_object_by_boot_id(f, boot_id, &o, &p);
08984293
LP
2897 if (r <= 0)
2898 return r;
2899
2900 if (le64toh(o->data.n_entries) <= 0)
2901 return 0;
2902
2903 if (from) {
2904 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2905 if (r < 0)
2906 return r;
2907
2908 *from = le64toh(o->entry.monotonic);
2909 }
2910
2911 if (to) {
2912 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2913 if (r < 0)
2914 return r;
2915
2916 r = generic_array_get_plus_one(f,
2917 le64toh(o->data.entry_offset),
2918 le64toh(o->data.entry_array_offset),
2919 le64toh(o->data.n_entries)-1,
2920 &o, NULL);
2921 if (r <= 0)
2922 return r;
2923
2924 *to = le64toh(o->entry.monotonic);
2925 }
2926
2927 return 1;
2928}
dca6219e 2929
fb0951b0 2930bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
dca6219e
LP
2931 assert(f);
2932
2933 /* If we gained new header fields we gained new features,
2934 * hence suggest a rotation */
361f9cbc
LP
2935 if (le64toh(f->header->header_size) < sizeof(Header)) {
2936 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2937 return true;
361f9cbc 2938 }
dca6219e
LP
2939
2940 /* Let's check if the hash tables grew over a certain fill
2941 * level (75%, borrowing this value from Java's hash table
2942 * implementation), and if so suggest a rotation. To calculate
2943 * the fill level we need the n_data field, which only exists
2944 * in newer versions. */
2945
2946 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc 2947 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
507f22bd 2948 log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
361f9cbc
LP
2949 f->path,
2950 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
507f22bd
ZJS
2951 le64toh(f->header->n_data),
2952 le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2953 (unsigned long long) f->last_stat.st_size,
2954 f->last_stat.st_size / le64toh(f->header->n_data));
dca6219e 2955 return true;
361f9cbc 2956 }
dca6219e
LP
2957
2958 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc 2959 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
507f22bd 2960 log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
361f9cbc
LP
2961 f->path,
2962 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
507f22bd
ZJS
2963 le64toh(f->header->n_fields),
2964 le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
dca6219e 2965 return true;
361f9cbc 2966 }
dca6219e 2967
0598fd4a
LP
2968 /* Are the data objects properly indexed by field objects? */
2969 if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
2970 JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
2971 le64toh(f->header->n_data) > 0 &&
2972 le64toh(f->header->n_fields) == 0)
2973 return true;
2974
fb0951b0
LP
2975 if (max_file_usec > 0) {
2976 usec_t t, h;
2977
2978 h = le64toh(f->header->head_entry_realtime);
2979 t = now(CLOCK_REALTIME);
2980
2981 if (h > 0 && t > h + max_file_usec)
2982 return true;
2983 }
2984
dca6219e
LP
2985 return false;
2986}