]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journal: refuse verification of files with unknown flags
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
0284adc6 32#include "journal-authenticate.h"
cec736d2 33#include "lookup3.h"
807e17f0 34#include "compress.h"
7560fffc 35#include "fsprg.h"
cec736d2 36
4a92baf3
LP
/* Default sizes, in bytes, of the data and field hash tables: room for
 * 2047 and 333 HashItem slots respectively */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads of at least this many bytes are handed to the compressor
 * when compression is enabled for the file */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                      /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)               /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)       /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)            /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)     /* 4 GiB */

/* This is the keep_free value when we can't determine the file system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                        /* 1 MiB */

/* n_data was the first entry we added after the initial file format
 * design, hence a valid header is at least large enough to reach it */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 63
cec736d2 64void journal_file_close(JournalFile *f) {
de190aef 65 assert(f);
cec736d2 66
b0af6f41 67 /* Write the final tag */
c586dbf1 68 if (f->seal && f->writable)
b0af6f41
LP
69 journal_file_append_tag(f);
70
7560fffc 71 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
72 if (f->mmap && f->fd >= 0)
73 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc
LP
74
75 if (f->writable && f->fd >= 0)
76 fdatasync(f->fd);
77
d384c7a8 78 if (f->header) {
cd96b3b8
LP
79 /* Mark the file offline. Don't override the archived state if it already is set */
80 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 81 f->header->state = STATE_OFFLINE;
cec736d2 82
d384c7a8
MS
83 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84 }
cec736d2 85
0ac38b70
LP
86 if (f->fd >= 0)
87 close_nointr_nofail(f->fd);
88
cec736d2 89 free(f->path);
807e17f0 90
16e9f408
LP
91 if (f->mmap)
92 mmap_cache_unref(f->mmap);
93
807e17f0
LP
94#ifdef HAVE_XZ
95 free(f->compress_buffer);
96#endif
97
7560fffc 98#ifdef HAVE_GCRYPT
baed47c3
LP
99 if (f->fss_file)
100 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
101 else if (f->fsprg_state)
102 free(f->fsprg_state);
103
104 free(f->fsprg_seed);
7560fffc
LP
105
106 if (f->hmac)
107 gcry_md_close(f->hmac);
108#endif
109
cec736d2
LP
110 free(f);
111}
112
0ac38b70 113static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
114 Header h;
115 ssize_t k;
116 int r;
117
118 assert(f);
119
120 zero(h);
7560fffc 121 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 122 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 123
7560fffc
LP
124 h.incompatible_flags =
125 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127 h.compatible_flags =
baed47c3 128 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 129
cec736d2
LP
130 r = sd_id128_randomize(&h.file_id);
131 if (r < 0)
132 return r;
133
0ac38b70
LP
134 if (template) {
135 h.seqnum_id = template->header->seqnum_id;
beec0085 136 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
137 } else
138 h.seqnum_id = h.file_id;
cec736d2
LP
139
140 k = pwrite(f->fd, &h, sizeof(h), 0);
141 if (k < 0)
142 return -errno;
143
144 if (k != sizeof(h))
145 return -EIO;
146
147 return 0;
148}
149
150static int journal_file_refresh_header(JournalFile *f) {
151 int r;
de190aef 152 sd_id128_t boot_id;
cec736d2
LP
153
154 assert(f);
155
156 r = sd_id128_get_machine(&f->header->machine_id);
157 if (r < 0)
158 return r;
159
de190aef 160 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
161 if (r < 0)
162 return r;
163
de190aef
LP
164 if (sd_id128_equal(boot_id, f->header->boot_id))
165 f->tail_entry_monotonic_valid = true;
166
167 f->header->boot_id = boot_id;
168
169 f->header->state = STATE_ONLINE;
b788cc23 170
7560fffc
LP
171 /* Sync the online state to disk */
172 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173 fdatasync(f->fd);
b788cc23 174
cec736d2
LP
175 return 0;
176}
177
178static int journal_file_verify_header(JournalFile *f) {
179 assert(f);
180
7560fffc 181 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
182 return -EBADMSG;
183
7560fffc
LP
184 /* In both read and write mode we refuse to open files with
185 * incompatible flags we don't know */
807e17f0 186#ifdef HAVE_XZ
7560fffc 187 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
188 return -EPROTONOSUPPORT;
189#else
cec736d2
LP
190 if (f->header->incompatible_flags != 0)
191 return -EPROTONOSUPPORT;
807e17f0 192#endif
cec736d2 193
7560fffc
LP
194 /* When open for writing we refuse to open files with
195 * compatible flags, too */
196 if (f->writable) {
197#ifdef HAVE_GCRYPT
baed47c3 198 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
199 return -EPROTONOSUPPORT;
200#else
201 if (f->header->compatible_flags != 0)
202 return -EPROTONOSUPPORT;
203#endif
204 }
205
db11ac1a
LP
206 if (f->header->state >= _STATE_MAX)
207 return -EBADMSG;
208
dca6219e
LP
209 /* The first addition was n_data, so check that we are at least this large */
210 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
211 return -EBADMSG;
212
baed47c3
LP
213 if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED) &&
214 !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
215 return -EBADMSG;
216
db11ac1a
LP
217 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
218 return -ENODATA;
219
220 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
221 return -ENODATA;
222
223 if (!VALID64(f->header->data_hash_table_offset) ||
224 !VALID64(f->header->field_hash_table_offset) ||
225 !VALID64(f->header->tail_object_offset) ||
226 !VALID64(f->header->entry_array_offset))
cec736d2
LP
227 return -ENODATA;
228
229 if (f->writable) {
ccdbaf91 230 uint8_t state;
cec736d2
LP
231 sd_id128_t machine_id;
232 int r;
233
234 r = sd_id128_get_machine(&machine_id);
235 if (r < 0)
236 return r;
237
238 if (!sd_id128_equal(machine_id, f->header->machine_id))
239 return -EHOSTDOWN;
240
de190aef 241 state = f->header->state;
cec736d2 242
71fa6f00
LP
243 if (state == STATE_ONLINE) {
244 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
245 return -EBUSY;
246 } else if (state == STATE_ARCHIVED)
cec736d2 247 return -ESHUTDOWN;
71fa6f00
LP
248 else if (state != STATE_OFFLINE) {
249 log_debug("Journal file %s has unknown state %u.", f->path, state);
250 return -EBUSY;
251 }
cec736d2
LP
252 }
253
7560fffc 254 f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
c586dbf1
LP
255
256 if (f->writable)
257 f->seal = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED);
7560fffc 258
cec736d2
LP
259 return 0;
260}
261
262static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 263 uint64_t old_size, new_size;
fec2aa2f 264 int r;
cec736d2
LP
265
266 assert(f);
267
cec736d2 268 /* We assume that this file is not sparse, and we know that
38ac38b2 269 * for sure, since we always call posix_fallocate()
cec736d2
LP
270 * ourselves */
271
272 old_size =
23b0b2b2 273 le64toh(f->header->header_size) +
cec736d2
LP
274 le64toh(f->header->arena_size);
275
bc85bfee 276 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
277 if (new_size < le64toh(f->header->header_size))
278 new_size = le64toh(f->header->header_size);
bc85bfee
LP
279
280 if (new_size <= old_size)
cec736d2
LP
281 return 0;
282
bc85bfee
LP
283 if (f->metrics.max_size > 0 &&
284 new_size > f->metrics.max_size)
285 return -E2BIG;
cec736d2 286
bc85bfee
LP
287 if (new_size > f->metrics.min_size &&
288 f->metrics.keep_free > 0) {
cec736d2
LP
289 struct statvfs svfs;
290
291 if (fstatvfs(f->fd, &svfs) >= 0) {
292 uint64_t available;
293
294 available = svfs.f_bfree * svfs.f_bsize;
295
bc85bfee
LP
296 if (available >= f->metrics.keep_free)
297 available -= f->metrics.keep_free;
cec736d2
LP
298 else
299 available = 0;
300
301 if (new_size - old_size > available)
302 return -E2BIG;
303 }
304 }
305
bc85bfee
LP
306 /* Note that the glibc fallocate() fallback is very
307 inefficient, hence we try to minimize the allocation area
308 as we can. */
fec2aa2f
GV
309 r = posix_fallocate(f->fd, old_size, new_size - old_size);
310 if (r != 0)
311 return -r;
cec736d2 312
f65425cb
LP
313 mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
314
cec736d2
LP
315 if (fstat(f->fd, &f->last_stat) < 0)
316 return -errno;
317
23b0b2b2 318 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
319
320 return 0;
321}
322
16e9f408 323static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
cec736d2 324 assert(f);
cec736d2
LP
325 assert(ret);
326
2a59ea54 327 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
328 if (offset + size > (uint64_t) f->last_stat.st_size) {
329 /* Hmm, out of range? Let's refresh the fstat() data
330 * first, before we trust that check. */
331
332 if (fstat(f->fd, &f->last_stat) < 0 ||
333 offset + size > (uint64_t) f->last_stat.st_size)
334 return -EADDRNOTAVAIL;
335 }
336
16e9f408 337 return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
cec736d2
LP
338}
339
16e9f408
LP
340static uint64_t minimum_header_size(Object *o) {
341
342 static uint64_t table[] = {
343 [OBJECT_DATA] = sizeof(DataObject),
344 [OBJECT_FIELD] = sizeof(FieldObject),
345 [OBJECT_ENTRY] = sizeof(EntryObject),
346 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
347 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
348 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
349 [OBJECT_TAG] = sizeof(TagObject),
350 };
351
352 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
353 return sizeof(ObjectHeader);
354
355 return table[o->object.type];
356}
357
de190aef 358int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
359 int r;
360 void *t;
361 Object *o;
362 uint64_t s;
16e9f408 363 unsigned context;
cec736d2
LP
364
365 assert(f);
366 assert(ret);
367
db11ac1a
LP
368 /* Objects may only be located at multiple of 64 bit */
369 if (!VALID64(offset))
370 return -EFAULT;
371
16e9f408
LP
372 /* One context for each type, plus one catch-all for the rest */
373 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
374
375 r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
376 if (r < 0)
377 return r;
378
379 o = (Object*) t;
380 s = le64toh(o->object.size);
381
382 if (s < sizeof(ObjectHeader))
383 return -EBADMSG;
384
16e9f408
LP
385 if (o->object.type <= OBJECT_UNUSED)
386 return -EBADMSG;
387
388 if (s < minimum_header_size(o))
389 return -EBADMSG;
390
de190aef 391 if (type >= 0 && o->object.type != type)
cec736d2
LP
392 return -EBADMSG;
393
394 if (s > sizeof(ObjectHeader)) {
de190aef 395 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
396 if (r < 0)
397 return r;
398
399 o = (Object*) t;
400 }
401
cec736d2
LP
402 *ret = o;
403 return 0;
404}
405
d98cc1f2 406static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
407 uint64_t r;
408
409 assert(f);
410
beec0085 411 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
412
413 if (seqnum) {
de190aef 414 /* If an external seqnum counter was passed, we update
c2373f84
LP
415 * both the local and the external one, and set it to
416 * the maximum of both */
417
418 if (*seqnum + 1 > r)
419 r = *seqnum + 1;
420
421 *seqnum = r;
422 }
423
beec0085 424 f->header->tail_entry_seqnum = htole64(r);
cec736d2 425
beec0085
LP
426 if (f->header->head_entry_seqnum == 0)
427 f->header->head_entry_seqnum = htole64(r);
de190aef 428
cec736d2
LP
429 return r;
430}
431
0284adc6 432int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
433 int r;
434 uint64_t p;
435 Object *tail, *o;
436 void *t;
437
438 assert(f);
16e9f408 439 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
440 assert(size >= sizeof(ObjectHeader));
441 assert(offset);
442 assert(ret);
443
444 p = le64toh(f->header->tail_object_offset);
cec736d2 445 if (p == 0)
23b0b2b2 446 p = le64toh(f->header->header_size);
cec736d2 447 else {
de190aef 448 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
449 if (r < 0)
450 return r;
451
452 p += ALIGN64(le64toh(tail->object.size));
453 }
454
455 r = journal_file_allocate(f, p, size);
456 if (r < 0)
457 return r;
458
de190aef 459 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
460 if (r < 0)
461 return r;
462
463 o = (Object*) t;
464
465 zero(o->object);
de190aef 466 o->object.type = type;
cec736d2
LP
467 o->object.size = htole64(size);
468
469 f->header->tail_object_offset = htole64(p);
cec736d2
LP
470 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
471
472 *ret = o;
473 *offset = p;
474
475 return 0;
476}
477
de190aef 478static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
479 uint64_t s, p;
480 Object *o;
481 int r;
482
483 assert(f);
484
dfabe643 485 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
486 journal file and we want to make sure we never get beyond
487 75% fill level. Calculate the hash table size for the
488 maximum file size based on these metrics. */
489
dfabe643 490 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
491 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
492 s = DEFAULT_DATA_HASH_TABLE_SIZE;
493
dfabe643 494 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 495
de190aef
LP
496 r = journal_file_append_object(f,
497 OBJECT_DATA_HASH_TABLE,
498 offsetof(Object, hash_table.items) + s,
499 &o, &p);
cec736d2
LP
500 if (r < 0)
501 return r;
502
de190aef 503 memset(o->hash_table.items, 0, s);
cec736d2 504
de190aef
LP
505 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
506 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
507
508 return 0;
509}
510
de190aef 511static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
512 uint64_t s, p;
513 Object *o;
514 int r;
515
516 assert(f);
517
de190aef
LP
518 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
519 r = journal_file_append_object(f,
520 OBJECT_FIELD_HASH_TABLE,
521 offsetof(Object, hash_table.items) + s,
522 &o, &p);
cec736d2
LP
523 if (r < 0)
524 return r;
525
de190aef 526 memset(o->hash_table.items, 0, s);
cec736d2 527
de190aef
LP
528 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
529 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
530
531 return 0;
532}
533
de190aef 534static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
535 uint64_t s, p;
536 void *t;
537 int r;
538
539 assert(f);
540
de190aef
LP
541 p = le64toh(f->header->data_hash_table_offset);
542 s = le64toh(f->header->data_hash_table_size);
cec736d2 543
de190aef 544 r = journal_file_move_to(f,
16e9f408 545 OBJECT_DATA_HASH_TABLE,
de190aef
LP
546 p, s,
547 &t);
cec736d2
LP
548 if (r < 0)
549 return r;
550
de190aef 551 f->data_hash_table = t;
cec736d2
LP
552 return 0;
553}
554
de190aef 555static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
556 uint64_t s, p;
557 void *t;
558 int r;
559
560 assert(f);
561
de190aef
LP
562 p = le64toh(f->header->field_hash_table_offset);
563 s = le64toh(f->header->field_hash_table_size);
cec736d2 564
de190aef 565 r = journal_file_move_to(f,
16e9f408 566 OBJECT_FIELD_HASH_TABLE,
de190aef
LP
567 p, s,
568 &t);
cec736d2
LP
569 if (r < 0)
570 return r;
571
de190aef 572 f->field_hash_table = t;
cec736d2
LP
573 return 0;
574}
575
de190aef
LP
576static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
577 uint64_t p, h;
cec736d2
LP
578 int r;
579
580 assert(f);
581 assert(o);
582 assert(offset > 0);
de190aef 583 assert(o->object.type == OBJECT_DATA);
cec736d2 584
48496df6
LP
585 /* This might alter the window we are looking at */
586
de190aef
LP
587 o->data.next_hash_offset = o->data.next_field_offset = 0;
588 o->data.entry_offset = o->data.entry_array_offset = 0;
589 o->data.n_entries = 0;
cec736d2 590
de190aef 591 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 592 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
593 if (p == 0) {
594 /* Only entry in the hash table is easy */
de190aef 595 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 596 } else {
48496df6
LP
597 /* Move back to the previous data object, to patch in
598 * pointer */
cec736d2 599
de190aef 600 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
601 if (r < 0)
602 return r;
603
de190aef 604 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
605 }
606
de190aef 607 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 608
dca6219e
LP
609 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
610 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
611
cec736d2
LP
612 return 0;
613}
614
de190aef
LP
615int journal_file_find_data_object_with_hash(
616 JournalFile *f,
617 const void *data, uint64_t size, uint64_t hash,
618 Object **ret, uint64_t *offset) {
48496df6 619
de190aef 620 uint64_t p, osize, h;
cec736d2
LP
621 int r;
622
623 assert(f);
624 assert(data || size == 0);
625
626 osize = offsetof(Object, data.payload) + size;
627
bc85bfee
LP
628 if (f->header->data_hash_table_size == 0)
629 return -EBADMSG;
630
de190aef
LP
631 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
632 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 633
de190aef
LP
634 while (p > 0) {
635 Object *o;
cec736d2 636
de190aef 637 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
638 if (r < 0)
639 return r;
640
807e17f0 641 if (le64toh(o->data.hash) != hash)
85a131e8 642 goto next;
807e17f0
LP
643
644 if (o->object.flags & OBJECT_COMPRESSED) {
645#ifdef HAVE_XZ
b785c858 646 uint64_t l, rsize;
cec736d2 647
807e17f0
LP
648 l = le64toh(o->object.size);
649 if (l <= offsetof(Object, data.payload))
cec736d2
LP
650 return -EBADMSG;
651
807e17f0
LP
652 l -= offsetof(Object, data.payload);
653
654 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
655 return -EBADMSG;
656
b785c858 657 if (rsize == size &&
807e17f0
LP
658 memcmp(f->compress_buffer, data, size) == 0) {
659
660 if (ret)
661 *ret = o;
662
663 if (offset)
664 *offset = p;
665
666 return 1;
667 }
668#else
669 return -EPROTONOSUPPORT;
670#endif
671
672 } else if (le64toh(o->object.size) == osize &&
673 memcmp(o->data.payload, data, size) == 0) {
674
cec736d2
LP
675 if (ret)
676 *ret = o;
677
678 if (offset)
679 *offset = p;
680
de190aef 681 return 1;
cec736d2
LP
682 }
683
85a131e8 684 next:
cec736d2
LP
685 p = le64toh(o->data.next_hash_offset);
686 }
687
de190aef
LP
688 return 0;
689}
690
691int journal_file_find_data_object(
692 JournalFile *f,
693 const void *data, uint64_t size,
694 Object **ret, uint64_t *offset) {
695
696 uint64_t hash;
697
698 assert(f);
699 assert(data || size == 0);
700
701 hash = hash64(data, size);
702
703 return journal_file_find_data_object_with_hash(f,
704 data, size, hash,
705 ret, offset);
706}
707
48496df6
LP
/* Stores a data payload in the file, deduplicating against existing objects.
 *
 * If an object with identical payload already exists it is reused; otherwise
 * a new data object is appended, optionally compressed, linked into the data
 * hash table and fed to the HMAC.
 *
 * *ret and *offset (both optional) receive the object and its file offset.
 * Returns 0 on success in both cases, or a negative errno-style value. */
static int journal_file_append_data(
                JournalFile *f,
                const void *data, uint64_t size,
                Object **ret, uint64_t *offset) {

        uint64_t hash, p;
        uint64_t osize;
        Object *o;
        int r;
        bool compressed = false;

        assert(f);
        assert(data || size == 0);

        hash = hash64(data, size);

        /* A positive result means an identical object is already stored */
        r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
        if (r < 0)
                return r;
        else if (r > 0) {

                if (ret)
                        *ret = o;

                if (offset)
                        *offset = p;

                return 0;
        }

        /* Space is reserved for the uncompressed payload; compression below
         * may shrink the recorded object size afterwards */
        osize = offsetof(Object, data.payload) + size;
        r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
        if (r < 0)
                return r;

        o->data.hash = htole64(hash);

#ifdef HAVE_XZ
        if (f->compress &&
            size >= COMPRESSION_SIZE_THRESHOLD) {
                uint64_t rsize;

                /* Compress straight into the object's payload area */
                compressed = compress_blob(data, size, o->data.payload, &rsize);

                if (compressed) {
                        o->object.size = htole64(offsetof(Object, data.payload) + rsize);
                        o->object.flags |= OBJECT_COMPRESSED;

                        log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
                }
        }
#endif

        /* Fall back to a plain copy; skipped for empty payloads, where data
         * may be NULL */
        if (!compressed && size > 0)
                memcpy(o->data.payload, data, size);

        r = journal_file_link_data(f, o, p, hash);
        if (r < 0)
                return r;

        r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
        if (r < 0)
                return r;

        /* The linking might have altered the window, so let's
         * refresh our pointer */
        r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
        if (r < 0)
                return r;

        if (ret)
                *ret = o;

        if (offset)
                *offset = p;

        return 0;
}
786
787uint64_t journal_file_entry_n_items(Object *o) {
788 assert(o);
7be3aa17 789 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
790
791 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
792}
793
0284adc6 794uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 795 assert(o);
7be3aa17 796 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
797
798 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
799}
800
/* Stores the entry offset p at logical index *idx of a chain of entry array
 * objects, then increments *idx.
 *
 * first: little-endian offset of the first array of the chain (0 if the chain
 *        is empty); updated when the first array gets created here.
 * idx:   little-endian number of items stored so far, used as the slot index.
 * p:     file offset to store.
 *
 * If the slot lies within an existing array it is filled in place; otherwise
 * a new array is appended to the file and linked to the end of the chain.
 *
 * Returns 0 on success or a negative errno-style value. */
static int link_entry_into_array(JournalFile *f,
                                 le64_t *first,
                                 le64_t *idx,
                                 uint64_t p) {
        int r;
        uint64_t n = 0, ap = 0, q, i, a, hidx;
        Object *o;

        assert(f);
        assert(first);
        assert(idx);
        assert(p > 0);

        /* Walk the chain; i is the index relative to the array currently
         * looked at, hidx stays the absolute index */
        a = le64toh(*first);
        i = hidx = le64toh(*idx);
        while (a > 0) {

                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
                if (r < 0)
                        return r;

                n = journal_file_entry_array_n_items(o);
                if (i < n) {
                        /* There is still room in this array */
                        o->entry_array.items[i] = htole64(p);
                        *idx = htole64(hidx + 1);
                        return 0;
                }

                i -= n;
                ap = a;  /* remember the last array visited */
                a = le64toh(o->entry_array.next_entry_array_offset);
        }

        /* No room: size the new array as twice the last array's capacity,
         * or twice (index + 1) if the index exceeds that capacity, with a
         * minimum of 4 items */
        if (hidx > n)
                n = (hidx+1) * 2;
        else
                n = n * 2;

        if (n < 4)
                n = 4;

        r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
                                       offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
                                       &o, &q);
        if (r < 0)
                return r;

        r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
        if (r < 0)
                return r;

        /* NOTE(review): o was obtained before the HMAC call above; this
         * relies on that call leaving the mapping of the new object intact
         * -- confirm against journal_file_hmac_put_object() */
        o->entry_array.items[i] = htole64(p);

        if (ap == 0)
                /* First array of the chain */
                *first = htole64(q);
        else {
                /* Hook the new array behind the previous chain tail */
                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
                if (r < 0)
                        return r;

                o->entry_array.next_entry_array_offset = htole64(q);
        }

        if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
                f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);

        *idx = htole64(hidx + 1);

        return 0;
}
cec736d2 871
de190aef 872static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
873 le64_t *extra,
874 le64_t *first,
875 le64_t *idx,
de190aef
LP
876 uint64_t p) {
877
878 int r;
879
880 assert(f);
881 assert(extra);
882 assert(first);
883 assert(idx);
884 assert(p > 0);
885
886 if (*idx == 0)
887 *extra = htole64(p);
888 else {
4fd052ae 889 le64_t i;
de190aef 890
7be3aa17 891 i = htole64(le64toh(*idx) - 1);
de190aef
LP
892 r = link_entry_into_array(f, first, &i, p);
893 if (r < 0)
894 return r;
cec736d2
LP
895 }
896
de190aef
LP
897 *idx = htole64(le64toh(*idx) + 1);
898 return 0;
899}
900
/* Registers the entry located at offset with the data object referenced by
 * item i of entry object o, i.e. appends the entry offset to that data
 * object's list of entries.
 *
 * Returns 0 on success, -EINVAL if the item references offset 0, or a
 * negative errno-style value from mapping/linking. */
static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
        uint64_t p;
        int r;
        assert(f);
        assert(o);
        assert(offset > 0);

        /* NOTE(review): i is not checked against the entry's item count
         * here; the visible caller iterates below that count */
        p = le64toh(o->entry.items[i].object_offset);
        if (p == 0)
                return -EINVAL;

        /* From here on o refers to the data object, not the entry */
        r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
        if (r < 0)
                return r;

        /* NOTE(review): the pointers handed down point into the mapped data
         * object; the callee may append objects and map others before
         * writing through them -- confirm the mapping stays valid */
        return link_entry_into_array_plus_one(f,
                                              &o->data.entry_offset,
                                              &o->data.entry_array_offset,
                                              &o->data.n_entries,
                                              offset);
}
922
923static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 924 uint64_t n, i;
cec736d2
LP
925 int r;
926
927 assert(f);
928 assert(o);
929 assert(offset > 0);
de190aef 930 assert(o->object.type == OBJECT_ENTRY);
cec736d2 931
b788cc23
LP
932 __sync_synchronize();
933
cec736d2 934 /* Link up the entry itself */
de190aef
LP
935 r = link_entry_into_array(f,
936 &f->header->entry_array_offset,
937 &f->header->n_entries,
938 offset);
939 if (r < 0)
940 return r;
cec736d2 941
aaf53376 942 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 943
de190aef 944 if (f->header->head_entry_realtime == 0)
0ac38b70 945 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 946
0ac38b70 947 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
948 f->header->tail_entry_monotonic = o->entry.monotonic;
949
950 f->tail_entry_monotonic_valid = true;
cec736d2
LP
951
952 /* Link up the items */
953 n = journal_file_entry_n_items(o);
954 for (i = 0; i < n; i++) {
955 r = journal_file_link_entry_item(f, o, offset, i);
956 if (r < 0)
957 return r;
958 }
959
cec736d2
LP
960 return 0;
961}
962
963static int journal_file_append_entry_internal(
964 JournalFile *f,
965 const dual_timestamp *ts,
966 uint64_t xor_hash,
967 const EntryItem items[], unsigned n_items,
de190aef 968 uint64_t *seqnum,
cec736d2
LP
969 Object **ret, uint64_t *offset) {
970 uint64_t np;
971 uint64_t osize;
972 Object *o;
973 int r;
974
975 assert(f);
976 assert(items || n_items == 0);
de190aef 977 assert(ts);
cec736d2
LP
978
979 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
980
de190aef 981 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
982 if (r < 0)
983 return r;
984
d98cc1f2 985 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 986 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
987 o->entry.realtime = htole64(ts->realtime);
988 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
989 o->entry.xor_hash = htole64(xor_hash);
990 o->entry.boot_id = f->header->boot_id;
991
b0af6f41
LP
992 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
993 if (r < 0)
994 return r;
995
cec736d2
LP
996 r = journal_file_link_entry(f, o, np);
997 if (r < 0)
998 return r;
999
1000 if (ret)
1001 *ret = o;
1002
1003 if (offset)
1004 *offset = np;
1005
1006 return 0;
1007}
1008
cf244689 1009void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1010 assert(f);
1011
1012 /* inotify() does not receive IN_MODIFY events from file
1013 * accesses done via mmap(). After each access we hence
1014 * trigger IN_MODIFY by truncating the journal file to its
1015 * current size which triggers IN_MODIFY. */
1016
bc85bfee
LP
1017 __sync_synchronize();
1018
50f20cfd
LP
1019 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1020 log_error("Failed to to truncate file to its own size: %m");
1021}
1022
de190aef 1023int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1024 unsigned i;
1025 EntryItem *items;
1026 int r;
1027 uint64_t xor_hash = 0;
de190aef 1028 struct dual_timestamp _ts;
cec736d2
LP
1029
1030 assert(f);
1031 assert(iovec || n_iovec == 0);
1032
de190aef
LP
1033 if (!f->writable)
1034 return -EPERM;
1035
1036 if (!ts) {
1037 dual_timestamp_get(&_ts);
1038 ts = &_ts;
1039 }
1040
1041 if (f->tail_entry_monotonic_valid &&
1042 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1043 return -EINVAL;
1044
7560fffc
LP
1045 r = journal_file_maybe_append_tag(f, ts->realtime);
1046 if (r < 0)
1047 return r;
1048
64825d3c
LP
1049 /* alloca() can't take 0, hence let's allocate at least one */
1050 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1051
1052 for (i = 0; i < n_iovec; i++) {
1053 uint64_t p;
1054 Object *o;
1055
1056 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1057 if (r < 0)
cf244689 1058 return r;
cec736d2
LP
1059
1060 xor_hash ^= le64toh(o->data.hash);
1061 items[i].object_offset = htole64(p);
de7b95cd 1062 items[i].hash = o->data.hash;
cec736d2
LP
1063 }
1064
de190aef 1065 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1066
50f20cfd
LP
1067 journal_file_post_change(f);
1068
cec736d2
LP
1069 return r;
1070}
1071
de190aef
LP
1072static int generic_array_get(JournalFile *f,
1073 uint64_t first,
1074 uint64_t i,
1075 Object **ret, uint64_t *offset) {
1076
cec736d2 1077 Object *o;
6c8a39b8 1078 uint64_t p = 0, a;
cec736d2
LP
1079 int r;
1080
1081 assert(f);
1082
de190aef
LP
1083 a = first;
1084 while (a > 0) {
1085 uint64_t n;
cec736d2 1086
de190aef
LP
1087 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1088 if (r < 0)
1089 return r;
cec736d2 1090
de190aef
LP
1091 n = journal_file_entry_array_n_items(o);
1092 if (i < n) {
1093 p = le64toh(o->entry_array.items[i]);
1094 break;
cec736d2
LP
1095 }
1096
de190aef
LP
1097 i -= n;
1098 a = le64toh(o->entry_array.next_entry_array_offset);
1099 }
1100
1101 if (a <= 0 || p <= 0)
1102 return 0;
1103
1104 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1105 if (r < 0)
1106 return r;
1107
1108 if (ret)
1109 *ret = o;
1110
1111 if (offset)
1112 *offset = p;
1113
1114 return 1;
1115}
1116
1117static int generic_array_get_plus_one(JournalFile *f,
1118 uint64_t extra,
1119 uint64_t first,
1120 uint64_t i,
1121 Object **ret, uint64_t *offset) {
1122
1123 Object *o;
1124
1125 assert(f);
1126
1127 if (i == 0) {
1128 int r;
1129
1130 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1131 if (r < 0)
1132 return r;
1133
de190aef
LP
1134 if (ret)
1135 *ret = o;
cec736d2 1136
de190aef
LP
1137 if (offset)
1138 *offset = extra;
cec736d2 1139
de190aef 1140 return 1;
cec736d2
LP
1141 }
1142
de190aef
LP
1143 return generic_array_get(f, first, i-1, ret, offset);
1144}
cec736d2 1145
de190aef
LP
/* Results of the test_object callbacks used by the bisection helpers */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object is smaller than the needle */
        TEST_RIGHT = 2  /* the tested object is larger than the needle */
};
cec736d2 1151
de190aef
LP
1152static int generic_array_bisect(JournalFile *f,
1153 uint64_t first,
1154 uint64_t n,
1155 uint64_t needle,
1156 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1157 direction_t direction,
1158 Object **ret,
1159 uint64_t *offset,
1160 uint64_t *idx) {
1161
1162 uint64_t a, p, t = 0, i = 0, last_p = 0;
1163 bool subtract_one = false;
1164 Object *o, *array = NULL;
1165 int r;
cec736d2 1166
de190aef
LP
1167 assert(f);
1168 assert(test_object);
cec736d2 1169
de190aef
LP
1170 a = first;
1171 while (a > 0) {
1172 uint64_t left, right, k, lp;
1173
1174 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1175 if (r < 0)
1176 return r;
1177
de190aef
LP
1178 k = journal_file_entry_array_n_items(array);
1179 right = MIN(k, n);
1180 if (right <= 0)
1181 return 0;
cec736d2 1182
de190aef
LP
1183 i = right - 1;
1184 lp = p = le64toh(array->entry_array.items[i]);
1185 if (p <= 0)
1186 return -EBADMSG;
cec736d2 1187
de190aef
LP
1188 r = test_object(f, p, needle);
1189 if (r < 0)
1190 return r;
cec736d2 1191
de190aef
LP
1192 if (r == TEST_FOUND)
1193 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1194
1195 if (r == TEST_RIGHT) {
1196 left = 0;
1197 right -= 1;
1198 for (;;) {
1199 if (left == right) {
1200 if (direction == DIRECTION_UP)
1201 subtract_one = true;
1202
1203 i = left;
1204 goto found;
1205 }
1206
1207 assert(left < right);
1208
1209 i = (left + right) / 2;
1210 p = le64toh(array->entry_array.items[i]);
1211 if (p <= 0)
1212 return -EBADMSG;
1213
1214 r = test_object(f, p, needle);
1215 if (r < 0)
1216 return r;
cec736d2 1217
de190aef
LP
1218 if (r == TEST_FOUND)
1219 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1220
1221 if (r == TEST_RIGHT)
1222 right = i;
1223 else
1224 left = i + 1;
1225 }
1226 }
1227
cbdca852
LP
1228 if (k > n) {
1229 if (direction == DIRECTION_UP) {
1230 i = n;
1231 subtract_one = true;
1232 goto found;
1233 }
1234
cec736d2 1235 return 0;
cbdca852 1236 }
cec736d2 1237
de190aef
LP
1238 last_p = lp;
1239
1240 n -= k;
1241 t += k;
1242 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1243 }
1244
1245 return 0;
de190aef
LP
1246
1247found:
1248 if (subtract_one && t == 0 && i == 0)
1249 return 0;
1250
1251 if (subtract_one && i == 0)
1252 p = last_p;
1253 else if (subtract_one)
1254 p = le64toh(array->entry_array.items[i-1]);
1255 else
1256 p = le64toh(array->entry_array.items[i]);
1257
1258 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1259 if (r < 0)
1260 return r;
1261
1262 if (ret)
1263 *ret = o;
1264
1265 if (offset)
1266 *offset = p;
1267
1268 if (idx)
cbdca852 1269 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1270
1271 return 1;
cec736d2
LP
1272}
1273
de190aef
LP
1274static int generic_array_bisect_plus_one(JournalFile *f,
1275 uint64_t extra,
1276 uint64_t first,
1277 uint64_t n,
1278 uint64_t needle,
1279 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1280 direction_t direction,
1281 Object **ret,
1282 uint64_t *offset,
1283 uint64_t *idx) {
1284
cec736d2 1285 int r;
cbdca852
LP
1286 bool step_back = false;
1287 Object *o;
cec736d2
LP
1288
1289 assert(f);
de190aef 1290 assert(test_object);
cec736d2 1291
de190aef
LP
1292 if (n <= 0)
1293 return 0;
cec736d2 1294
de190aef
LP
1295 /* This bisects the array in object 'first', but first checks
1296 * an extra */
de190aef
LP
1297 r = test_object(f, extra, needle);
1298 if (r < 0)
1299 return r;
a536e261
LP
1300
1301 if (r == TEST_FOUND)
1302 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1303
cbdca852
LP
1304 /* if we are looking with DIRECTION_UP then we need to first
1305 see if in the actual array there is a matching entry, and
1306 return the last one of that. But if there isn't any we need
1307 to return this one. Hence remember this, and return it
1308 below. */
1309 if (r == TEST_LEFT)
1310 step_back = direction == DIRECTION_UP;
de190aef 1311
cbdca852
LP
1312 if (r == TEST_RIGHT) {
1313 if (direction == DIRECTION_DOWN)
1314 goto found;
1315 else
1316 return 0;
a536e261 1317 }
cec736d2 1318
de190aef
LP
1319 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1320
cbdca852
LP
1321 if (r == 0 && step_back)
1322 goto found;
1323
ecf68b1d 1324 if (r > 0 && idx)
de190aef
LP
1325 (*idx) ++;
1326
1327 return r;
cbdca852
LP
1328
1329found:
1330 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1331 if (r < 0)
1332 return r;
1333
1334 if (ret)
1335 *ret = o;
1336
1337 if (offset)
1338 *offset = extra;
1339
1340 if (idx)
1341 *idx = 0;
1342
1343 return 1;
1344}
1345
1346static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1347 assert(f);
1348 assert(p > 0);
1349
1350 if (p == needle)
1351 return TEST_FOUND;
1352 else if (p < needle)
1353 return TEST_LEFT;
1354 else
1355 return TEST_RIGHT;
1356}
1357
1358int journal_file_move_to_entry_by_offset(
1359 JournalFile *f,
1360 uint64_t p,
1361 direction_t direction,
1362 Object **ret,
1363 uint64_t *offset) {
1364
1365 return generic_array_bisect(f,
1366 le64toh(f->header->entry_array_offset),
1367 le64toh(f->header->n_entries),
1368 p,
1369 test_object_offset,
1370 direction,
1371 ret, offset, NULL);
de190aef
LP
1372}
1373
cbdca852 1374
de190aef
LP
1375static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1376 Object *o;
1377 int r;
1378
1379 assert(f);
1380 assert(p > 0);
1381
1382 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1383 if (r < 0)
1384 return r;
1385
de190aef
LP
1386 if (le64toh(o->entry.seqnum) == needle)
1387 return TEST_FOUND;
1388 else if (le64toh(o->entry.seqnum) < needle)
1389 return TEST_LEFT;
1390 else
1391 return TEST_RIGHT;
1392}
cec736d2 1393
de190aef
LP
1394int journal_file_move_to_entry_by_seqnum(
1395 JournalFile *f,
1396 uint64_t seqnum,
1397 direction_t direction,
1398 Object **ret,
1399 uint64_t *offset) {
1400
1401 return generic_array_bisect(f,
1402 le64toh(f->header->entry_array_offset),
1403 le64toh(f->header->n_entries),
1404 seqnum,
1405 test_object_seqnum,
1406 direction,
1407 ret, offset, NULL);
1408}
cec736d2 1409
de190aef
LP
1410static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1411 Object *o;
1412 int r;
1413
1414 assert(f);
1415 assert(p > 0);
1416
1417 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1418 if (r < 0)
1419 return r;
1420
1421 if (le64toh(o->entry.realtime) == needle)
1422 return TEST_FOUND;
1423 else if (le64toh(o->entry.realtime) < needle)
1424 return TEST_LEFT;
1425 else
1426 return TEST_RIGHT;
cec736d2
LP
1427}
1428
de190aef
LP
1429int journal_file_move_to_entry_by_realtime(
1430 JournalFile *f,
1431 uint64_t realtime,
1432 direction_t direction,
1433 Object **ret,
1434 uint64_t *offset) {
1435
1436 return generic_array_bisect(f,
1437 le64toh(f->header->entry_array_offset),
1438 le64toh(f->header->n_entries),
1439 realtime,
1440 test_object_realtime,
1441 direction,
1442 ret, offset, NULL);
1443}
1444
1445static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1446 Object *o;
1447 int r;
1448
1449 assert(f);
1450 assert(p > 0);
1451
1452 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1453 if (r < 0)
1454 return r;
1455
1456 if (le64toh(o->entry.monotonic) == needle)
1457 return TEST_FOUND;
1458 else if (le64toh(o->entry.monotonic) < needle)
1459 return TEST_LEFT;
1460 else
1461 return TEST_RIGHT;
1462}
1463
1464int journal_file_move_to_entry_by_monotonic(
1465 JournalFile *f,
1466 sd_id128_t boot_id,
1467 uint64_t monotonic,
1468 direction_t direction,
1469 Object **ret,
1470 uint64_t *offset) {
1471
10b6f904 1472 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1473 Object *o;
1474 int r;
1475
cbdca852 1476 assert(f);
de190aef 1477
cbdca852 1478 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1479 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1480 if (r < 0)
1481 return r;
cbdca852 1482 if (r == 0)
de190aef
LP
1483 return -ENOENT;
1484
1485 return generic_array_bisect_plus_one(f,
1486 le64toh(o->data.entry_offset),
1487 le64toh(o->data.entry_array_offset),
1488 le64toh(o->data.n_entries),
1489 monotonic,
1490 test_object_monotonic,
1491 direction,
1492 ret, offset, NULL);
1493}
1494
de190aef
LP
1495int journal_file_next_entry(
1496 JournalFile *f,
1497 Object *o, uint64_t p,
1498 direction_t direction,
1499 Object **ret, uint64_t *offset) {
1500
1501 uint64_t i, n;
cec736d2
LP
1502 int r;
1503
1504 assert(f);
de190aef
LP
1505 assert(p > 0 || !o);
1506
1507 n = le64toh(f->header->n_entries);
1508 if (n <= 0)
1509 return 0;
cec736d2
LP
1510
1511 if (!o)
de190aef 1512 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1513 else {
de190aef 1514 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1515 return -EINVAL;
1516
de190aef
LP
1517 r = generic_array_bisect(f,
1518 le64toh(f->header->entry_array_offset),
1519 le64toh(f->header->n_entries),
1520 p,
1521 test_object_offset,
1522 DIRECTION_DOWN,
1523 NULL, NULL,
1524 &i);
1525 if (r <= 0)
1526 return r;
1527
1528 if (direction == DIRECTION_DOWN) {
1529 if (i >= n - 1)
1530 return 0;
1531
1532 i++;
1533 } else {
1534 if (i <= 0)
1535 return 0;
1536
1537 i--;
1538 }
cec736d2
LP
1539 }
1540
de190aef
LP
1541 /* And jump to it */
1542 return generic_array_get(f,
1543 le64toh(f->header->entry_array_offset),
1544 i,
1545 ret, offset);
1546}
cec736d2 1547
de190aef
LP
1548int journal_file_skip_entry(
1549 JournalFile *f,
1550 Object *o, uint64_t p,
1551 int64_t skip,
1552 Object **ret, uint64_t *offset) {
1553
1554 uint64_t i, n;
1555 int r;
1556
1557 assert(f);
1558 assert(o);
1559 assert(p > 0);
1560
1561 if (o->object.type != OBJECT_ENTRY)
1562 return -EINVAL;
1563
1564 r = generic_array_bisect(f,
1565 le64toh(f->header->entry_array_offset),
1566 le64toh(f->header->n_entries),
1567 p,
1568 test_object_offset,
1569 DIRECTION_DOWN,
1570 NULL, NULL,
1571 &i);
1572 if (r <= 0)
cec736d2
LP
1573 return r;
1574
de190aef
LP
1575 /* Calculate new index */
1576 if (skip < 0) {
1577 if ((uint64_t) -skip >= i)
1578 i = 0;
1579 else
1580 i = i - (uint64_t) -skip;
1581 } else
1582 i += (uint64_t) skip;
cec736d2 1583
de190aef
LP
1584 n = le64toh(f->header->n_entries);
1585 if (n <= 0)
1586 return -EBADMSG;
cec736d2 1587
de190aef
LP
1588 if (i >= n)
1589 i = n-1;
1590
1591 return generic_array_get(f,
1592 le64toh(f->header->entry_array_offset),
1593 i,
1594 ret, offset);
cec736d2
LP
1595}
1596
de190aef
LP
1597int journal_file_next_entry_for_data(
1598 JournalFile *f,
1599 Object *o, uint64_t p,
1600 uint64_t data_offset,
1601 direction_t direction,
1602 Object **ret, uint64_t *offset) {
1603
1604 uint64_t n, i;
cec736d2 1605 int r;
de190aef 1606 Object *d;
cec736d2
LP
1607
1608 assert(f);
de190aef 1609 assert(p > 0 || !o);
cec736d2 1610
de190aef 1611 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1612 if (r < 0)
de190aef 1613 return r;
cec736d2 1614
de190aef
LP
1615 n = le64toh(d->data.n_entries);
1616 if (n <= 0)
1617 return n;
cec736d2 1618
de190aef
LP
1619 if (!o)
1620 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1621 else {
1622 if (o->object.type != OBJECT_ENTRY)
1623 return -EINVAL;
cec736d2 1624
de190aef
LP
1625 r = generic_array_bisect_plus_one(f,
1626 le64toh(d->data.entry_offset),
1627 le64toh(d->data.entry_array_offset),
1628 le64toh(d->data.n_entries),
1629 p,
1630 test_object_offset,
1631 DIRECTION_DOWN,
1632 NULL, NULL,
1633 &i);
1634
1635 if (r <= 0)
cec736d2
LP
1636 return r;
1637
de190aef
LP
1638 if (direction == DIRECTION_DOWN) {
1639 if (i >= n - 1)
1640 return 0;
cec736d2 1641
de190aef
LP
1642 i++;
1643 } else {
1644 if (i <= 0)
1645 return 0;
cec736d2 1646
de190aef
LP
1647 i--;
1648 }
cec736d2 1649
de190aef 1650 }
cec736d2 1651
de190aef
LP
1652 return generic_array_get_plus_one(f,
1653 le64toh(d->data.entry_offset),
1654 le64toh(d->data.entry_array_offset),
1655 i,
1656 ret, offset);
1657}
cec736d2 1658
cbdca852
LP
1659int journal_file_move_to_entry_by_offset_for_data(
1660 JournalFile *f,
1661 uint64_t data_offset,
1662 uint64_t p,
1663 direction_t direction,
1664 Object **ret, uint64_t *offset) {
1665
1666 int r;
1667 Object *d;
1668
1669 assert(f);
1670
1671 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1672 if (r < 0)
1673 return r;
1674
1675 return generic_array_bisect_plus_one(f,
1676 le64toh(d->data.entry_offset),
1677 le64toh(d->data.entry_array_offset),
1678 le64toh(d->data.n_entries),
1679 p,
1680 test_object_offset,
1681 direction,
1682 ret, offset, NULL);
1683}
1684
1685int journal_file_move_to_entry_by_monotonic_for_data(
1686 JournalFile *f,
1687 uint64_t data_offset,
1688 sd_id128_t boot_id,
1689 uint64_t monotonic,
1690 direction_t direction,
1691 Object **ret, uint64_t *offset) {
1692
1693 char t[9+32+1] = "_BOOT_ID=";
1694 Object *o, *d;
1695 int r;
1696 uint64_t b, z;
1697
1698 assert(f);
1699
1700 /* First, seek by time */
1701 sd_id128_to_string(boot_id, t + 9);
1702 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1703 if (r < 0)
1704 return r;
1705 if (r == 0)
1706 return -ENOENT;
1707
1708 r = generic_array_bisect_plus_one(f,
1709 le64toh(o->data.entry_offset),
1710 le64toh(o->data.entry_array_offset),
1711 le64toh(o->data.n_entries),
1712 monotonic,
1713 test_object_monotonic,
1714 direction,
1715 NULL, &z, NULL);
1716 if (r <= 0)
1717 return r;
1718
1719 /* And now, continue seeking until we find an entry that
1720 * exists in both bisection arrays */
1721
1722 for (;;) {
1723 Object *qo;
1724 uint64_t p, q;
1725
1726 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1727 if (r < 0)
1728 return r;
1729
1730 r = generic_array_bisect_plus_one(f,
1731 le64toh(d->data.entry_offset),
1732 le64toh(d->data.entry_array_offset),
1733 le64toh(d->data.n_entries),
1734 z,
1735 test_object_offset,
1736 direction,
1737 NULL, &p, NULL);
1738 if (r <= 0)
1739 return r;
1740
1741 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1742 if (r < 0)
1743 return r;
1744
1745 r = generic_array_bisect_plus_one(f,
1746 le64toh(o->data.entry_offset),
1747 le64toh(o->data.entry_array_offset),
1748 le64toh(o->data.n_entries),
1749 p,
1750 test_object_offset,
1751 direction,
1752 &qo, &q, NULL);
1753
1754 if (r <= 0)
1755 return r;
1756
1757 if (p == q) {
1758 if (ret)
1759 *ret = qo;
1760 if (offset)
1761 *offset = q;
1762
1763 return 1;
1764 }
1765
1766 z = q;
1767 }
1768
1769 return 0;
1770}
1771
de190aef
LP
1772int journal_file_move_to_entry_by_seqnum_for_data(
1773 JournalFile *f,
1774 uint64_t data_offset,
1775 uint64_t seqnum,
1776 direction_t direction,
1777 Object **ret, uint64_t *offset) {
cec736d2 1778
de190aef
LP
1779 Object *d;
1780 int r;
cec736d2 1781
91a31dde
LP
1782 assert(f);
1783
de190aef 1784 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1785 if (r < 0)
de190aef 1786 return r;
cec736d2 1787
de190aef
LP
1788 return generic_array_bisect_plus_one(f,
1789 le64toh(d->data.entry_offset),
1790 le64toh(d->data.entry_array_offset),
1791 le64toh(d->data.n_entries),
1792 seqnum,
1793 test_object_seqnum,
1794 direction,
1795 ret, offset, NULL);
1796}
cec736d2 1797
de190aef
LP
1798int journal_file_move_to_entry_by_realtime_for_data(
1799 JournalFile *f,
1800 uint64_t data_offset,
1801 uint64_t realtime,
1802 direction_t direction,
1803 Object **ret, uint64_t *offset) {
1804
1805 Object *d;
1806 int r;
1807
91a31dde
LP
1808 assert(f);
1809
de190aef 1810 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1811 if (r < 0)
de190aef
LP
1812 return r;
1813
1814 return generic_array_bisect_plus_one(f,
1815 le64toh(d->data.entry_offset),
1816 le64toh(d->data.entry_array_offset),
1817 le64toh(d->data.n_entries),
1818 realtime,
1819 test_object_realtime,
1820 direction,
1821 ret, offset, NULL);
cec736d2
LP
1822}
1823
0284adc6 1824void journal_file_dump(JournalFile *f) {
7560fffc 1825 Object *o;
7560fffc 1826 int r;
0284adc6 1827 uint64_t p;
7560fffc
LP
1828
1829 assert(f);
1830
0284adc6 1831 journal_file_print_header(f);
7560fffc 1832
0284adc6
LP
1833 p = le64toh(f->header->header_size);
1834 while (p != 0) {
1835 r = journal_file_move_to_object(f, -1, p, &o);
1836 if (r < 0)
1837 goto fail;
7560fffc 1838
0284adc6 1839 switch (o->object.type) {
d98cc1f2 1840
0284adc6
LP
1841 case OBJECT_UNUSED:
1842 printf("Type: OBJECT_UNUSED\n");
1843 break;
d98cc1f2 1844
0284adc6
LP
1845 case OBJECT_DATA:
1846 printf("Type: OBJECT_DATA\n");
1847 break;
7560fffc 1848
0284adc6
LP
1849 case OBJECT_ENTRY:
1850 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1851 (unsigned long long) le64toh(o->entry.seqnum),
1852 (unsigned long long) le64toh(o->entry.monotonic),
1853 (unsigned long long) le64toh(o->entry.realtime));
1854 break;
7560fffc 1855
0284adc6
LP
1856 case OBJECT_FIELD_HASH_TABLE:
1857 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1858 break;
7560fffc 1859
0284adc6
LP
1860 case OBJECT_DATA_HASH_TABLE:
1861 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1862 break;
7560fffc 1863
0284adc6
LP
1864 case OBJECT_ENTRY_ARRAY:
1865 printf("Type: OBJECT_ENTRY_ARRAY\n");
1866 break;
7560fffc 1867
0284adc6
LP
1868 case OBJECT_TAG:
1869 printf("Type: OBJECT_TAG %llu\n",
1870 (unsigned long long) le64toh(o->tag.seqnum));
1871 break;
1872 }
7560fffc 1873
0284adc6
LP
1874 if (o->object.flags & OBJECT_COMPRESSED)
1875 printf("Flags: COMPRESSED\n");
7560fffc 1876
0284adc6
LP
1877 if (p == le64toh(f->header->tail_object_offset))
1878 p = 0;
1879 else
1880 p = p + ALIGN64(le64toh(o->object.size));
1881 }
7560fffc 1882
0284adc6
LP
1883 return;
1884fail:
1885 log_error("File corrupt");
7560fffc
LP
1886}
1887
0284adc6
LP
1888void journal_file_print_header(JournalFile *f) {
1889 char a[33], b[33], c[33];
1890 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
7560fffc
LP
1891
1892 assert(f);
7560fffc 1893
0284adc6
LP
1894 printf("File Path: %s\n"
1895 "File ID: %s\n"
1896 "Machine ID: %s\n"
1897 "Boot ID: %s\n"
1898 "Sequential Number ID: %s\n"
1899 "State: %s\n"
1900 "Compatible Flags:%s%s\n"
1901 "Incompatible Flags:%s%s\n"
1902 "Header size: %llu\n"
1903 "Arena size: %llu\n"
1904 "Data Hash Table Size: %llu\n"
1905 "Field Hash Table Size: %llu\n"
0284adc6
LP
1906 "Rotate Suggested: %s\n"
1907 "Head Sequential Number: %llu\n"
1908 "Tail Sequential Number: %llu\n"
1909 "Head Realtime Timestamp: %s\n"
3223f44f
LP
1910 "Tail Realtime Timestamp: %s\n"
1911 "Objects: %llu\n"
1912 "Entry Objects: %llu\n",
0284adc6
LP
1913 f->path,
1914 sd_id128_to_string(f->header->file_id, a),
1915 sd_id128_to_string(f->header->machine_id, b),
1916 sd_id128_to_string(f->header->boot_id, c),
1917 sd_id128_to_string(f->header->seqnum_id, c),
3223f44f
LP
1918 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1919 f->header->state == STATE_ONLINE ? "ONLINE" :
1920 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
baed47c3
LP
1921 (f->header->compatible_flags & HEADER_COMPATIBLE_SEALED) ? " SEALED" : "",
1922 (f->header->compatible_flags & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
0284adc6
LP
1923 (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1924 (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1925 (unsigned long long) le64toh(f->header->header_size),
1926 (unsigned long long) le64toh(f->header->arena_size),
1927 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1928 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
0284adc6
LP
1929 yes_no(journal_file_rotate_suggested(f)),
1930 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1931 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1932 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
3223f44f
LP
1933 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1934 (unsigned long long) le64toh(f->header->n_objects),
1935 (unsigned long long) le64toh(f->header->n_entries));
7560fffc 1936
0284adc6
LP
1937 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1938 printf("Data Objects: %llu\n"
1939 "Data Hash Table Fill: %.1f%%\n",
1940 (unsigned long long) le64toh(f->header->n_data),
1941 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 1942
0284adc6
LP
1943 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1944 printf("Field Objects: %llu\n"
1945 "Field Hash Table Fill: %.1f%%\n",
1946 (unsigned long long) le64toh(f->header->n_fields),
1947 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
1948
1949 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1950 printf("Tag Objects: %llu\n",
1951 (unsigned long long) le64toh(f->header->n_tags));
1952 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1953 printf("Entry Array Objects: %llu\n",
1954 (unsigned long long) le64toh(f->header->n_entry_arrays));
7560fffc
LP
1955}
1956
0284adc6
LP
1957int journal_file_open(
1958 const char *fname,
1959 int flags,
1960 mode_t mode,
1961 bool compress,
baed47c3 1962 bool seal,
0284adc6
LP
1963 JournalMetrics *metrics,
1964 MMapCache *mmap_cache,
1965 JournalFile *template,
1966 JournalFile **ret) {
7560fffc 1967
0284adc6
LP
1968 JournalFile *f;
1969 int r;
1970 bool newly_created = false;
7560fffc 1971
0284adc6 1972 assert(fname);
7560fffc 1973
0284adc6
LP
1974 if ((flags & O_ACCMODE) != O_RDONLY &&
1975 (flags & O_ACCMODE) != O_RDWR)
1976 return -EINVAL;
7560fffc 1977
a0108012
LP
1978 if (!endswith(fname, ".journal") &&
1979 !endswith(fname, ".journal~"))
0284adc6 1980 return -EINVAL;
7560fffc 1981
0284adc6
LP
1982 f = new0(JournalFile, 1);
1983 if (!f)
1984 return -ENOMEM;
7560fffc 1985
0284adc6
LP
1986 f->fd = -1;
1987 f->mode = mode;
7560fffc 1988
0284adc6
LP
1989 f->flags = flags;
1990 f->prot = prot_from_flags(flags);
1991 f->writable = (flags & O_ACCMODE) != O_RDONLY;
1992 f->compress = compress;
baed47c3 1993 f->seal = seal;
7560fffc 1994
0284adc6
LP
1995 if (mmap_cache)
1996 f->mmap = mmap_cache_ref(mmap_cache);
1997 else {
1998 /* One context for each type, plus the zeroth catchall
1999 * context. One fd for the file plus one for each type
2000 * (which we need during verification */
2001 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
2002 if (!f->mmap) {
2003 r = -ENOMEM;
2004 goto fail;
2005 }
2006 }
7560fffc 2007
0284adc6
LP
2008 f->path = strdup(fname);
2009 if (!f->path) {
2010 r = -ENOMEM;
2011 goto fail;
2012 }
7560fffc 2013
0284adc6
LP
2014 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2015 if (f->fd < 0) {
2016 r = -errno;
2017 goto fail;
7560fffc 2018 }
7560fffc 2019
0284adc6
LP
2020 if (fstat(f->fd, &f->last_stat) < 0) {
2021 r = -errno;
2022 goto fail;
2023 }
7560fffc 2024
0284adc6
LP
2025 if (f->last_stat.st_size == 0 && f->writable) {
2026 newly_created = true;
7560fffc 2027
0284adc6 2028 /* Try to load the FSPRG state, and if we can't, then
baed47c3
LP
2029 * just don't do sealing */
2030 r = journal_file_fss_load(f);
0284adc6 2031 if (r < 0)
baed47c3 2032 f->seal = false;
7560fffc 2033
0284adc6
LP
2034 r = journal_file_init_header(f, template);
2035 if (r < 0)
2036 goto fail;
7560fffc 2037
0284adc6
LP
2038 if (fstat(f->fd, &f->last_stat) < 0) {
2039 r = -errno;
2040 goto fail;
2041 }
2042 }
7560fffc 2043
0284adc6
LP
2044 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2045 r = -EIO;
2046 goto fail;
2047 }
7560fffc 2048
0284adc6
LP
2049 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2050 if (f->header == MAP_FAILED) {
2051 f->header = NULL;
2052 r = -errno;
2053 goto fail;
2054 }
7560fffc 2055
0284adc6
LP
2056 if (!newly_created) {
2057 r = journal_file_verify_header(f);
2058 if (r < 0)
2059 goto fail;
2060 }
7560fffc 2061
0284adc6 2062 if (!newly_created && f->writable) {
baed47c3 2063 r = journal_file_fss_load(f);
0284adc6
LP
2064 if (r < 0)
2065 goto fail;
2066 }
cec736d2
LP
2067
2068 if (f->writable) {
4a92baf3
LP
2069 if (metrics) {
2070 journal_default_metrics(metrics, f->fd);
2071 f->metrics = *metrics;
2072 } else if (template)
2073 f->metrics = template->metrics;
2074
cec736d2
LP
2075 r = journal_file_refresh_header(f);
2076 if (r < 0)
2077 goto fail;
2078 }
2079
baed47c3 2080 r = journal_file_hmac_setup(f);
14d10188
LP
2081 if (r < 0)
2082 goto fail;
2083
cec736d2 2084 if (newly_created) {
de190aef 2085 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2086 if (r < 0)
2087 goto fail;
2088
de190aef 2089 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2090 if (r < 0)
2091 goto fail;
7560fffc
LP
2092
2093 r = journal_file_append_first_tag(f);
2094 if (r < 0)
2095 goto fail;
cec736d2
LP
2096 }
2097
de190aef 2098 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2099 if (r < 0)
2100 goto fail;
2101
de190aef 2102 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2103 if (r < 0)
2104 goto fail;
2105
2106 if (ret)
2107 *ret = f;
2108
2109 return 0;
2110
2111fail:
2112 journal_file_close(f);
2113
2114 return r;
2115}
0ac38b70 2116
baed47c3 2117int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
0ac38b70
LP
2118 char *p;
2119 size_t l;
2120 JournalFile *old_file, *new_file = NULL;
2121 int r;
2122
2123 assert(f);
2124 assert(*f);
2125
2126 old_file = *f;
2127
2128 if (!old_file->writable)
2129 return -EINVAL;
2130
2131 if (!endswith(old_file->path, ".journal"))
2132 return -EINVAL;
2133
2134 l = strlen(old_file->path);
2135
9447a7f1 2136 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2137 if (!p)
2138 return -ENOMEM;
2139
2140 memcpy(p, old_file->path, l - 8);
2141 p[l-8] = '@';
2142 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2143 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2144 "-%016llx-%016llx.journal",
beec0085 2145 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
0ac38b70
LP
2146 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2147
2148 r = rename(old_file->path, p);
2149 free(p);
2150
2151 if (r < 0)
2152 return -errno;
2153
ccdbaf91 2154 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2155
baed47c3 2156 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2157 journal_file_close(old_file);
2158
2159 *f = new_file;
2160 return r;
2161}
2162
9447a7f1
LP
2163int journal_file_open_reliably(
2164 const char *fname,
2165 int flags,
2166 mode_t mode,
7560fffc 2167 bool compress,
baed47c3 2168 bool seal,
4a92baf3 2169 JournalMetrics *metrics,
27370278 2170 MMapCache *mmap_cache,
9447a7f1
LP
2171 JournalFile *template,
2172 JournalFile **ret) {
2173
2174 int r;
2175 size_t l;
2176 char *p;
2177
baed47c3 2178 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2179 metrics, mmap_cache, template, ret);
0071d9f1
LP
2180 if (r != -EBADMSG && /* corrupted */
2181 r != -ENODATA && /* truncated */
2182 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2183 r != -EPROTONOSUPPORT && /* incompatible feature */
2184 r != -EBUSY && /* unclean shutdown */
2185 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2186 return r;
2187
2188 if ((flags & O_ACCMODE) == O_RDONLY)
2189 return r;
2190
2191 if (!(flags & O_CREAT))
2192 return r;
2193
7560fffc
LP
2194 if (!endswith(fname, ".journal"))
2195 return r;
2196
5c70eab4
LP
2197 /* The file is corrupted. Rotate it away and try it again (but only once) */
2198
9447a7f1
LP
2199 l = strlen(fname);
2200 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2201 (int) (l-8), fname,
2202 (unsigned long long) now(CLOCK_REALTIME),
2203 random_ull()) < 0)
2204 return -ENOMEM;
2205
2206 r = rename(fname, p);
2207 free(p);
2208 if (r < 0)
2209 return -errno;
2210
a1a1898f 2211 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2212
baed47c3 2213 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2214 metrics, mmap_cache, template, ret);
9447a7f1
LP
2215}
2216
cf244689
LP
2217
2218int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2219 uint64_t i, n;
2220 uint64_t q, xor_hash = 0;
2221 int r;
2222 EntryItem *items;
2223 dual_timestamp ts;
2224
2225 assert(from);
2226 assert(to);
2227 assert(o);
2228 assert(p);
2229
2230 if (!to->writable)
2231 return -EPERM;
2232
2233 ts.monotonic = le64toh(o->entry.monotonic);
2234 ts.realtime = le64toh(o->entry.realtime);
2235
2236 if (to->tail_entry_monotonic_valid &&
2237 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2238 return -EINVAL;
2239
cf244689
LP
2240 n = journal_file_entry_n_items(o);
2241 items = alloca(sizeof(EntryItem) * n);
2242
2243 for (i = 0; i < n; i++) {
4fd052ae
FC
2244 uint64_t l, h;
2245 le64_t le_hash;
cf244689
LP
2246 size_t t;
2247 void *data;
2248 Object *u;
2249
2250 q = le64toh(o->entry.items[i].object_offset);
2251 le_hash = o->entry.items[i].hash;
2252
2253 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2254 if (r < 0)
2255 return r;
2256
2257 if (le_hash != o->data.hash)
2258 return -EBADMSG;
2259
2260 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2261 t = (size_t) l;
2262
2263 /* We hit the limit on 32bit machines */
2264 if ((uint64_t) t != l)
2265 return -E2BIG;
2266
2267 if (o->object.flags & OBJECT_COMPRESSED) {
2268#ifdef HAVE_XZ
2269 uint64_t rsize;
2270
2271 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2272 return -EBADMSG;
2273
2274 data = from->compress_buffer;
2275 l = rsize;
2276#else
2277 return -EPROTONOSUPPORT;
2278#endif
2279 } else
2280 data = o->data.payload;
2281
2282 r = journal_file_append_data(to, data, l, &u, &h);
2283 if (r < 0)
2284 return r;
2285
2286 xor_hash ^= le64toh(u->data.hash);
2287 items[i].object_offset = htole64(h);
2288 items[i].hash = u->data.hash;
2289
2290 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2291 if (r < 0)
2292 return r;
2293 }
2294
2295 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2296}
babfc091
LP
2297
2298void journal_default_metrics(JournalMetrics *m, int fd) {
2299 uint64_t fs_size = 0;
2300 struct statvfs ss;
a7bc2c2a 2301 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2302
2303 assert(m);
2304 assert(fd >= 0);
2305
2306 if (fstatvfs(fd, &ss) >= 0)
2307 fs_size = ss.f_frsize * ss.f_blocks;
2308
2309 if (m->max_use == (uint64_t) -1) {
2310
2311 if (fs_size > 0) {
2312 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2313
2314 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2315 m->max_use = DEFAULT_MAX_USE_UPPER;
2316
2317 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2318 m->max_use = DEFAULT_MAX_USE_LOWER;
2319 } else
2320 m->max_use = DEFAULT_MAX_USE_LOWER;
2321 } else {
2322 m->max_use = PAGE_ALIGN(m->max_use);
2323
2324 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2325 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2326 }
2327
2328 if (m->max_size == (uint64_t) -1) {
2329 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2330
2331 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2332 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2333 } else
2334 m->max_size = PAGE_ALIGN(m->max_size);
2335
2336 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2337 m->max_size = JOURNAL_FILE_SIZE_MIN;
2338
2339 if (m->max_size*2 > m->max_use)
2340 m->max_use = m->max_size*2;
2341
2342 if (m->min_size == (uint64_t) -1)
2343 m->min_size = JOURNAL_FILE_SIZE_MIN;
2344 else {
2345 m->min_size = PAGE_ALIGN(m->min_size);
2346
2347 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2348 m->min_size = JOURNAL_FILE_SIZE_MIN;
2349
2350 if (m->min_size > m->max_size)
2351 m->max_size = m->min_size;
2352 }
2353
2354 if (m->keep_free == (uint64_t) -1) {
2355
2356 if (fs_size > 0) {
2357 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2358
2359 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2360 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2361
2362 } else
2363 m->keep_free = DEFAULT_KEEP_FREE;
2364 }
2365
e7bf07b3
LP
2366 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2367 format_bytes(a, sizeof(a), m->max_use),
2368 format_bytes(b, sizeof(b), m->max_size),
2369 format_bytes(c, sizeof(c), m->min_size),
2370 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2371}
08984293
LP
2372
2373int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2374 assert(f);
2375 assert(from || to);
2376
2377 if (from) {
162566a4
LP
2378 if (f->header->head_entry_realtime == 0)
2379 return -ENOENT;
08984293 2380
162566a4 2381 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2382 }
2383
2384 if (to) {
162566a4
LP
2385 if (f->header->tail_entry_realtime == 0)
2386 return -ENOENT;
08984293 2387
162566a4 2388 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2389 }
2390
2391 return 1;
2392}
2393
2394int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2395 char t[9+32+1] = "_BOOT_ID=";
2396 Object *o;
2397 uint64_t p;
2398 int r;
2399
2400 assert(f);
2401 assert(from || to);
2402
2403 sd_id128_to_string(boot_id, t + 9);
2404
2405 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2406 if (r <= 0)
2407 return r;
2408
2409 if (le64toh(o->data.n_entries) <= 0)
2410 return 0;
2411
2412 if (from) {
2413 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2414 if (r < 0)
2415 return r;
2416
2417 *from = le64toh(o->entry.monotonic);
2418 }
2419
2420 if (to) {
2421 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2422 if (r < 0)
2423 return r;
2424
2425 r = generic_array_get_plus_one(f,
2426 le64toh(o->data.entry_offset),
2427 le64toh(o->data.entry_array_offset),
2428 le64toh(o->data.n_entries)-1,
2429 &o, NULL);
2430 if (r <= 0)
2431 return r;
2432
2433 *to = le64toh(o->entry.monotonic);
2434 }
2435
2436 return 1;
2437}
dca6219e
LP
2438
2439bool journal_file_rotate_suggested(JournalFile *f) {
2440 assert(f);
2441
2442 /* If we gained new header fields we gained new features,
2443 * hence suggest a rotation */
361f9cbc
LP
2444 if (le64toh(f->header->header_size) < sizeof(Header)) {
2445 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2446 return true;
361f9cbc 2447 }
dca6219e
LP
2448
2449 /* Let's check if the hash tables grew over a certain fill
2450 * level (75%, borrowing this value from Java's hash table
2451 * implementation), and if so suggest a rotation. To calculate
2452 * the fill level we need the n_data field, which only exists
2453 * in newer versions. */
2454
2455 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
2456 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2457 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2458 f->path,
2459 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2460 (unsigned long long) le64toh(f->header->n_data),
2461 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2462 (unsigned long long) (f->last_stat.st_size),
2463 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 2464 return true;
361f9cbc 2465 }
dca6219e
LP
2466
2467 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
2468 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2469 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2470 f->path,
2471 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2472 (unsigned long long) le64toh(f->header->n_fields),
2473 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 2474 return true;
361f9cbc 2475 }
dca6219e
LP
2476
2477 return false;
2478}