]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journald: initial version of FSPRG hookup
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
32#include "lookup3.h"
807e17f0 33#include "compress.h"
7560fffc 34#include "fsprg.h"
cec736d2 35
4a92baf3
LP
36#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
37#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
cec736d2 38
1fa80181 39#define DEFAULT_WINDOW_SIZE (8ULL*1024ULL*1024ULL)
cec736d2 40
be19b7df 41#define COMPRESSION_SIZE_THRESHOLD (512ULL)
807e17f0 42
babfc091 43/* This is the minimum journal file size */
b47ffcfd 44#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
babfc091
LP
45
46/* These are the lower and upper bounds if we deduce the max_use value
47 * from the file system size */
48#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
49#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
50
51/* This is the upper bound if we deduce max_size from max_use */
71100051 52#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
babfc091
LP
53
54/* This is the upper bound if we deduce the keep_free value from the
55 * file system size */
56#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
57
58/* This is the keep_free value when we can't determine the file system
59 * size */
60#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
61
dca6219e
LP
62/* n_data was the first entry we added after the initial file format design */
63#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2
LP
64
65#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
66
dca6219e
LP
67#define JOURNAL_HEADER_CONTAINS(h, field) \
68 (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
69
7560fffc 70static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
dca6219e 71
cec736d2 72void journal_file_close(JournalFile *f) {
de190aef 73 int t;
cec736d2 74
de190aef 75 assert(f);
cec736d2 76
7560fffc
LP
77 /* Sync everything to disk, before we mark the file offline */
78 for (t = 0; t < _WINDOW_MAX; t++)
79 if (f->windows[t].ptr)
80 munmap(f->windows[t].ptr, f->windows[t].size);
81
82 if (f->writable && f->fd >= 0)
83 fdatasync(f->fd);
84
d384c7a8 85 if (f->header) {
cd96b3b8
LP
86 /* Mark the file offline. Don't override the archived state if it already is set */
87 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 88 f->header->state = STATE_OFFLINE;
cec736d2 89
d384c7a8
MS
90 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
91 }
cec736d2 92
0ac38b70
LP
93 if (f->fd >= 0)
94 close_nointr_nofail(f->fd);
95
cec736d2 96 free(f->path);
807e17f0
LP
97
98#ifdef HAVE_XZ
99 free(f->compress_buffer);
100#endif
101
7560fffc
LP
102#ifdef HAVE_GCRYPT
103 if (f->fsprg_header)
104 munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
105
106 if (f->hmac)
107 gcry_md_close(f->hmac);
108#endif
109
cec736d2
LP
110 free(f);
111}
112
0ac38b70 113static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
114 Header h;
115 ssize_t k;
116 int r;
117
118 assert(f);
119
120 zero(h);
7560fffc 121 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 122 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 123
7560fffc
LP
124 h.incompatible_flags =
125 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127 h.compatible_flags =
128 htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
129
cec736d2
LP
130 r = sd_id128_randomize(&h.file_id);
131 if (r < 0)
132 return r;
133
0ac38b70
LP
134 if (template) {
135 h.seqnum_id = template->header->seqnum_id;
dca6219e 136 h.tail_seqnum = template->header->tail_seqnum;
0ac38b70
LP
137 } else
138 h.seqnum_id = h.file_id;
cec736d2
LP
139
140 k = pwrite(f->fd, &h, sizeof(h), 0);
141 if (k < 0)
142 return -errno;
143
144 if (k != sizeof(h))
145 return -EIO;
146
147 return 0;
148}
149
150static int journal_file_refresh_header(JournalFile *f) {
151 int r;
de190aef 152 sd_id128_t boot_id;
cec736d2
LP
153
154 assert(f);
155
156 r = sd_id128_get_machine(&f->header->machine_id);
157 if (r < 0)
158 return r;
159
de190aef 160 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
161 if (r < 0)
162 return r;
163
de190aef
LP
164 if (sd_id128_equal(boot_id, f->header->boot_id))
165 f->tail_entry_monotonic_valid = true;
166
167 f->header->boot_id = boot_id;
168
169 f->header->state = STATE_ONLINE;
b788cc23 170
7560fffc
LP
171 /* Sync the online state to disk */
172 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173 fdatasync(f->fd);
b788cc23 174
cec736d2
LP
175 return 0;
176}
177
178static int journal_file_verify_header(JournalFile *f) {
179 assert(f);
180
7560fffc 181 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
182 return -EBADMSG;
183
7560fffc
LP
184 /* In both read and write mode we refuse to open files with
185 * incompatible flags we don't know */
807e17f0 186#ifdef HAVE_XZ
7560fffc 187 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
188 return -EPROTONOSUPPORT;
189#else
cec736d2
LP
190 if (f->header->incompatible_flags != 0)
191 return -EPROTONOSUPPORT;
807e17f0 192#endif
cec736d2 193
7560fffc
LP
194 /* When open for writing we refuse to open files with
195 * compatible flags, too */
196 if (f->writable) {
197#ifdef HAVE_GCRYPT
198 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
199 return -EPROTONOSUPPORT;
200#else
201 if (f->header->compatible_flags != 0)
202 return -EPROTONOSUPPORT;
203#endif
204 }
205
dca6219e
LP
206 /* The first addition was n_data, so check that we are at least this large */
207 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
208 return -EBADMSG;
209
210 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
cec736d2
LP
211 return -ENODATA;
212
213 if (f->writable) {
ccdbaf91 214 uint8_t state;
cec736d2
LP
215 sd_id128_t machine_id;
216 int r;
217
218 r = sd_id128_get_machine(&machine_id);
219 if (r < 0)
220 return r;
221
222 if (!sd_id128_equal(machine_id, f->header->machine_id))
223 return -EHOSTDOWN;
224
de190aef 225 state = f->header->state;
cec736d2 226
71fa6f00
LP
227 if (state == STATE_ONLINE) {
228 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
229 return -EBUSY;
230 } else if (state == STATE_ARCHIVED)
cec736d2 231 return -ESHUTDOWN;
71fa6f00
LP
232 else if (state != STATE_OFFLINE) {
233 log_debug("Journal file %s has unknown state %u.", f->path, state);
234 return -EBUSY;
235 }
cec736d2
LP
236 }
237
7560fffc
LP
238 f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
239 f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
240
cec736d2
LP
241 return 0;
242}
243
244static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 245 uint64_t old_size, new_size;
fec2aa2f 246 int r;
cec736d2
LP
247
248 assert(f);
249
cec736d2 250 /* We assume that this file is not sparse, and we know that
38ac38b2 251 * for sure, since we always call posix_fallocate()
cec736d2
LP
252 * ourselves */
253
254 old_size =
23b0b2b2 255 le64toh(f->header->header_size) +
cec736d2
LP
256 le64toh(f->header->arena_size);
257
bc85bfee 258 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
259 if (new_size < le64toh(f->header->header_size))
260 new_size = le64toh(f->header->header_size);
bc85bfee
LP
261
262 if (new_size <= old_size)
cec736d2
LP
263 return 0;
264
bc85bfee
LP
265 if (f->metrics.max_size > 0 &&
266 new_size > f->metrics.max_size)
267 return -E2BIG;
cec736d2 268
bc85bfee
LP
269 if (new_size > f->metrics.min_size &&
270 f->metrics.keep_free > 0) {
cec736d2
LP
271 struct statvfs svfs;
272
273 if (fstatvfs(f->fd, &svfs) >= 0) {
274 uint64_t available;
275
276 available = svfs.f_bfree * svfs.f_bsize;
277
bc85bfee
LP
278 if (available >= f->metrics.keep_free)
279 available -= f->metrics.keep_free;
cec736d2
LP
280 else
281 available = 0;
282
283 if (new_size - old_size > available)
284 return -E2BIG;
285 }
286 }
287
bc85bfee
LP
288 /* Note that the glibc fallocate() fallback is very
289 inefficient, hence we try to minimize the allocation area
290 as we can. */
fec2aa2f
GV
291 r = posix_fallocate(f->fd, old_size, new_size - old_size);
292 if (r != 0)
293 return -r;
cec736d2
LP
294
295 if (fstat(f->fd, &f->last_stat) < 0)
296 return -errno;
297
23b0b2b2 298 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
299
300 return 0;
301}
302
303static int journal_file_map(
304 JournalFile *f,
305 uint64_t offset,
306 uint64_t size,
307 void **_window,
308 uint64_t *_woffset,
309 uint64_t *_wsize,
310 void **ret) {
311
312 uint64_t woffset, wsize;
313 void *window;
314
315 assert(f);
316 assert(size > 0);
317 assert(ret);
318
319 woffset = offset & ~((uint64_t) page_size() - 1ULL);
320 wsize = size + (offset - woffset);
321 wsize = PAGE_ALIGN(wsize);
322
2a59ea54
LP
323 /* Avoid SIGBUS on invalid accesses */
324 if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
325 return -EADDRNOTAVAIL;
326
cec736d2
LP
327 window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
328 if (window == MAP_FAILED)
329 return -errno;
330
331 if (_window)
332 *_window = window;
333
334 if (_woffset)
335 *_woffset = woffset;
336
337 if (_wsize)
338 *_wsize = wsize;
339
340 *ret = (uint8_t*) window + (offset - woffset);
341
342 return 0;
343}
344
de190aef 345static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
6c8a39b8 346 void *p = NULL;
cec736d2
LP
347 uint64_t delta;
348 int r;
de190aef 349 Window *w;
cec736d2
LP
350
351 assert(f);
352 assert(ret);
de190aef
LP
353 assert(wt >= 0);
354 assert(wt < _WINDOW_MAX);
cec736d2 355
4bbdcdb3
LP
356 if (offset + size > (uint64_t) f->last_stat.st_size) {
357 /* Hmm, out of range? Let's refresh the fstat() data
358 * first, before we trust that check. */
359
360 if (fstat(f->fd, &f->last_stat) < 0 ||
361 offset + size > (uint64_t) f->last_stat.st_size)
362 return -EADDRNOTAVAIL;
363 }
364
de190aef 365 w = f->windows + wt;
cec736d2 366
de190aef
LP
367 if (_likely_(w->ptr &&
368 w->offset <= offset &&
369 w->offset + w->size >= offset + size)) {
370
371 *ret = (uint8_t*) w->ptr + (offset - w->offset);
cec736d2
LP
372 return 0;
373 }
374
de190aef
LP
375 if (w->ptr) {
376 if (munmap(w->ptr, w->size) < 0)
cec736d2
LP
377 return -errno;
378
de190aef
LP
379 w->ptr = NULL;
380 w->size = w->offset = 0;
cec736d2
LP
381 }
382
383 if (size < DEFAULT_WINDOW_SIZE) {
384 /* If the default window size is larger then what was
385 * asked for extend the mapping a bit in the hope to
386 * minimize needed remappings later on. We add half
387 * the window space before and half behind the
388 * requested mapping */
389
1921a5cb 390 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
cec736d2 391
a99c349d 392 if (delta > offset)
cec736d2
LP
393 delta = offset;
394
395 offset -= delta;
a99c349d 396 size = DEFAULT_WINDOW_SIZE;
cec736d2
LP
397 } else
398 delta = 0;
399
2a59ea54 400 if (offset + size > (uint64_t) f->last_stat.st_size)
1921a5cb 401 size = (uint64_t) f->last_stat.st_size - offset;
2a59ea54
LP
402
403 if (size <= 0)
404 return -EADDRNOTAVAIL;
405
cec736d2
LP
406 r = journal_file_map(f,
407 offset, size,
de190aef
LP
408 &w->ptr, &w->offset, &w->size,
409 &p);
cec736d2
LP
410
411 if (r < 0)
412 return r;
413
414 *ret = (uint8_t*) p + delta;
415 return 0;
416}
417
418static bool verify_hash(Object *o) {
de190aef 419 uint64_t h1, h2;
cec736d2
LP
420
421 assert(o);
422
807e17f0 423 if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
cec736d2 424 h1 = le64toh(o->data.hash);
de190aef
LP
425 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
426 } else if (o->object.type == OBJECT_FIELD) {
427 h1 = le64toh(o->field.hash);
428 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
429 } else
430 return true;
cec736d2 431
de190aef 432 return h1 == h2;
cec736d2
LP
433}
434
de190aef 435int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
436 int r;
437 void *t;
438 Object *o;
439 uint64_t s;
440
441 assert(f);
442 assert(ret);
de190aef 443 assert(type < _OBJECT_TYPE_MAX);
cec736d2 444
de190aef 445 r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
446 if (r < 0)
447 return r;
448
449 o = (Object*) t;
450 s = le64toh(o->object.size);
451
452 if (s < sizeof(ObjectHeader))
453 return -EBADMSG;
454
de190aef 455 if (type >= 0 && o->object.type != type)
cec736d2
LP
456 return -EBADMSG;
457
458 if (s > sizeof(ObjectHeader)) {
de190aef 459 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
460 if (r < 0)
461 return r;
462
463 o = (Object*) t;
464 }
465
466 if (!verify_hash(o))
467 return -EBADMSG;
468
469 *ret = o;
470 return 0;
471}
472
c2373f84 473static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
474 uint64_t r;
475
476 assert(f);
477
dca6219e 478 r = le64toh(f->header->tail_seqnum) + 1;
c2373f84
LP
479
480 if (seqnum) {
de190aef 481 /* If an external seqnum counter was passed, we update
c2373f84
LP
482 * both the local and the external one, and set it to
483 * the maximum of both */
484
485 if (*seqnum + 1 > r)
486 r = *seqnum + 1;
487
488 *seqnum = r;
489 }
490
dca6219e 491 f->header->tail_seqnum = htole64(r);
cec736d2 492
dca6219e
LP
493 if (f->header->head_seqnum == 0)
494 f->header->head_seqnum = htole64(r);
de190aef 495
cec736d2
LP
496 return r;
497}
498
de190aef 499static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
500 int r;
501 uint64_t p;
502 Object *tail, *o;
503 void *t;
504
505 assert(f);
506 assert(size >= sizeof(ObjectHeader));
507 assert(offset);
508 assert(ret);
509
510 p = le64toh(f->header->tail_object_offset);
cec736d2 511 if (p == 0)
23b0b2b2 512 p = le64toh(f->header->header_size);
cec736d2 513 else {
de190aef 514 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
515 if (r < 0)
516 return r;
517
518 p += ALIGN64(le64toh(tail->object.size));
519 }
520
521 r = journal_file_allocate(f, p, size);
522 if (r < 0)
523 return r;
524
de190aef 525 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
526 if (r < 0)
527 return r;
528
529 o = (Object*) t;
530
531 zero(o->object);
de190aef 532 o->object.type = type;
cec736d2
LP
533 o->object.size = htole64(size);
534
535 f->header->tail_object_offset = htole64(p);
cec736d2
LP
536 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
537
538 *ret = o;
539 *offset = p;
540
541 return 0;
542}
543
de190aef 544static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
545 uint64_t s, p;
546 Object *o;
547 int r;
548
549 assert(f);
550
dfabe643 551 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
552 journal file and we want to make sure we never get beyond
553 75% fill level. Calculate the hash table size for the
554 maximum file size based on these metrics. */
555
dfabe643 556 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
557 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
558 s = DEFAULT_DATA_HASH_TABLE_SIZE;
559
dfabe643 560 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 561
de190aef
LP
562 r = journal_file_append_object(f,
563 OBJECT_DATA_HASH_TABLE,
564 offsetof(Object, hash_table.items) + s,
565 &o, &p);
cec736d2
LP
566 if (r < 0)
567 return r;
568
de190aef 569 memset(o->hash_table.items, 0, s);
cec736d2 570
de190aef
LP
571 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
572 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
573
574 return 0;
575}
576
de190aef 577static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
578 uint64_t s, p;
579 Object *o;
580 int r;
581
582 assert(f);
583
de190aef
LP
584 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
585 r = journal_file_append_object(f,
586 OBJECT_FIELD_HASH_TABLE,
587 offsetof(Object, hash_table.items) + s,
588 &o, &p);
cec736d2
LP
589 if (r < 0)
590 return r;
591
de190aef 592 memset(o->hash_table.items, 0, s);
cec736d2 593
de190aef
LP
594 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
595 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
596
597 return 0;
598}
599
de190aef 600static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
601 uint64_t s, p;
602 void *t;
603 int r;
604
605 assert(f);
606
de190aef
LP
607 p = le64toh(f->header->data_hash_table_offset);
608 s = le64toh(f->header->data_hash_table_size);
cec736d2 609
de190aef
LP
610 r = journal_file_move_to(f,
611 WINDOW_DATA_HASH_TABLE,
612 p, s,
613 &t);
cec736d2
LP
614 if (r < 0)
615 return r;
616
de190aef 617 f->data_hash_table = t;
cec736d2
LP
618 return 0;
619}
620
de190aef 621static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
622 uint64_t s, p;
623 void *t;
624 int r;
625
626 assert(f);
627
de190aef
LP
628 p = le64toh(f->header->field_hash_table_offset);
629 s = le64toh(f->header->field_hash_table_size);
cec736d2 630
de190aef
LP
631 r = journal_file_move_to(f,
632 WINDOW_FIELD_HASH_TABLE,
633 p, s,
634 &t);
cec736d2
LP
635 if (r < 0)
636 return r;
637
de190aef 638 f->field_hash_table = t;
cec736d2
LP
639 return 0;
640}
641
de190aef
LP
642static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
643 uint64_t p, h;
cec736d2
LP
644 int r;
645
646 assert(f);
647 assert(o);
648 assert(offset > 0);
de190aef 649 assert(o->object.type == OBJECT_DATA);
cec736d2 650
48496df6
LP
651 /* This might alter the window we are looking at */
652
de190aef
LP
653 o->data.next_hash_offset = o->data.next_field_offset = 0;
654 o->data.entry_offset = o->data.entry_array_offset = 0;
655 o->data.n_entries = 0;
cec736d2 656
de190aef 657 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 658 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
659 if (p == 0) {
660 /* Only entry in the hash table is easy */
de190aef 661 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 662 } else {
48496df6
LP
663 /* Move back to the previous data object, to patch in
664 * pointer */
cec736d2 665
de190aef 666 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
667 if (r < 0)
668 return r;
669
de190aef 670 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
671 }
672
de190aef 673 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 674
dca6219e
LP
675 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
676 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
677
cec736d2
LP
678 return 0;
679}
680
de190aef
LP
681int journal_file_find_data_object_with_hash(
682 JournalFile *f,
683 const void *data, uint64_t size, uint64_t hash,
684 Object **ret, uint64_t *offset) {
48496df6 685
de190aef 686 uint64_t p, osize, h;
cec736d2
LP
687 int r;
688
689 assert(f);
690 assert(data || size == 0);
691
692 osize = offsetof(Object, data.payload) + size;
693
bc85bfee
LP
694 if (f->header->data_hash_table_size == 0)
695 return -EBADMSG;
696
de190aef
LP
697 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
698 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 699
de190aef
LP
700 while (p > 0) {
701 Object *o;
cec736d2 702
de190aef 703 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
704 if (r < 0)
705 return r;
706
807e17f0 707 if (le64toh(o->data.hash) != hash)
85a131e8 708 goto next;
807e17f0
LP
709
710 if (o->object.flags & OBJECT_COMPRESSED) {
711#ifdef HAVE_XZ
b785c858 712 uint64_t l, rsize;
cec736d2 713
807e17f0
LP
714 l = le64toh(o->object.size);
715 if (l <= offsetof(Object, data.payload))
cec736d2
LP
716 return -EBADMSG;
717
807e17f0
LP
718 l -= offsetof(Object, data.payload);
719
720 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
721 return -EBADMSG;
722
b785c858 723 if (rsize == size &&
807e17f0
LP
724 memcmp(f->compress_buffer, data, size) == 0) {
725
726 if (ret)
727 *ret = o;
728
729 if (offset)
730 *offset = p;
731
732 return 1;
733 }
734#else
735 return -EPROTONOSUPPORT;
736#endif
737
738 } else if (le64toh(o->object.size) == osize &&
739 memcmp(o->data.payload, data, size) == 0) {
740
cec736d2
LP
741 if (ret)
742 *ret = o;
743
744 if (offset)
745 *offset = p;
746
de190aef 747 return 1;
cec736d2
LP
748 }
749
85a131e8 750 next:
cec736d2
LP
751 p = le64toh(o->data.next_hash_offset);
752 }
753
de190aef
LP
754 return 0;
755}
756
757int journal_file_find_data_object(
758 JournalFile *f,
759 const void *data, uint64_t size,
760 Object **ret, uint64_t *offset) {
761
762 uint64_t hash;
763
764 assert(f);
765 assert(data || size == 0);
766
767 hash = hash64(data, size);
768
769 return journal_file_find_data_object_with_hash(f,
770 data, size, hash,
771 ret, offset);
772}
773
48496df6
LP
774static int journal_file_append_data(
775 JournalFile *f,
776 const void *data, uint64_t size,
777 Object **ret, uint64_t *offset) {
778
de190aef
LP
779 uint64_t hash, p;
780 uint64_t osize;
781 Object *o;
782 int r;
807e17f0 783 bool compressed = false;
de190aef
LP
784
785 assert(f);
786 assert(data || size == 0);
787
788 hash = hash64(data, size);
789
790 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
791 if (r < 0)
792 return r;
793 else if (r > 0) {
794
795 if (ret)
796 *ret = o;
797
798 if (offset)
799 *offset = p;
800
801 return 0;
802 }
803
804 osize = offsetof(Object, data.payload) + size;
805 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
806 if (r < 0)
807 return r;
808
cec736d2 809 o->data.hash = htole64(hash);
807e17f0
LP
810
811#ifdef HAVE_XZ
812 if (f->compress &&
813 size >= COMPRESSION_SIZE_THRESHOLD) {
814 uint64_t rsize;
815
816 compressed = compress_blob(data, size, o->data.payload, &rsize);
817
818 if (compressed) {
819 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
820 o->object.flags |= OBJECT_COMPRESSED;
821
807e17f0
LP
822 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
823 }
824 }
825#endif
826
64825d3c 827 if (!compressed && size > 0)
807e17f0 828 memcpy(o->data.payload, data, size);
cec736d2 829
de190aef 830 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
831 if (r < 0)
832 return r;
833
48496df6
LP
834 /* The linking might have altered the window, so let's
835 * refresh our pointer */
836 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
837 if (r < 0)
838 return r;
839
cec736d2
LP
840 if (ret)
841 *ret = o;
842
843 if (offset)
de190aef 844 *offset = p;
cec736d2
LP
845
846 return 0;
847}
848
849uint64_t journal_file_entry_n_items(Object *o) {
850 assert(o);
7be3aa17 851 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
852
853 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
854}
855
de190aef
LP
856static uint64_t journal_file_entry_array_n_items(Object *o) {
857 assert(o);
7be3aa17 858 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
859
860 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
861}
862
863static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
864 le64_t *first,
865 le64_t *idx,
de190aef 866 uint64_t p) {
cec736d2 867 int r;
de190aef
LP
868 uint64_t n = 0, ap = 0, q, i, a, hidx;
869 Object *o;
870
cec736d2 871 assert(f);
de190aef
LP
872 assert(first);
873 assert(idx);
874 assert(p > 0);
cec736d2 875
de190aef
LP
876 a = le64toh(*first);
877 i = hidx = le64toh(*idx);
878 while (a > 0) {
879
880 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
881 if (r < 0)
882 return r;
cec736d2 883
de190aef
LP
884 n = journal_file_entry_array_n_items(o);
885 if (i < n) {
886 o->entry_array.items[i] = htole64(p);
887 *idx = htole64(hidx + 1);
888 return 0;
889 }
cec736d2 890
de190aef
LP
891 i -= n;
892 ap = a;
893 a = le64toh(o->entry_array.next_entry_array_offset);
894 }
895
896 if (hidx > n)
897 n = (hidx+1) * 2;
898 else
899 n = n * 2;
900
901 if (n < 4)
902 n = 4;
903
904 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
905 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
906 &o, &q);
cec736d2
LP
907 if (r < 0)
908 return r;
909
de190aef 910 o->entry_array.items[i] = htole64(p);
cec736d2 911
de190aef 912 if (ap == 0)
7be3aa17 913 *first = htole64(q);
cec736d2 914 else {
de190aef 915 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
916 if (r < 0)
917 return r;
918
de190aef
LP
919 o->entry_array.next_entry_array_offset = htole64(q);
920 }
cec736d2 921
de190aef
LP
922 *idx = htole64(hidx + 1);
923
924 return 0;
925}
cec736d2 926
de190aef 927static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
928 le64_t *extra,
929 le64_t *first,
930 le64_t *idx,
de190aef
LP
931 uint64_t p) {
932
933 int r;
934
935 assert(f);
936 assert(extra);
937 assert(first);
938 assert(idx);
939 assert(p > 0);
940
941 if (*idx == 0)
942 *extra = htole64(p);
943 else {
4fd052ae 944 le64_t i;
de190aef 945
7be3aa17 946 i = htole64(le64toh(*idx) - 1);
de190aef
LP
947 r = link_entry_into_array(f, first, &i, p);
948 if (r < 0)
949 return r;
cec736d2
LP
950 }
951
de190aef
LP
952 *idx = htole64(le64toh(*idx) + 1);
953 return 0;
954}
955
956static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
957 uint64_t p;
958 int r;
959 assert(f);
960 assert(o);
961 assert(offset > 0);
962
963 p = le64toh(o->entry.items[i].object_offset);
964 if (p == 0)
965 return -EINVAL;
966
967 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
968 if (r < 0)
969 return r;
970
de190aef
LP
971 return link_entry_into_array_plus_one(f,
972 &o->data.entry_offset,
973 &o->data.entry_array_offset,
974 &o->data.n_entries,
975 offset);
cec736d2
LP
976}
977
978static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 979 uint64_t n, i;
cec736d2
LP
980 int r;
981
982 assert(f);
983 assert(o);
984 assert(offset > 0);
de190aef 985 assert(o->object.type == OBJECT_ENTRY);
cec736d2 986
b788cc23
LP
987 __sync_synchronize();
988
cec736d2 989 /* Link up the entry itself */
de190aef
LP
990 r = link_entry_into_array(f,
991 &f->header->entry_array_offset,
992 &f->header->n_entries,
993 offset);
994 if (r < 0)
995 return r;
cec736d2 996
aaf53376 997 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 998
de190aef 999 if (f->header->head_entry_realtime == 0)
0ac38b70 1000 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 1001
0ac38b70 1002 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
1003 f->header->tail_entry_monotonic = o->entry.monotonic;
1004
1005 f->tail_entry_monotonic_valid = true;
cec736d2
LP
1006
1007 /* Link up the items */
1008 n = journal_file_entry_n_items(o);
1009 for (i = 0; i < n; i++) {
1010 r = journal_file_link_entry_item(f, o, offset, i);
1011 if (r < 0)
1012 return r;
1013 }
1014
cec736d2
LP
1015 return 0;
1016}
1017
1018static int journal_file_append_entry_internal(
1019 JournalFile *f,
1020 const dual_timestamp *ts,
1021 uint64_t xor_hash,
1022 const EntryItem items[], unsigned n_items,
de190aef 1023 uint64_t *seqnum,
cec736d2
LP
1024 Object **ret, uint64_t *offset) {
1025 uint64_t np;
1026 uint64_t osize;
1027 Object *o;
1028 int r;
1029
1030 assert(f);
1031 assert(items || n_items == 0);
de190aef 1032 assert(ts);
cec736d2
LP
1033
1034 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1035
de190aef 1036 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
1037 if (r < 0)
1038 return r;
1039
de190aef 1040 o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
cec736d2 1041 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
1042 o->entry.realtime = htole64(ts->realtime);
1043 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
1044 o->entry.xor_hash = htole64(xor_hash);
1045 o->entry.boot_id = f->header->boot_id;
1046
1047 r = journal_file_link_entry(f, o, np);
1048 if (r < 0)
1049 return r;
1050
1051 if (ret)
1052 *ret = o;
1053
1054 if (offset)
1055 *offset = np;
1056
1057 return 0;
1058}
1059
cf244689 1060void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1061 assert(f);
1062
1063 /* inotify() does not receive IN_MODIFY events from file
1064 * accesses done via mmap(). After each access we hence
1065 * trigger IN_MODIFY by truncating the journal file to its
1066 * current size which triggers IN_MODIFY. */
1067
bc85bfee
LP
1068 __sync_synchronize();
1069
50f20cfd
LP
1070 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1071 log_error("Failed to to truncate file to its own size: %m");
1072}
1073
de190aef 1074int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1075 unsigned i;
1076 EntryItem *items;
1077 int r;
1078 uint64_t xor_hash = 0;
de190aef 1079 struct dual_timestamp _ts;
cec736d2
LP
1080
1081 assert(f);
1082 assert(iovec || n_iovec == 0);
1083
de190aef
LP
1084 if (!f->writable)
1085 return -EPERM;
1086
1087 if (!ts) {
1088 dual_timestamp_get(&_ts);
1089 ts = &_ts;
1090 }
1091
1092 if (f->tail_entry_monotonic_valid &&
1093 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1094 return -EINVAL;
1095
7560fffc
LP
1096 r = journal_file_maybe_append_tag(f, ts->realtime);
1097 if (r < 0)
1098 return r;
1099
64825d3c
LP
1100 /* alloca() can't take 0, hence let's allocate at least one */
1101 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1102
1103 for (i = 0; i < n_iovec; i++) {
1104 uint64_t p;
1105 Object *o;
1106
1107 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1108 if (r < 0)
cf244689 1109 return r;
cec736d2
LP
1110
1111 xor_hash ^= le64toh(o->data.hash);
1112 items[i].object_offset = htole64(p);
de7b95cd 1113 items[i].hash = o->data.hash;
cec736d2
LP
1114 }
1115
de190aef 1116 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1117
50f20cfd
LP
1118 journal_file_post_change(f);
1119
cec736d2
LP
1120 return r;
1121}
1122
de190aef
LP
1123static int generic_array_get(JournalFile *f,
1124 uint64_t first,
1125 uint64_t i,
1126 Object **ret, uint64_t *offset) {
1127
cec736d2 1128 Object *o;
6c8a39b8 1129 uint64_t p = 0, a;
cec736d2
LP
1130 int r;
1131
1132 assert(f);
1133
de190aef
LP
1134 a = first;
1135 while (a > 0) {
1136 uint64_t n;
cec736d2 1137
de190aef
LP
1138 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1139 if (r < 0)
1140 return r;
cec736d2 1141
de190aef
LP
1142 n = journal_file_entry_array_n_items(o);
1143 if (i < n) {
1144 p = le64toh(o->entry_array.items[i]);
1145 break;
cec736d2
LP
1146 }
1147
de190aef
LP
1148 i -= n;
1149 a = le64toh(o->entry_array.next_entry_array_offset);
1150 }
1151
1152 if (a <= 0 || p <= 0)
1153 return 0;
1154
1155 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1156 if (r < 0)
1157 return r;
1158
1159 if (ret)
1160 *ret = o;
1161
1162 if (offset)
1163 *offset = p;
1164
1165 return 1;
1166}
1167
1168static int generic_array_get_plus_one(JournalFile *f,
1169 uint64_t extra,
1170 uint64_t first,
1171 uint64_t i,
1172 Object **ret, uint64_t *offset) {
1173
1174 Object *o;
1175
1176 assert(f);
1177
1178 if (i == 0) {
1179 int r;
1180
1181 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1182 if (r < 0)
1183 return r;
1184
de190aef
LP
1185 if (ret)
1186 *ret = o;
cec736d2 1187
de190aef
LP
1188 if (offset)
1189 *offset = extra;
cec736d2 1190
de190aef 1191 return 1;
cec736d2
LP
1192 }
1193
de190aef
LP
1194 return generic_array_get(f, first, i-1, ret, offset);
1195}
cec736d2 1196
de190aef
LP
/* Results returned by the test_object callbacks used by the bisection
 * helpers. The callbacks return TEST_LEFT when the tested object sorts
 * before the needle, TEST_RIGHT when it sorts after it, and TEST_FOUND on an
 * exact match. */
enum {
        TEST_FOUND = 0,
        TEST_LEFT  = 1,
        TEST_RIGHT = 2
};
cec736d2 1202
de190aef
LP
1203static int generic_array_bisect(JournalFile *f,
1204 uint64_t first,
1205 uint64_t n,
1206 uint64_t needle,
1207 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1208 direction_t direction,
1209 Object **ret,
1210 uint64_t *offset,
1211 uint64_t *idx) {
1212
1213 uint64_t a, p, t = 0, i = 0, last_p = 0;
1214 bool subtract_one = false;
1215 Object *o, *array = NULL;
1216 int r;
cec736d2 1217
de190aef
LP
1218 assert(f);
1219 assert(test_object);
cec736d2 1220
de190aef
LP
1221 a = first;
1222 while (a > 0) {
1223 uint64_t left, right, k, lp;
1224
1225 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1226 if (r < 0)
1227 return r;
1228
de190aef
LP
1229 k = journal_file_entry_array_n_items(array);
1230 right = MIN(k, n);
1231 if (right <= 0)
1232 return 0;
cec736d2 1233
de190aef
LP
1234 i = right - 1;
1235 lp = p = le64toh(array->entry_array.items[i]);
1236 if (p <= 0)
1237 return -EBADMSG;
cec736d2 1238
de190aef
LP
1239 r = test_object(f, p, needle);
1240 if (r < 0)
1241 return r;
cec736d2 1242
de190aef
LP
1243 if (r == TEST_FOUND)
1244 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1245
1246 if (r == TEST_RIGHT) {
1247 left = 0;
1248 right -= 1;
1249 for (;;) {
1250 if (left == right) {
1251 if (direction == DIRECTION_UP)
1252 subtract_one = true;
1253
1254 i = left;
1255 goto found;
1256 }
1257
1258 assert(left < right);
1259
1260 i = (left + right) / 2;
1261 p = le64toh(array->entry_array.items[i]);
1262 if (p <= 0)
1263 return -EBADMSG;
1264
1265 r = test_object(f, p, needle);
1266 if (r < 0)
1267 return r;
cec736d2 1268
de190aef
LP
1269 if (r == TEST_FOUND)
1270 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1271
1272 if (r == TEST_RIGHT)
1273 right = i;
1274 else
1275 left = i + 1;
1276 }
1277 }
1278
cbdca852
LP
1279 if (k > n) {
1280 if (direction == DIRECTION_UP) {
1281 i = n;
1282 subtract_one = true;
1283 goto found;
1284 }
1285
cec736d2 1286 return 0;
cbdca852 1287 }
cec736d2 1288
de190aef
LP
1289 last_p = lp;
1290
1291 n -= k;
1292 t += k;
1293 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1294 }
1295
1296 return 0;
de190aef
LP
1297
1298found:
1299 if (subtract_one && t == 0 && i == 0)
1300 return 0;
1301
1302 if (subtract_one && i == 0)
1303 p = last_p;
1304 else if (subtract_one)
1305 p = le64toh(array->entry_array.items[i-1]);
1306 else
1307 p = le64toh(array->entry_array.items[i]);
1308
1309 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1310 if (r < 0)
1311 return r;
1312
1313 if (ret)
1314 *ret = o;
1315
1316 if (offset)
1317 *offset = p;
1318
1319 if (idx)
cbdca852 1320 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1321
1322 return 1;
cec736d2
LP
1323}
1324
de190aef
LP
1325static int generic_array_bisect_plus_one(JournalFile *f,
1326 uint64_t extra,
1327 uint64_t first,
1328 uint64_t n,
1329 uint64_t needle,
1330 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1331 direction_t direction,
1332 Object **ret,
1333 uint64_t *offset,
1334 uint64_t *idx) {
1335
cec736d2 1336 int r;
cbdca852
LP
1337 bool step_back = false;
1338 Object *o;
cec736d2
LP
1339
1340 assert(f);
de190aef 1341 assert(test_object);
cec736d2 1342
de190aef
LP
1343 if (n <= 0)
1344 return 0;
cec736d2 1345
de190aef
LP
1346 /* This bisects the array in object 'first', but first checks
1347 * an extra */
de190aef
LP
1348 r = test_object(f, extra, needle);
1349 if (r < 0)
1350 return r;
a536e261
LP
1351
1352 if (r == TEST_FOUND)
1353 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1354
cbdca852
LP
1355 /* if we are looking with DIRECTION_UP then we need to first
1356 see if in the actual array there is a matching entry, and
1357 return the last one of that. But if there isn't any we need
1358 to return this one. Hence remember this, and return it
1359 below. */
1360 if (r == TEST_LEFT)
1361 step_back = direction == DIRECTION_UP;
de190aef 1362
cbdca852
LP
1363 if (r == TEST_RIGHT) {
1364 if (direction == DIRECTION_DOWN)
1365 goto found;
1366 else
1367 return 0;
a536e261 1368 }
cec736d2 1369
de190aef
LP
1370 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1371
cbdca852
LP
1372 if (r == 0 && step_back)
1373 goto found;
1374
ecf68b1d 1375 if (r > 0 && idx)
de190aef
LP
1376 (*idx) ++;
1377
1378 return r;
cbdca852
LP
1379
1380found:
1381 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1382 if (r < 0)
1383 return r;
1384
1385 if (ret)
1386 *ret = o;
1387
1388 if (offset)
1389 *offset = extra;
1390
1391 if (idx)
1392 *idx = 0;
1393
1394 return 1;
1395}
1396
1397static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1398 assert(f);
1399 assert(p > 0);
1400
1401 if (p == needle)
1402 return TEST_FOUND;
1403 else if (p < needle)
1404 return TEST_LEFT;
1405 else
1406 return TEST_RIGHT;
1407}
1408
1409int journal_file_move_to_entry_by_offset(
1410 JournalFile *f,
1411 uint64_t p,
1412 direction_t direction,
1413 Object **ret,
1414 uint64_t *offset) {
1415
1416 return generic_array_bisect(f,
1417 le64toh(f->header->entry_array_offset),
1418 le64toh(f->header->n_entries),
1419 p,
1420 test_object_offset,
1421 direction,
1422 ret, offset, NULL);
de190aef
LP
1423}
1424
cbdca852 1425
de190aef
LP
1426static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1427 Object *o;
1428 int r;
1429
1430 assert(f);
1431 assert(p > 0);
1432
1433 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1434 if (r < 0)
1435 return r;
1436
de190aef
LP
1437 if (le64toh(o->entry.seqnum) == needle)
1438 return TEST_FOUND;
1439 else if (le64toh(o->entry.seqnum) < needle)
1440 return TEST_LEFT;
1441 else
1442 return TEST_RIGHT;
1443}
cec736d2 1444
de190aef
LP
1445int journal_file_move_to_entry_by_seqnum(
1446 JournalFile *f,
1447 uint64_t seqnum,
1448 direction_t direction,
1449 Object **ret,
1450 uint64_t *offset) {
1451
1452 return generic_array_bisect(f,
1453 le64toh(f->header->entry_array_offset),
1454 le64toh(f->header->n_entries),
1455 seqnum,
1456 test_object_seqnum,
1457 direction,
1458 ret, offset, NULL);
1459}
cec736d2 1460
de190aef
LP
1461static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1462 Object *o;
1463 int r;
1464
1465 assert(f);
1466 assert(p > 0);
1467
1468 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1469 if (r < 0)
1470 return r;
1471
1472 if (le64toh(o->entry.realtime) == needle)
1473 return TEST_FOUND;
1474 else if (le64toh(o->entry.realtime) < needle)
1475 return TEST_LEFT;
1476 else
1477 return TEST_RIGHT;
cec736d2
LP
1478}
1479
de190aef
LP
1480int journal_file_move_to_entry_by_realtime(
1481 JournalFile *f,
1482 uint64_t realtime,
1483 direction_t direction,
1484 Object **ret,
1485 uint64_t *offset) {
1486
1487 return generic_array_bisect(f,
1488 le64toh(f->header->entry_array_offset),
1489 le64toh(f->header->n_entries),
1490 realtime,
1491 test_object_realtime,
1492 direction,
1493 ret, offset, NULL);
1494}
1495
1496static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1497 Object *o;
1498 int r;
1499
1500 assert(f);
1501 assert(p > 0);
1502
1503 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1504 if (r < 0)
1505 return r;
1506
1507 if (le64toh(o->entry.monotonic) == needle)
1508 return TEST_FOUND;
1509 else if (le64toh(o->entry.monotonic) < needle)
1510 return TEST_LEFT;
1511 else
1512 return TEST_RIGHT;
1513}
1514
1515int journal_file_move_to_entry_by_monotonic(
1516 JournalFile *f,
1517 sd_id128_t boot_id,
1518 uint64_t monotonic,
1519 direction_t direction,
1520 Object **ret,
1521 uint64_t *offset) {
1522
10b6f904 1523 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1524 Object *o;
1525 int r;
1526
cbdca852 1527 assert(f);
de190aef 1528
cbdca852 1529 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1530 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1531 if (r < 0)
1532 return r;
cbdca852 1533 if (r == 0)
de190aef
LP
1534 return -ENOENT;
1535
1536 return generic_array_bisect_plus_one(f,
1537 le64toh(o->data.entry_offset),
1538 le64toh(o->data.entry_array_offset),
1539 le64toh(o->data.n_entries),
1540 monotonic,
1541 test_object_monotonic,
1542 direction,
1543 ret, offset, NULL);
1544}
1545
de190aef
LP
1546int journal_file_next_entry(
1547 JournalFile *f,
1548 Object *o, uint64_t p,
1549 direction_t direction,
1550 Object **ret, uint64_t *offset) {
1551
1552 uint64_t i, n;
cec736d2
LP
1553 int r;
1554
1555 assert(f);
de190aef
LP
1556 assert(p > 0 || !o);
1557
1558 n = le64toh(f->header->n_entries);
1559 if (n <= 0)
1560 return 0;
cec736d2
LP
1561
1562 if (!o)
de190aef 1563 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1564 else {
de190aef 1565 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1566 return -EINVAL;
1567
de190aef
LP
1568 r = generic_array_bisect(f,
1569 le64toh(f->header->entry_array_offset),
1570 le64toh(f->header->n_entries),
1571 p,
1572 test_object_offset,
1573 DIRECTION_DOWN,
1574 NULL, NULL,
1575 &i);
1576 if (r <= 0)
1577 return r;
1578
1579 if (direction == DIRECTION_DOWN) {
1580 if (i >= n - 1)
1581 return 0;
1582
1583 i++;
1584 } else {
1585 if (i <= 0)
1586 return 0;
1587
1588 i--;
1589 }
cec736d2
LP
1590 }
1591
de190aef
LP
1592 /* And jump to it */
1593 return generic_array_get(f,
1594 le64toh(f->header->entry_array_offset),
1595 i,
1596 ret, offset);
1597}
cec736d2 1598
de190aef
LP
1599int journal_file_skip_entry(
1600 JournalFile *f,
1601 Object *o, uint64_t p,
1602 int64_t skip,
1603 Object **ret, uint64_t *offset) {
1604
1605 uint64_t i, n;
1606 int r;
1607
1608 assert(f);
1609 assert(o);
1610 assert(p > 0);
1611
1612 if (o->object.type != OBJECT_ENTRY)
1613 return -EINVAL;
1614
1615 r = generic_array_bisect(f,
1616 le64toh(f->header->entry_array_offset),
1617 le64toh(f->header->n_entries),
1618 p,
1619 test_object_offset,
1620 DIRECTION_DOWN,
1621 NULL, NULL,
1622 &i);
1623 if (r <= 0)
cec736d2
LP
1624 return r;
1625
de190aef
LP
1626 /* Calculate new index */
1627 if (skip < 0) {
1628 if ((uint64_t) -skip >= i)
1629 i = 0;
1630 else
1631 i = i - (uint64_t) -skip;
1632 } else
1633 i += (uint64_t) skip;
cec736d2 1634
de190aef
LP
1635 n = le64toh(f->header->n_entries);
1636 if (n <= 0)
1637 return -EBADMSG;
cec736d2 1638
de190aef
LP
1639 if (i >= n)
1640 i = n-1;
1641
1642 return generic_array_get(f,
1643 le64toh(f->header->entry_array_offset),
1644 i,
1645 ret, offset);
cec736d2
LP
1646}
1647
de190aef
LP
1648int journal_file_next_entry_for_data(
1649 JournalFile *f,
1650 Object *o, uint64_t p,
1651 uint64_t data_offset,
1652 direction_t direction,
1653 Object **ret, uint64_t *offset) {
1654
1655 uint64_t n, i;
cec736d2 1656 int r;
de190aef 1657 Object *d;
cec736d2
LP
1658
1659 assert(f);
de190aef 1660 assert(p > 0 || !o);
cec736d2 1661
de190aef 1662 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1663 if (r < 0)
de190aef 1664 return r;
cec736d2 1665
de190aef
LP
1666 n = le64toh(d->data.n_entries);
1667 if (n <= 0)
1668 return n;
cec736d2 1669
de190aef
LP
1670 if (!o)
1671 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1672 else {
1673 if (o->object.type != OBJECT_ENTRY)
1674 return -EINVAL;
cec736d2 1675
de190aef
LP
1676 r = generic_array_bisect_plus_one(f,
1677 le64toh(d->data.entry_offset),
1678 le64toh(d->data.entry_array_offset),
1679 le64toh(d->data.n_entries),
1680 p,
1681 test_object_offset,
1682 DIRECTION_DOWN,
1683 NULL, NULL,
1684 &i);
1685
1686 if (r <= 0)
cec736d2
LP
1687 return r;
1688
de190aef
LP
1689 if (direction == DIRECTION_DOWN) {
1690 if (i >= n - 1)
1691 return 0;
cec736d2 1692
de190aef
LP
1693 i++;
1694 } else {
1695 if (i <= 0)
1696 return 0;
cec736d2 1697
de190aef
LP
1698 i--;
1699 }
cec736d2 1700
de190aef 1701 }
cec736d2 1702
de190aef
LP
1703 return generic_array_get_plus_one(f,
1704 le64toh(d->data.entry_offset),
1705 le64toh(d->data.entry_array_offset),
1706 i,
1707 ret, offset);
1708}
cec736d2 1709
cbdca852
LP
1710int journal_file_move_to_entry_by_offset_for_data(
1711 JournalFile *f,
1712 uint64_t data_offset,
1713 uint64_t p,
1714 direction_t direction,
1715 Object **ret, uint64_t *offset) {
1716
1717 int r;
1718 Object *d;
1719
1720 assert(f);
1721
1722 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1723 if (r < 0)
1724 return r;
1725
1726 return generic_array_bisect_plus_one(f,
1727 le64toh(d->data.entry_offset),
1728 le64toh(d->data.entry_array_offset),
1729 le64toh(d->data.n_entries),
1730 p,
1731 test_object_offset,
1732 direction,
1733 ret, offset, NULL);
1734}
1735
1736int journal_file_move_to_entry_by_monotonic_for_data(
1737 JournalFile *f,
1738 uint64_t data_offset,
1739 sd_id128_t boot_id,
1740 uint64_t monotonic,
1741 direction_t direction,
1742 Object **ret, uint64_t *offset) {
1743
1744 char t[9+32+1] = "_BOOT_ID=";
1745 Object *o, *d;
1746 int r;
1747 uint64_t b, z;
1748
1749 assert(f);
1750
1751 /* First, seek by time */
1752 sd_id128_to_string(boot_id, t + 9);
1753 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1754 if (r < 0)
1755 return r;
1756 if (r == 0)
1757 return -ENOENT;
1758
1759 r = generic_array_bisect_plus_one(f,
1760 le64toh(o->data.entry_offset),
1761 le64toh(o->data.entry_array_offset),
1762 le64toh(o->data.n_entries),
1763 monotonic,
1764 test_object_monotonic,
1765 direction,
1766 NULL, &z, NULL);
1767 if (r <= 0)
1768 return r;
1769
1770 /* And now, continue seeking until we find an entry that
1771 * exists in both bisection arrays */
1772
1773 for (;;) {
1774 Object *qo;
1775 uint64_t p, q;
1776
1777 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1778 if (r < 0)
1779 return r;
1780
1781 r = generic_array_bisect_plus_one(f,
1782 le64toh(d->data.entry_offset),
1783 le64toh(d->data.entry_array_offset),
1784 le64toh(d->data.n_entries),
1785 z,
1786 test_object_offset,
1787 direction,
1788 NULL, &p, NULL);
1789 if (r <= 0)
1790 return r;
1791
1792 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1793 if (r < 0)
1794 return r;
1795
1796 r = generic_array_bisect_plus_one(f,
1797 le64toh(o->data.entry_offset),
1798 le64toh(o->data.entry_array_offset),
1799 le64toh(o->data.n_entries),
1800 p,
1801 test_object_offset,
1802 direction,
1803 &qo, &q, NULL);
1804
1805 if (r <= 0)
1806 return r;
1807
1808 if (p == q) {
1809 if (ret)
1810 *ret = qo;
1811 if (offset)
1812 *offset = q;
1813
1814 return 1;
1815 }
1816
1817 z = q;
1818 }
1819
1820 return 0;
1821}
1822
de190aef
LP
1823int journal_file_move_to_entry_by_seqnum_for_data(
1824 JournalFile *f,
1825 uint64_t data_offset,
1826 uint64_t seqnum,
1827 direction_t direction,
1828 Object **ret, uint64_t *offset) {
cec736d2 1829
de190aef
LP
1830 Object *d;
1831 int r;
cec736d2 1832
91a31dde
LP
1833 assert(f);
1834
de190aef 1835 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1836 if (r < 0)
de190aef 1837 return r;
cec736d2 1838
de190aef
LP
1839 return generic_array_bisect_plus_one(f,
1840 le64toh(d->data.entry_offset),
1841 le64toh(d->data.entry_array_offset),
1842 le64toh(d->data.n_entries),
1843 seqnum,
1844 test_object_seqnum,
1845 direction,
1846 ret, offset, NULL);
1847}
cec736d2 1848
de190aef
LP
1849int journal_file_move_to_entry_by_realtime_for_data(
1850 JournalFile *f,
1851 uint64_t data_offset,
1852 uint64_t realtime,
1853 direction_t direction,
1854 Object **ret, uint64_t *offset) {
1855
1856 Object *d;
1857 int r;
1858
91a31dde
LP
1859 assert(f);
1860
de190aef 1861 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1862 if (r < 0)
de190aef
LP
1863 return r;
1864
1865 return generic_array_bisect_plus_one(f,
1866 le64toh(d->data.entry_offset),
1867 le64toh(d->data.entry_array_offset),
1868 le64toh(d->data.n_entries),
1869 realtime,
1870 test_object_realtime,
1871 direction,
1872 ret, offset, NULL);
cec736d2
LP
1873}
1874
7560fffc
LP
1875static void *fsprg_state(JournalFile *f) {
1876 uint64_t a, b;
1877 assert(f);
1878
1879 if (!f->authenticate)
1880 return NULL;
1881
1882 a = le64toh(f->fsprg_header->header_size);
1883 b = le64toh(f->fsprg_header->state_size);
1884
1885 if (a + b > f->fsprg_size)
1886 return NULL;
1887
1888 return (uint8_t*) f->fsprg_header + a;
1889}
1890
1891static int journal_file_append_tag(JournalFile *f) {
1892 Object *o;
1893 uint64_t p;
1894 int r;
1895
1896 assert(f);
1897
1898 if (!f->authenticate)
1899 return 0;
1900
1901 if (!f->hmac_running)
1902 return 0;
1903
1904 log_debug("Writing tag for epoch %llu\n", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
1905
1906 assert(f->hmac);
1907
1908 r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
1909 if (r < 0)
1910 return r;
1911
1912 /* Get the HMAC tag and store it in the object */
1913 memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
1914 f->hmac_running = false;
1915
1916 return 0;
1917}
1918
1919static int journal_file_hmac_start(JournalFile *f) {
1920 uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
1921
1922 assert(f);
1923
1924 if (!f->authenticate)
1925 return 0;
1926
1927 if (f->hmac_running)
1928 return 0;
1929
1930 /* Prepare HMAC for next cycle */
1931 gcry_md_reset(f->hmac);
1932 FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
1933 gcry_md_setkey(f->hmac, key, sizeof(key));
1934
1935 f->hmac_running = true;
1936
1937 return 0;
1938}
1939
1940static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
1941 uint64_t t;
1942
1943 assert(f);
1944 assert(epoch);
1945 assert(f->authenticate);
1946
1947 if (le64toh(f->fsprg_header->fsprg_start_usec) == 0 ||
1948 le64toh(f->fsprg_header->fsprg_interval_usec) == 0)
1949 return -ENOTSUP;
1950
1951 if (realtime < le64toh(f->fsprg_header->fsprg_start_usec))
1952 return -ESTALE;
1953
1954 t = realtime - le64toh(f->fsprg_header->fsprg_start_usec);
1955 t = t / le64toh(f->fsprg_header->fsprg_interval_usec);
1956
1957 *epoch = t;
1958 return 0;
1959}
1960
1961static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
1962 uint64_t goal, epoch;
1963 int r;
1964 assert(f);
1965
1966 if (!f->authenticate)
1967 return 0;
1968
1969 r = journal_file_get_epoch(f, realtime, &goal);
1970 if (r < 0)
1971 return r;
1972
1973 epoch = FSPRG_GetEpoch(fsprg_state(f));
1974 if (epoch > goal)
1975 return -ESTALE;
1976
1977 return epoch != goal;
1978}
1979
1980static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
1981 uint64_t goal, epoch;
1982 int r;
1983
1984 assert(f);
1985
1986 if (!f->authenticate)
1987 return 0;
1988
1989 r = journal_file_get_epoch(f, realtime, &goal);
1990 if (r < 0)
1991 return r;
1992
1993 epoch = FSPRG_GetEpoch(fsprg_state(f));
1994 if (epoch < goal)
1995 log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) epoch, (unsigned long long) goal);
1996
1997 for (;;) {
1998 if (epoch > goal)
1999 return -ESTALE;
2000 if (epoch == goal)
2001 return 0;
2002
2003 FSPRG_Evolve(fsprg_state(f));
2004 epoch = FSPRG_GetEpoch(fsprg_state(f));
2005 }
2006}
2007
2008static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
2009 int r;
2010
2011 assert(f);
2012
2013 if (!f->authenticate)
2014 return 0;
2015
2016 r = journal_file_need_evolve(f, realtime);
2017 if (r <= 0)
2018 return 0;
2019
2020 r = journal_file_append_tag(f);
2021 if (r < 0)
2022 return r;
2023
2024 r = journal_file_evolve(f, realtime);
2025 if (r < 0)
2026 return r;
2027
2028 r = journal_file_hmac_start(f);
2029 if (r < 0)
2030 return r;
2031
2032 return 0;
2033}
2034
2035static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
2036 int r;
2037 Object *o;
2038
2039 assert(f);
2040
2041 if (!f->authenticate)
2042 return 0;
2043
2044 r = journal_file_hmac_start(f);
2045 if (r < 0)
2046 return r;
2047
2048 r = journal_file_move_to_object(f, type, p, &o);
2049 if (r < 0)
2050 return r;
2051
2052 gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
2053
2054 switch (o->object.type) {
2055
2056 case OBJECT_DATA:
2057 /* All but: entry_array_offset, n_entries are mutable */
2058 gcry_md_write(f->hmac, &o->data.hash, offsetof(DataObject, entry_array_offset) - offsetof(DataObject, hash));
2059 gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
2060 break;
2061
2062 case OBJECT_ENTRY:
2063 /* All */
2064 gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
2065 break;
2066
2067 case OBJECT_FIELD_HASH_TABLE:
2068 case OBJECT_DATA_HASH_TABLE:
2069 case OBJECT_ENTRY_ARRAY:
2070 /* Nothing: everything is mutable */
2071 break;
2072
2073 case OBJECT_TAG:
2074 /* All */
2075 gcry_md_write(f->hmac, o->tag.tag, le64toh(o->object.size) - offsetof(TagObject, tag));
2076 break;
2077
2078 default:
2079 return -EINVAL;
2080 }
2081
2082 return 0;
2083}
2084
2085static int journal_file_hmac_put_header(JournalFile *f) {
2086 int r;
2087
2088 assert(f);
2089
2090 if (!f->authenticate)
2091 return 0;
2092
2093 r = journal_file_hmac_start(f);
2094 if (r < 0)
2095 return r;
2096
2097 /* All but state+reserved, boot_id, arena_size,
2098 * tail_object_offset, n_objects, n_entries, tail_seqnum,
2099 * head_entry_realtime, tail_entry_realtime,
2100 * tail_entry_monotonic, n_data, n_fields, header_tag */
2101
2102 gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
2103 gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
2104 gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
2105 gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
2106 gcry_md_write(f->hmac, &f->header->head_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_seqnum));
2107
2108 return 0;
2109}
2110
2111static int journal_file_load_fsprg(JournalFile *f) {
2112 int r, fd = -1;
2113 char *p = NULL;
2114 struct stat st;
2115 FSPRGHeader *m = NULL;
2116 sd_id128_t machine;
2117
2118 assert(f);
2119
2120 if (!f->authenticate)
2121 return 0;
2122
2123 r = sd_id128_get_machine(&machine);
2124 if (r < 0)
2125 return r;
2126
2127 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
2128 SD_ID128_FORMAT_VAL(machine)) < 0)
2129 return -ENOMEM;
2130
2131 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
2132 if (fd < 0) {
2133 log_error("Failed to open %s: %m", p);
2134 r = -errno;
2135 goto finish;
2136 }
2137
2138 if (fstat(fd, &st) < 0) {
2139 r = -errno;
2140 goto finish;
2141 }
2142
2143 if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
2144 r = -ENODATA;
2145 goto finish;
2146 }
2147
2148 m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
2149 if (m == MAP_FAILED) {
2150 m = NULL;
2151 r = -errno;
2152 goto finish;
2153 }
2154
2155 if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
2156 r = -EBADMSG;
2157 goto finish;
2158 }
2159
2160 if (m->incompatible_flags != 0) {
2161 r = -EPROTONOSUPPORT;
2162 goto finish;
2163 }
2164
2165 if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
2166 r = -EBADMSG;
2167 goto finish;
2168 }
2169
2170 if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) {
2171 r = -EBADMSG;
2172 goto finish;
2173 }
2174
2175 f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
2176 if ((uint64_t) st.st_size < f->fsprg_size) {
2177 r = -ENODATA;
2178 goto finish;
2179 }
2180
2181 if (!sd_id128_equal(machine, m->machine_id)) {
2182 r = -EHOSTDOWN;
2183 goto finish;
2184 }
2185
2186 if (le64toh(m->fsprg_start_usec) <= 0 ||
2187 le64toh(m->fsprg_interval_usec) <= 0) {
2188 r = -EBADMSG;
2189 goto finish;
2190 }
2191
2192 f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2193 if (f->fsprg_header == MAP_FAILED) {
2194 f->fsprg_header = NULL;
2195 r = -errno;
2196 goto finish;
2197 }
2198
2199 r = 0;
2200
2201finish:
2202 if (m)
2203 munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
2204
2205 if (fd >= 0)
2206 close_nointr_nofail(fd);
2207
2208 free(p);
2209 return r;
2210}
2211
2212static int journal_file_setup_hmac(JournalFile *f) {
2213 gcry_error_t e;
2214
2215 if (!f->authenticate)
2216 return 0;
2217
2218 e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
2219 if (e != 0)
2220 return -ENOTSUP;
2221
2222 return 0;
2223}
2224
2225static int journal_file_append_first_tag(JournalFile *f) {
2226 int r;
2227 uint64_t p;
2228
2229 if (!f->authenticate)
2230 return 0;
2231
2232 log_debug("Calculating first tag...");
2233
2234 r = journal_file_hmac_put_header(f);
2235 if (r < 0)
2236 return r;
2237
2238 p = le64toh(f->header->field_hash_table_offset);
2239 if (p < offsetof(Object, hash_table.items))
2240 return -EINVAL;
2241 p -= offsetof(Object, hash_table.items);
2242
2243 r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
2244 if (r < 0)
2245 return r;
2246
2247 p = le64toh(f->header->data_hash_table_offset);
2248 if (p < offsetof(Object, hash_table.items))
2249 return -EINVAL;
2250 p -= offsetof(Object, hash_table.items);
2251
2252 r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
2253 if (r < 0)
2254 return r;
2255
2256 r = journal_file_append_tag(f);
2257 if (r < 0)
2258 return r;
2259
2260 return 0;
2261}
2262
cec736d2 2263void journal_file_dump(JournalFile *f) {
cec736d2
LP
2264 Object *o;
2265 int r;
2266 uint64_t p;
2267
2268 assert(f);
2269
dca6219e 2270 journal_file_print_header(f);
cec736d2 2271
23b0b2b2 2272 p = le64toh(f->header->header_size);
cec736d2 2273 while (p != 0) {
de190aef 2274 r = journal_file_move_to_object(f, -1, p, &o);
cec736d2
LP
2275 if (r < 0)
2276 goto fail;
2277
2278 switch (o->object.type) {
2279
2280 case OBJECT_UNUSED:
2281 printf("Type: OBJECT_UNUSED\n");
2282 break;
2283
2284 case OBJECT_DATA:
2285 printf("Type: OBJECT_DATA\n");
2286 break;
2287
2288 case OBJECT_ENTRY:
3fbf9cbb
LP
2289 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
2290 (unsigned long long) le64toh(o->entry.seqnum),
2291 (unsigned long long) le64toh(o->entry.monotonic),
2292 (unsigned long long) le64toh(o->entry.realtime));
cec736d2
LP
2293 break;
2294
de190aef
LP
2295 case OBJECT_FIELD_HASH_TABLE:
2296 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
cec736d2
LP
2297 break;
2298
de190aef
LP
2299 case OBJECT_DATA_HASH_TABLE:
2300 printf("Type: OBJECT_DATA_HASH_TABLE\n");
2301 break;
2302
2303 case OBJECT_ENTRY_ARRAY:
2304 printf("Type: OBJECT_ENTRY_ARRAY\n");
cec736d2 2305 break;
8144056f 2306
7560fffc
LP
2307 case OBJECT_TAG:
2308 printf("Type: OBJECT_TAG\n");
8144056f 2309 break;
cec736d2
LP
2310 }
2311
807e17f0
LP
2312 if (o->object.flags & OBJECT_COMPRESSED)
2313 printf("Flags: COMPRESSED\n");
2314
cec736d2
LP
2315 if (p == le64toh(f->header->tail_object_offset))
2316 p = 0;
2317 else
2318 p = p + ALIGN64(le64toh(o->object.size));
2319 }
2320
2321 return;
2322fail:
2323 log_error("File corrupt");
2324}
2325
dca6219e
LP
2326void journal_file_print_header(JournalFile *f) {
2327 char a[33], b[33], c[33];
2328 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
2329
2330 assert(f);
2331
2332 printf("File Path: %s\n"
2333 "File ID: %s\n"
2334 "Machine ID: %s\n"
2335 "Boot ID: %s\n"
2336 "Sequential Number ID: %s\n"
dc36ac67
LP
2337 "State: %s\n"
2338 "Compatible Flags:%s%s\n"
2339 "Incompatible Flags:%s%s\n"
dca6219e
LP
2340 "Header size: %llu\n"
2341 "Arena size: %llu\n"
2342 "Data Hash Table Size: %llu\n"
2343 "Field Hash Table Size: %llu\n"
2344 "Objects: %llu\n"
2345 "Entry Objects: %llu\n"
2346 "Rotate Suggested: %s\n"
2347 "Head Sequential Number: %llu\n"
2348 "Tail Sequential Number: %llu\n"
2349 "Head Realtime Timestamp: %s\n"
2350 "Tail Realtime Timestamp: %s\n",
2351 f->path,
2352 sd_id128_to_string(f->header->file_id, a),
2353 sd_id128_to_string(f->header->machine_id, b),
2354 sd_id128_to_string(f->header->boot_id, c),
2355 sd_id128_to_string(f->header->seqnum_id, c),
dc36ac67
LP
2356 f->header->state == STATE_OFFLINE ? "offline" :
2357 f->header->state == STATE_ONLINE ? "online" :
2358 f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
7560fffc
LP
2359 (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
2360 (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
dc36ac67
LP
2361 (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
2362 (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
dca6219e
LP
2363 (unsigned long long) le64toh(f->header->header_size),
2364 (unsigned long long) le64toh(f->header->arena_size),
2365 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2366 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2367 (unsigned long long) le64toh(f->header->n_objects),
2368 (unsigned long long) le64toh(f->header->n_entries),
2369 yes_no(journal_file_rotate_suggested(f)),
2370 (unsigned long long) le64toh(f->header->head_seqnum),
2371 (unsigned long long) le64toh(f->header->tail_seqnum),
2372 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2373 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
2374
2375 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2376 printf("Data Objects: %llu\n"
2377 "Data Hash Table Fill: %.1f%%\n",
2378 (unsigned long long) le64toh(f->header->n_data),
2379 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2380
2381 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2382 printf("Field Objects: %llu\n"
2383 "Field Hash Table Fill: %.1f%%\n",
2384 (unsigned long long) le64toh(f->header->n_fields),
2385 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2386}
2387
cec736d2
LP
2388int journal_file_open(
2389 const char *fname,
2390 int flags,
2391 mode_t mode,
7560fffc
LP
2392 bool compress,
2393 bool authenticate,
4a92baf3 2394 JournalMetrics *metrics,
0ac38b70 2395 JournalFile *template,
cec736d2
LP
2396 JournalFile **ret) {
2397
2398 JournalFile *f;
2399 int r;
2400 bool newly_created = false;
2401
2402 assert(fname);
2403
2404 if ((flags & O_ACCMODE) != O_RDONLY &&
2405 (flags & O_ACCMODE) != O_RDWR)
2406 return -EINVAL;
2407
9447a7f1
LP
2408 if (!endswith(fname, ".journal"))
2409 return -EINVAL;
2410
cec736d2
LP
2411 f = new0(JournalFile, 1);
2412 if (!f)
2413 return -ENOMEM;
2414
0ac38b70 2415 f->fd = -1;
0ac38b70 2416 f->mode = mode;
cec736d2 2417
7560fffc
LP
2418 f->flags = flags;
2419 f->prot = prot_from_flags(flags);
2420 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2421 f->compress = compress;
2422 f->authenticate = authenticate;
15944db8 2423
cec736d2
LP
2424 f->path = strdup(fname);
2425 if (!f->path) {
2426 r = -ENOMEM;
2427 goto fail;
2428 }
2429
0ac38b70
LP
2430 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2431 if (f->fd < 0) {
2432 r = -errno;
2433 goto fail;
2434 }
2435
cec736d2
LP
2436 if (fstat(f->fd, &f->last_stat) < 0) {
2437 r = -errno;
2438 goto fail;
2439 }
2440
2441 if (f->last_stat.st_size == 0 && f->writable) {
2442 newly_created = true;
2443
7560fffc
LP
2444 /* Try to load the FSPRG state, and if we can't, then
2445 * just don't do authentication */
2446 r = journal_file_load_fsprg(f);
2447 if (r < 0)
2448 f->authenticate = false;
2449
0ac38b70 2450 r = journal_file_init_header(f, template);
cec736d2
LP
2451 if (r < 0)
2452 goto fail;
2453
2454 if (fstat(f->fd, &f->last_stat) < 0) {
2455 r = -errno;
2456 goto fail;
2457 }
2458 }
2459
dca6219e 2460 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
cec736d2
LP
2461 r = -EIO;
2462 goto fail;
2463 }
2464
2465 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2466 if (f->header == MAP_FAILED) {
2467 f->header = NULL;
2468 r = -errno;
2469 goto fail;
2470 }
2471
2472 if (!newly_created) {
2473 r = journal_file_verify_header(f);
2474 if (r < 0)
2475 goto fail;
7560fffc
LP
2476
2477 r = journal_file_load_fsprg(f);
2478 if (r < 0)
2479 goto fail;
cec736d2
LP
2480 }
2481
2482 if (f->writable) {
4a92baf3
LP
2483 if (metrics) {
2484 journal_default_metrics(metrics, f->fd);
2485 f->metrics = *metrics;
2486 } else if (template)
2487 f->metrics = template->metrics;
2488
cec736d2
LP
2489 r = journal_file_refresh_header(f);
2490 if (r < 0)
2491 goto fail;
7560fffc
LP
2492
2493 r = journal_file_setup_hmac(f);
2494 if (r < 0)
2495 goto fail;
cec736d2
LP
2496 }
2497
2498 if (newly_created) {
de190aef 2499 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2500 if (r < 0)
2501 goto fail;
2502
de190aef 2503 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2504 if (r < 0)
2505 goto fail;
7560fffc
LP
2506
2507 r = journal_file_append_first_tag(f);
2508 if (r < 0)
2509 goto fail;
cec736d2
LP
2510 }
2511
de190aef 2512 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2513 if (r < 0)
2514 goto fail;
2515
de190aef 2516 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2517 if (r < 0)
2518 goto fail;
2519
2520 if (ret)
2521 *ret = f;
2522
2523 return 0;
2524
2525fail:
2526 journal_file_close(f);
2527
2528 return r;
2529}
0ac38b70 2530
7560fffc 2531int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
0ac38b70
LP
2532 char *p;
2533 size_t l;
2534 JournalFile *old_file, *new_file = NULL;
2535 int r;
2536
2537 assert(f);
2538 assert(*f);
2539
2540 old_file = *f;
2541
2542 if (!old_file->writable)
2543 return -EINVAL;
2544
2545 if (!endswith(old_file->path, ".journal"))
2546 return -EINVAL;
2547
2548 l = strlen(old_file->path);
2549
9447a7f1 2550 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2551 if (!p)
2552 return -ENOMEM;
2553
2554 memcpy(p, old_file->path, l - 8);
2555 p[l-8] = '@';
2556 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2557 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2558 "-%016llx-%016llx.journal",
dca6219e 2559 (unsigned long long) le64toh((*f)->header->tail_seqnum),
0ac38b70
LP
2560 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2561
2562 r = rename(old_file->path, p);
2563 free(p);
2564
2565 if (r < 0)
2566 return -errno;
2567
ccdbaf91 2568 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2569
7560fffc 2570 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file, &new_file);
0ac38b70
LP
2571 journal_file_close(old_file);
2572
2573 *f = new_file;
2574 return r;
2575}
2576
9447a7f1
LP
2577int journal_file_open_reliably(
2578 const char *fname,
2579 int flags,
2580 mode_t mode,
7560fffc
LP
2581 bool compress,
2582 bool authenticate,
4a92baf3 2583 JournalMetrics *metrics,
9447a7f1
LP
2584 JournalFile *template,
2585 JournalFile **ret) {
2586
2587 int r;
2588 size_t l;
2589 char *p;
2590
7560fffc 2591 r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, template, ret);
0071d9f1
LP
2592 if (r != -EBADMSG && /* corrupted */
2593 r != -ENODATA && /* truncated */
2594 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2595 r != -EPROTONOSUPPORT && /* incompatible feature */
2596 r != -EBUSY && /* unclean shutdown */
2597 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2598 return r;
2599
2600 if ((flags & O_ACCMODE) == O_RDONLY)
2601 return r;
2602
2603 if (!(flags & O_CREAT))
2604 return r;
2605
7560fffc
LP
2606 if (!endswith(fname, ".journal"))
2607 return r;
2608
5c70eab4
LP
2609 /* The file is corrupted. Rotate it away and try it again (but only once) */
2610
9447a7f1
LP
2611 l = strlen(fname);
2612 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2613 (int) (l-8), fname,
2614 (unsigned long long) now(CLOCK_REALTIME),
2615 random_ull()) < 0)
2616 return -ENOMEM;
2617
2618 r = rename(fname, p);
2619 free(p);
2620 if (r < 0)
2621 return -errno;
2622
a1a1898f 2623 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2624
7560fffc 2625 return journal_file_open(fname, flags, mode, compress, authenticate, metrics, template, ret);
9447a7f1
LP
2626}
2627
0ac38b70
LP
2628struct vacuum_info {
2629 off_t usage;
2630 char *filename;
2631
2632 uint64_t realtime;
2633 sd_id128_t seqnum_id;
2634 uint64_t seqnum;
5c70eab4
LP
2635
2636 bool have_seqnum;
0ac38b70
LP
2637};
2638
2639static int vacuum_compare(const void *_a, const void *_b) {
2640 const struct vacuum_info *a, *b;
2641
2642 a = _a;
2643 b = _b;
2644
5c70eab4
LP
2645 if (a->have_seqnum && b->have_seqnum &&
2646 sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
0ac38b70
LP
2647 if (a->seqnum < b->seqnum)
2648 return -1;
2649 else if (a->seqnum > b->seqnum)
2650 return 1;
2651 else
2652 return 0;
2653 }
2654
2655 if (a->realtime < b->realtime)
2656 return -1;
2657 else if (a->realtime > b->realtime)
2658 return 1;
5c70eab4 2659 else if (a->have_seqnum && b->have_seqnum)
0ac38b70 2660 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
5c70eab4
LP
2661 else
2662 return strcmp(a->filename, b->filename);
0ac38b70
LP
2663}
2664
2665int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
2666 DIR *d;
2667 int r = 0;
2668 struct vacuum_info *list = NULL;
2669 unsigned n_list = 0, n_allocated = 0, i;
2670 uint64_t sum = 0;
2671
2672 assert(directory);
2673
2674 if (max_use <= 0)
babfc091 2675 return 0;
0ac38b70
LP
2676
2677 d = opendir(directory);
2678 if (!d)
2679 return -errno;
2680
2681 for (;;) {
2682 int k;
2683 struct dirent buf, *de;
2684 size_t q;
2685 struct stat st;
2686 char *p;
7ea07dcd 2687 unsigned long long seqnum = 0, realtime;
0ac38b70 2688 sd_id128_t seqnum_id;
5c70eab4 2689 bool have_seqnum;
0ac38b70
LP
2690
2691 k = readdir_r(d, &buf, &de);
2692 if (k != 0) {
2693 r = -k;
2694 goto finish;
2695 }
2696
2697 if (!de)
2698 break;
2699
5c70eab4
LP
2700 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2701 continue;
2702
2703 if (!S_ISREG(st.st_mode))
0ac38b70
LP
2704 continue;
2705
2706 q = strlen(de->d_name);
2707
5c70eab4 2708 if (endswith(de->d_name, ".journal")) {
0ac38b70 2709
5c70eab4 2710 /* Vacuum archived files */
0ac38b70 2711
5c70eab4
LP
2712 if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2713 continue;
0ac38b70 2714
5c70eab4
LP
2715 if (de->d_name[q-8-16-1] != '-' ||
2716 de->d_name[q-8-16-1-16-1] != '-' ||
2717 de->d_name[q-8-16-1-16-1-32-1] != '@')
2718 continue;
0ac38b70 2719
5c70eab4
LP
2720 p = strdup(de->d_name);
2721 if (!p) {
2722 r = -ENOMEM;
2723 goto finish;
2724 }
0ac38b70 2725
5c70eab4
LP
2726 de->d_name[q-8-16-1-16-1] = 0;
2727 if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2728 free(p);
2729 continue;
2730 }
2731
2732 if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2733 free(p);
2734 continue;
2735 }
2736
2737 have_seqnum = true;
2738
2739 } else if (endswith(de->d_name, ".journal~")) {
2740 unsigned long long tmp;
2741
2742 /* Vacuum corrupted files */
2743
2744 if (q < 1 + 16 + 1 + 16 + 8 + 1)
2745 continue;
0ac38b70 2746
5c70eab4
LP
2747 if (de->d_name[q-1-8-16-1] != '-' ||
2748 de->d_name[q-1-8-16-1-16-1] != '@')
2749 continue;
2750
2751 p = strdup(de->d_name);
2752 if (!p) {
2753 r = -ENOMEM;
2754 goto finish;
2755 }
2756
2757 if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2758 free(p);
2759 continue;
2760 }
2761
2762 have_seqnum = false;
2763 } else
0ac38b70 2764 continue;
0ac38b70
LP
2765
2766 if (n_list >= n_allocated) {
2767 struct vacuum_info *j;
2768
2769 n_allocated = MAX(n_allocated * 2U, 8U);
2770 j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2771 if (!j) {
2772 free(p);
2773 r = -ENOMEM;
2774 goto finish;
2775 }
2776
2777 list = j;
2778 }
2779
2780 list[n_list].filename = p;
a3a52c0f 2781 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
0ac38b70
LP
2782 list[n_list].seqnum = seqnum;
2783 list[n_list].realtime = realtime;
2784 list[n_list].seqnum_id = seqnum_id;
5c70eab4 2785 list[n_list].have_seqnum = have_seqnum;
0ac38b70
LP
2786
2787 sum += list[n_list].usage;
2788
2789 n_list ++;
2790 }
2791
64825d3c
LP
2792 if (n_list > 0)
2793 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
0ac38b70
LP
2794
2795 for(i = 0; i < n_list; i++) {
2796 struct statvfs ss;
2797
2798 if (fstatvfs(dirfd(d), &ss) < 0) {
2799 r = -errno;
2800 goto finish;
2801 }
2802
2803 if (sum <= max_use &&
2804 (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2805 break;
2806
2807 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
e7bf07b3 2808 log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
0ac38b70
LP
2809 sum -= list[i].usage;
2810 } else if (errno != ENOENT)
2811 log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2812 }
2813
2814finish:
2815 for (i = 0; i < n_list; i++)
2816 free(list[i].filename);
2817
2818 free(list);
2819
de190aef
LP
2820 if (d)
2821 closedir(d);
2822
0ac38b70
LP
2823 return r;
2824}
cf244689
LP
2825
2826int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2827 uint64_t i, n;
2828 uint64_t q, xor_hash = 0;
2829 int r;
2830 EntryItem *items;
2831 dual_timestamp ts;
2832
2833 assert(from);
2834 assert(to);
2835 assert(o);
2836 assert(p);
2837
2838 if (!to->writable)
2839 return -EPERM;
2840
2841 ts.monotonic = le64toh(o->entry.monotonic);
2842 ts.realtime = le64toh(o->entry.realtime);
2843
2844 if (to->tail_entry_monotonic_valid &&
2845 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2846 return -EINVAL;
2847
cf244689
LP
2848 n = journal_file_entry_n_items(o);
2849 items = alloca(sizeof(EntryItem) * n);
2850
2851 for (i = 0; i < n; i++) {
4fd052ae
FC
2852 uint64_t l, h;
2853 le64_t le_hash;
cf244689
LP
2854 size_t t;
2855 void *data;
2856 Object *u;
2857
2858 q = le64toh(o->entry.items[i].object_offset);
2859 le_hash = o->entry.items[i].hash;
2860
2861 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2862 if (r < 0)
2863 return r;
2864
2865 if (le_hash != o->data.hash)
2866 return -EBADMSG;
2867
2868 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2869 t = (size_t) l;
2870
2871 /* We hit the limit on 32bit machines */
2872 if ((uint64_t) t != l)
2873 return -E2BIG;
2874
2875 if (o->object.flags & OBJECT_COMPRESSED) {
2876#ifdef HAVE_XZ
2877 uint64_t rsize;
2878
2879 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2880 return -EBADMSG;
2881
2882 data = from->compress_buffer;
2883 l = rsize;
2884#else
2885 return -EPROTONOSUPPORT;
2886#endif
2887 } else
2888 data = o->data.payload;
2889
2890 r = journal_file_append_data(to, data, l, &u, &h);
2891 if (r < 0)
2892 return r;
2893
2894 xor_hash ^= le64toh(u->data.hash);
2895 items[i].object_offset = htole64(h);
2896 items[i].hash = u->data.hash;
2897
2898 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2899 if (r < 0)
2900 return r;
2901 }
2902
2903 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2904}
babfc091
LP
2905
2906void journal_default_metrics(JournalMetrics *m, int fd) {
2907 uint64_t fs_size = 0;
2908 struct statvfs ss;
a7bc2c2a 2909 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2910
2911 assert(m);
2912 assert(fd >= 0);
2913
2914 if (fstatvfs(fd, &ss) >= 0)
2915 fs_size = ss.f_frsize * ss.f_blocks;
2916
2917 if (m->max_use == (uint64_t) -1) {
2918
2919 if (fs_size > 0) {
2920 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2921
2922 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2923 m->max_use = DEFAULT_MAX_USE_UPPER;
2924
2925 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2926 m->max_use = DEFAULT_MAX_USE_LOWER;
2927 } else
2928 m->max_use = DEFAULT_MAX_USE_LOWER;
2929 } else {
2930 m->max_use = PAGE_ALIGN(m->max_use);
2931
2932 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2933 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2934 }
2935
2936 if (m->max_size == (uint64_t) -1) {
2937 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2938
2939 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2940 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2941 } else
2942 m->max_size = PAGE_ALIGN(m->max_size);
2943
2944 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2945 m->max_size = JOURNAL_FILE_SIZE_MIN;
2946
2947 if (m->max_size*2 > m->max_use)
2948 m->max_use = m->max_size*2;
2949
2950 if (m->min_size == (uint64_t) -1)
2951 m->min_size = JOURNAL_FILE_SIZE_MIN;
2952 else {
2953 m->min_size = PAGE_ALIGN(m->min_size);
2954
2955 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2956 m->min_size = JOURNAL_FILE_SIZE_MIN;
2957
2958 if (m->min_size > m->max_size)
2959 m->max_size = m->min_size;
2960 }
2961
2962 if (m->keep_free == (uint64_t) -1) {
2963
2964 if (fs_size > 0) {
2965 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2966
2967 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2968 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2969
2970 } else
2971 m->keep_free = DEFAULT_KEEP_FREE;
2972 }
2973
e7bf07b3
LP
2974 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2975 format_bytes(a, sizeof(a), m->max_use),
2976 format_bytes(b, sizeof(b), m->max_size),
2977 format_bytes(c, sizeof(c), m->min_size),
2978 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2979}
08984293
LP
2980
2981int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2982 assert(f);
2983 assert(from || to);
2984
2985 if (from) {
162566a4
LP
2986 if (f->header->head_entry_realtime == 0)
2987 return -ENOENT;
08984293 2988
162566a4 2989 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2990 }
2991
2992 if (to) {
162566a4
LP
2993 if (f->header->tail_entry_realtime == 0)
2994 return -ENOENT;
08984293 2995
162566a4 2996 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2997 }
2998
2999 return 1;
3000}
3001
3002int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
3003 char t[9+32+1] = "_BOOT_ID=";
3004 Object *o;
3005 uint64_t p;
3006 int r;
3007
3008 assert(f);
3009 assert(from || to);
3010
3011 sd_id128_to_string(boot_id, t + 9);
3012
3013 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
3014 if (r <= 0)
3015 return r;
3016
3017 if (le64toh(o->data.n_entries) <= 0)
3018 return 0;
3019
3020 if (from) {
3021 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
3022 if (r < 0)
3023 return r;
3024
3025 *from = le64toh(o->entry.monotonic);
3026 }
3027
3028 if (to) {
3029 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
3030 if (r < 0)
3031 return r;
3032
3033 r = generic_array_get_plus_one(f,
3034 le64toh(o->data.entry_offset),
3035 le64toh(o->data.entry_array_offset),
3036 le64toh(o->data.n_entries)-1,
3037 &o, NULL);
3038 if (r <= 0)
3039 return r;
3040
3041 *to = le64toh(o->entry.monotonic);
3042 }
3043
3044 return 1;
3045}
dca6219e
LP
3046
3047bool journal_file_rotate_suggested(JournalFile *f) {
3048 assert(f);
3049
3050 /* If we gained new header fields we gained new features,
3051 * hence suggest a rotation */
361f9cbc
LP
3052 if (le64toh(f->header->header_size) < sizeof(Header)) {
3053 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 3054 return true;
361f9cbc 3055 }
dca6219e
LP
3056
3057 /* Let's check if the hash tables grew over a certain fill
3058 * level (75%, borrowing this value from Java's hash table
3059 * implementation), and if so suggest a rotation. To calculate
3060 * the fill level we need the n_data field, which only exists
3061 * in newer versions. */
3062
3063 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
3064 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3065 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
3066 f->path,
3067 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
3068 (unsigned long long) le64toh(f->header->n_data),
3069 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
3070 (unsigned long long) (f->last_stat.st_size),
3071 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 3072 return true;
361f9cbc 3073 }
dca6219e
LP
3074
3075 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
3076 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3077 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
3078 f->path,
3079 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
3080 (unsigned long long) le64toh(f->header->n_fields),
3081 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 3082 return true;
361f9cbc 3083 }
dca6219e
LP
3084
3085 return false;
3086}