]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journal: add all objects we add to HMAC
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
32#include "lookup3.h"
807e17f0 33#include "compress.h"
7560fffc 34#include "fsprg.h"
cec736d2 35
4a92baf3
LP
36#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
37#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
cec736d2 38
1fa80181 39#define DEFAULT_WINDOW_SIZE (8ULL*1024ULL*1024ULL)
cec736d2 40
be19b7df 41#define COMPRESSION_SIZE_THRESHOLD (512ULL)
807e17f0 42
babfc091 43/* This is the minimum journal file size */
b47ffcfd 44#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
babfc091
LP
45
46/* These are the lower and upper bounds if we deduce the max_use value
47 * from the file system size */
48#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
49#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
50
51/* This is the upper bound if we deduce max_size from max_use */
71100051 52#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
babfc091
LP
53
54/* This is the upper bound if we deduce the keep_free value from the
55 * file system size */
56#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
57
58/* This is the keep_free value when we can't determine the system
59 * size */
60#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
61
dca6219e
LP
62/* n_data was the first entry we added after the initial file format design */
63#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2
LP
64
65#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
66
dca6219e
LP
67#define JOURNAL_HEADER_CONTAINS(h, field) \
68 (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
69
7560fffc 70static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
b0af6f41 71static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
dca6219e 72
cec736d2 73void journal_file_close(JournalFile *f) {
de190aef 74 int t;
cec736d2 75
de190aef 76 assert(f);
cec736d2 77
b0af6f41
LP
78 /* Write the final tag */
79 if (f->authenticate)
80 journal_file_append_tag(f);
81
7560fffc
LP
82 /* Sync everything to disk, before we mark the file offline */
83 for (t = 0; t < _WINDOW_MAX; t++)
84 if (f->windows[t].ptr)
85 munmap(f->windows[t].ptr, f->windows[t].size);
86
87 if (f->writable && f->fd >= 0)
88 fdatasync(f->fd);
89
d384c7a8 90 if (f->header) {
cd96b3b8
LP
91 /* Mark the file offline. Don't override the archived state if it already is set */
92 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 93 f->header->state = STATE_OFFLINE;
cec736d2 94
d384c7a8
MS
95 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
96 }
cec736d2 97
0ac38b70
LP
98 if (f->fd >= 0)
99 close_nointr_nofail(f->fd);
100
cec736d2 101 free(f->path);
807e17f0
LP
102
103#ifdef HAVE_XZ
104 free(f->compress_buffer);
105#endif
106
7560fffc
LP
107#ifdef HAVE_GCRYPT
108 if (f->fsprg_header)
109 munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
110
111 if (f->hmac)
112 gcry_md_close(f->hmac);
113#endif
114
cec736d2
LP
115 free(f);
116}
117
0ac38b70 118static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
119 Header h;
120 ssize_t k;
121 int r;
122
123 assert(f);
124
125 zero(h);
7560fffc 126 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 127 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 128
7560fffc
LP
129 h.incompatible_flags =
130 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
131
132 h.compatible_flags =
133 htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
134
cec736d2
LP
135 r = sd_id128_randomize(&h.file_id);
136 if (r < 0)
137 return r;
138
0ac38b70
LP
139 if (template) {
140 h.seqnum_id = template->header->seqnum_id;
dca6219e 141 h.tail_seqnum = template->header->tail_seqnum;
0ac38b70
LP
142 } else
143 h.seqnum_id = h.file_id;
cec736d2
LP
144
145 k = pwrite(f->fd, &h, sizeof(h), 0);
146 if (k < 0)
147 return -errno;
148
149 if (k != sizeof(h))
150 return -EIO;
151
152 return 0;
153}
154
155static int journal_file_refresh_header(JournalFile *f) {
156 int r;
de190aef 157 sd_id128_t boot_id;
cec736d2
LP
158
159 assert(f);
160
161 r = sd_id128_get_machine(&f->header->machine_id);
162 if (r < 0)
163 return r;
164
de190aef 165 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
166 if (r < 0)
167 return r;
168
de190aef
LP
169 if (sd_id128_equal(boot_id, f->header->boot_id))
170 f->tail_entry_monotonic_valid = true;
171
172 f->header->boot_id = boot_id;
173
174 f->header->state = STATE_ONLINE;
b788cc23 175
7560fffc
LP
176 /* Sync the online state to disk */
177 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
178 fdatasync(f->fd);
b788cc23 179
cec736d2
LP
180 return 0;
181}
182
183static int journal_file_verify_header(JournalFile *f) {
184 assert(f);
185
7560fffc 186 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
187 return -EBADMSG;
188
7560fffc
LP
189 /* In both read and write mode we refuse to open files with
190 * incompatible flags we don't know */
807e17f0 191#ifdef HAVE_XZ
7560fffc 192 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
193 return -EPROTONOSUPPORT;
194#else
cec736d2
LP
195 if (f->header->incompatible_flags != 0)
196 return -EPROTONOSUPPORT;
807e17f0 197#endif
cec736d2 198
7560fffc
LP
199 /* When open for writing we refuse to open files with
200 * compatible flags, too */
201 if (f->writable) {
202#ifdef HAVE_GCRYPT
203 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
204 return -EPROTONOSUPPORT;
205#else
206 if (f->header->compatible_flags != 0)
207 return -EPROTONOSUPPORT;
208#endif
209 }
210
dca6219e
LP
211 /* The first addition was n_data, so check that we are at least this large */
212 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
213 return -EBADMSG;
214
215 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
cec736d2
LP
216 return -ENODATA;
217
218 if (f->writable) {
ccdbaf91 219 uint8_t state;
cec736d2
LP
220 sd_id128_t machine_id;
221 int r;
222
223 r = sd_id128_get_machine(&machine_id);
224 if (r < 0)
225 return r;
226
227 if (!sd_id128_equal(machine_id, f->header->machine_id))
228 return -EHOSTDOWN;
229
de190aef 230 state = f->header->state;
cec736d2 231
71fa6f00
LP
232 if (state == STATE_ONLINE) {
233 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
234 return -EBUSY;
235 } else if (state == STATE_ARCHIVED)
cec736d2 236 return -ESHUTDOWN;
71fa6f00
LP
237 else if (state != STATE_OFFLINE) {
238 log_debug("Journal file %s has unknown state %u.", f->path, state);
239 return -EBUSY;
240 }
cec736d2
LP
241 }
242
7560fffc
LP
243 f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
244 f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
245
cec736d2
LP
246 return 0;
247}
248
249static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 250 uint64_t old_size, new_size;
fec2aa2f 251 int r;
cec736d2
LP
252
253 assert(f);
254
cec736d2 255 /* We assume that this file is not sparse, and we know that
38ac38b2 256 * for sure, since we always call posix_fallocate()
cec736d2
LP
257 * ourselves */
258
259 old_size =
23b0b2b2 260 le64toh(f->header->header_size) +
cec736d2
LP
261 le64toh(f->header->arena_size);
262
bc85bfee 263 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
264 if (new_size < le64toh(f->header->header_size))
265 new_size = le64toh(f->header->header_size);
bc85bfee
LP
266
267 if (new_size <= old_size)
cec736d2
LP
268 return 0;
269
bc85bfee
LP
270 if (f->metrics.max_size > 0 &&
271 new_size > f->metrics.max_size)
272 return -E2BIG;
cec736d2 273
bc85bfee
LP
274 if (new_size > f->metrics.min_size &&
275 f->metrics.keep_free > 0) {
cec736d2
LP
276 struct statvfs svfs;
277
278 if (fstatvfs(f->fd, &svfs) >= 0) {
279 uint64_t available;
280
281 available = svfs.f_bfree * svfs.f_bsize;
282
bc85bfee
LP
283 if (available >= f->metrics.keep_free)
284 available -= f->metrics.keep_free;
cec736d2
LP
285 else
286 available = 0;
287
288 if (new_size - old_size > available)
289 return -E2BIG;
290 }
291 }
292
bc85bfee
LP
293 /* Note that the glibc fallocate() fallback is very
294 inefficient, hence we try to minimize the allocation area
295 as we can. */
fec2aa2f
GV
296 r = posix_fallocate(f->fd, old_size, new_size - old_size);
297 if (r != 0)
298 return -r;
cec736d2
LP
299
300 if (fstat(f->fd, &f->last_stat) < 0)
301 return -errno;
302
23b0b2b2 303 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
304
305 return 0;
306}
307
308static int journal_file_map(
309 JournalFile *f,
310 uint64_t offset,
311 uint64_t size,
312 void **_window,
313 uint64_t *_woffset,
314 uint64_t *_wsize,
315 void **ret) {
316
317 uint64_t woffset, wsize;
318 void *window;
319
320 assert(f);
321 assert(size > 0);
322 assert(ret);
323
324 woffset = offset & ~((uint64_t) page_size() - 1ULL);
325 wsize = size + (offset - woffset);
326 wsize = PAGE_ALIGN(wsize);
327
2a59ea54
LP
328 /* Avoid SIGBUS on invalid accesses */
329 if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
330 return -EADDRNOTAVAIL;
331
cec736d2
LP
332 window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
333 if (window == MAP_FAILED)
334 return -errno;
335
336 if (_window)
337 *_window = window;
338
339 if (_woffset)
340 *_woffset = woffset;
341
342 if (_wsize)
343 *_wsize = wsize;
344
345 *ret = (uint8_t*) window + (offset - woffset);
346
347 return 0;
348}
349
de190aef 350static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
6c8a39b8 351 void *p = NULL;
cec736d2
LP
352 uint64_t delta;
353 int r;
de190aef 354 Window *w;
cec736d2
LP
355
356 assert(f);
357 assert(ret);
de190aef
LP
358 assert(wt >= 0);
359 assert(wt < _WINDOW_MAX);
cec736d2 360
4bbdcdb3
LP
361 if (offset + size > (uint64_t) f->last_stat.st_size) {
362 /* Hmm, out of range? Let's refresh the fstat() data
363 * first, before we trust that check. */
364
365 if (fstat(f->fd, &f->last_stat) < 0 ||
366 offset + size > (uint64_t) f->last_stat.st_size)
367 return -EADDRNOTAVAIL;
368 }
369
de190aef 370 w = f->windows + wt;
cec736d2 371
de190aef
LP
372 if (_likely_(w->ptr &&
373 w->offset <= offset &&
374 w->offset + w->size >= offset + size)) {
375
376 *ret = (uint8_t*) w->ptr + (offset - w->offset);
cec736d2
LP
377 return 0;
378 }
379
de190aef
LP
380 if (w->ptr) {
381 if (munmap(w->ptr, w->size) < 0)
cec736d2
LP
382 return -errno;
383
de190aef
LP
384 w->ptr = NULL;
385 w->size = w->offset = 0;
cec736d2
LP
386 }
387
388 if (size < DEFAULT_WINDOW_SIZE) {
389 /* If the default window size is larger then what was
390 * asked for extend the mapping a bit in the hope to
391 * minimize needed remappings later on. We add half
392 * the window space before and half behind the
393 * requested mapping */
394
1921a5cb 395 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
cec736d2 396
a99c349d 397 if (delta > offset)
cec736d2
LP
398 delta = offset;
399
400 offset -= delta;
a99c349d 401 size = DEFAULT_WINDOW_SIZE;
cec736d2
LP
402 } else
403 delta = 0;
404
2a59ea54 405 if (offset + size > (uint64_t) f->last_stat.st_size)
1921a5cb 406 size = (uint64_t) f->last_stat.st_size - offset;
2a59ea54
LP
407
408 if (size <= 0)
409 return -EADDRNOTAVAIL;
410
cec736d2
LP
411 r = journal_file_map(f,
412 offset, size,
de190aef
LP
413 &w->ptr, &w->offset, &w->size,
414 &p);
cec736d2
LP
415
416 if (r < 0)
417 return r;
418
419 *ret = (uint8_t*) p + delta;
420 return 0;
421}
422
423static bool verify_hash(Object *o) {
de190aef 424 uint64_t h1, h2;
cec736d2
LP
425
426 assert(o);
427
807e17f0 428 if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
cec736d2 429 h1 = le64toh(o->data.hash);
de190aef
LP
430 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
431 } else if (o->object.type == OBJECT_FIELD) {
432 h1 = le64toh(o->field.hash);
433 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
434 } else
435 return true;
cec736d2 436
de190aef 437 return h1 == h2;
cec736d2
LP
438}
439
de190aef 440int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
441 int r;
442 void *t;
443 Object *o;
444 uint64_t s;
445
446 assert(f);
447 assert(ret);
de190aef 448 assert(type < _OBJECT_TYPE_MAX);
cec736d2 449
de190aef 450 r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
451 if (r < 0)
452 return r;
453
454 o = (Object*) t;
455 s = le64toh(o->object.size);
456
457 if (s < sizeof(ObjectHeader))
458 return -EBADMSG;
459
de190aef 460 if (type >= 0 && o->object.type != type)
cec736d2
LP
461 return -EBADMSG;
462
463 if (s > sizeof(ObjectHeader)) {
de190aef 464 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
465 if (r < 0)
466 return r;
467
468 o = (Object*) t;
469 }
470
471 if (!verify_hash(o))
472 return -EBADMSG;
473
474 *ret = o;
475 return 0;
476}
477
c2373f84 478static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
479 uint64_t r;
480
481 assert(f);
482
dca6219e 483 r = le64toh(f->header->tail_seqnum) + 1;
c2373f84
LP
484
485 if (seqnum) {
de190aef 486 /* If an external seqnum counter was passed, we update
c2373f84
LP
487 * both the local and the external one, and set it to
488 * the maximum of both */
489
490 if (*seqnum + 1 > r)
491 r = *seqnum + 1;
492
493 *seqnum = r;
494 }
495
dca6219e 496 f->header->tail_seqnum = htole64(r);
cec736d2 497
dca6219e
LP
498 if (f->header->head_seqnum == 0)
499 f->header->head_seqnum = htole64(r);
de190aef 500
cec736d2
LP
501 return r;
502}
503
de190aef 504static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
505 int r;
506 uint64_t p;
507 Object *tail, *o;
508 void *t;
509
510 assert(f);
511 assert(size >= sizeof(ObjectHeader));
512 assert(offset);
513 assert(ret);
514
515 p = le64toh(f->header->tail_object_offset);
cec736d2 516 if (p == 0)
23b0b2b2 517 p = le64toh(f->header->header_size);
cec736d2 518 else {
de190aef 519 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
520 if (r < 0)
521 return r;
522
523 p += ALIGN64(le64toh(tail->object.size));
524 }
525
526 r = journal_file_allocate(f, p, size);
527 if (r < 0)
528 return r;
529
de190aef 530 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
531 if (r < 0)
532 return r;
533
534 o = (Object*) t;
535
536 zero(o->object);
de190aef 537 o->object.type = type;
cec736d2
LP
538 o->object.size = htole64(size);
539
540 f->header->tail_object_offset = htole64(p);
cec736d2
LP
541 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
542
543 *ret = o;
544 *offset = p;
545
546 return 0;
547}
548
de190aef 549static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
550 uint64_t s, p;
551 Object *o;
552 int r;
553
554 assert(f);
555
dfabe643 556 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
557 journal file and we want to make sure we never get beyond
558 75% fill level. Calculate the hash table size for the
559 maximum file size based on these metrics. */
560
dfabe643 561 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
562 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
563 s = DEFAULT_DATA_HASH_TABLE_SIZE;
564
dfabe643 565 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 566
de190aef
LP
567 r = journal_file_append_object(f,
568 OBJECT_DATA_HASH_TABLE,
569 offsetof(Object, hash_table.items) + s,
570 &o, &p);
cec736d2
LP
571 if (r < 0)
572 return r;
573
de190aef 574 memset(o->hash_table.items, 0, s);
cec736d2 575
de190aef
LP
576 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
577 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
578
579 return 0;
580}
581
de190aef 582static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
583 uint64_t s, p;
584 Object *o;
585 int r;
586
587 assert(f);
588
de190aef
LP
589 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
590 r = journal_file_append_object(f,
591 OBJECT_FIELD_HASH_TABLE,
592 offsetof(Object, hash_table.items) + s,
593 &o, &p);
cec736d2
LP
594 if (r < 0)
595 return r;
596
de190aef 597 memset(o->hash_table.items, 0, s);
cec736d2 598
de190aef
LP
599 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
600 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
601
602 return 0;
603}
604
de190aef 605static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
606 uint64_t s, p;
607 void *t;
608 int r;
609
610 assert(f);
611
de190aef
LP
612 p = le64toh(f->header->data_hash_table_offset);
613 s = le64toh(f->header->data_hash_table_size);
cec736d2 614
de190aef
LP
615 r = journal_file_move_to(f,
616 WINDOW_DATA_HASH_TABLE,
617 p, s,
618 &t);
cec736d2
LP
619 if (r < 0)
620 return r;
621
de190aef 622 f->data_hash_table = t;
cec736d2
LP
623 return 0;
624}
625
de190aef 626static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
627 uint64_t s, p;
628 void *t;
629 int r;
630
631 assert(f);
632
de190aef
LP
633 p = le64toh(f->header->field_hash_table_offset);
634 s = le64toh(f->header->field_hash_table_size);
cec736d2 635
de190aef
LP
636 r = journal_file_move_to(f,
637 WINDOW_FIELD_HASH_TABLE,
638 p, s,
639 &t);
cec736d2
LP
640 if (r < 0)
641 return r;
642
de190aef 643 f->field_hash_table = t;
cec736d2
LP
644 return 0;
645}
646
de190aef
LP
647static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
648 uint64_t p, h;
cec736d2
LP
649 int r;
650
651 assert(f);
652 assert(o);
653 assert(offset > 0);
de190aef 654 assert(o->object.type == OBJECT_DATA);
cec736d2 655
48496df6
LP
656 /* This might alter the window we are looking at */
657
de190aef
LP
658 o->data.next_hash_offset = o->data.next_field_offset = 0;
659 o->data.entry_offset = o->data.entry_array_offset = 0;
660 o->data.n_entries = 0;
cec736d2 661
de190aef 662 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 663 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
664 if (p == 0) {
665 /* Only entry in the hash table is easy */
de190aef 666 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 667 } else {
48496df6
LP
668 /* Move back to the previous data object, to patch in
669 * pointer */
cec736d2 670
de190aef 671 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
672 if (r < 0)
673 return r;
674
de190aef 675 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
676 }
677
de190aef 678 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 679
dca6219e
LP
680 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
681 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
682
cec736d2
LP
683 return 0;
684}
685
de190aef
LP
686int journal_file_find_data_object_with_hash(
687 JournalFile *f,
688 const void *data, uint64_t size, uint64_t hash,
689 Object **ret, uint64_t *offset) {
48496df6 690
de190aef 691 uint64_t p, osize, h;
cec736d2
LP
692 int r;
693
694 assert(f);
695 assert(data || size == 0);
696
697 osize = offsetof(Object, data.payload) + size;
698
bc85bfee
LP
699 if (f->header->data_hash_table_size == 0)
700 return -EBADMSG;
701
de190aef
LP
702 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
703 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 704
de190aef
LP
705 while (p > 0) {
706 Object *o;
cec736d2 707
de190aef 708 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
709 if (r < 0)
710 return r;
711
807e17f0 712 if (le64toh(o->data.hash) != hash)
85a131e8 713 goto next;
807e17f0
LP
714
715 if (o->object.flags & OBJECT_COMPRESSED) {
716#ifdef HAVE_XZ
b785c858 717 uint64_t l, rsize;
cec736d2 718
807e17f0
LP
719 l = le64toh(o->object.size);
720 if (l <= offsetof(Object, data.payload))
cec736d2
LP
721 return -EBADMSG;
722
807e17f0
LP
723 l -= offsetof(Object, data.payload);
724
725 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
726 return -EBADMSG;
727
b785c858 728 if (rsize == size &&
807e17f0
LP
729 memcmp(f->compress_buffer, data, size) == 0) {
730
731 if (ret)
732 *ret = o;
733
734 if (offset)
735 *offset = p;
736
737 return 1;
738 }
739#else
740 return -EPROTONOSUPPORT;
741#endif
742
743 } else if (le64toh(o->object.size) == osize &&
744 memcmp(o->data.payload, data, size) == 0) {
745
cec736d2
LP
746 if (ret)
747 *ret = o;
748
749 if (offset)
750 *offset = p;
751
de190aef 752 return 1;
cec736d2
LP
753 }
754
85a131e8 755 next:
cec736d2
LP
756 p = le64toh(o->data.next_hash_offset);
757 }
758
de190aef
LP
759 return 0;
760}
761
762int journal_file_find_data_object(
763 JournalFile *f,
764 const void *data, uint64_t size,
765 Object **ret, uint64_t *offset) {
766
767 uint64_t hash;
768
769 assert(f);
770 assert(data || size == 0);
771
772 hash = hash64(data, size);
773
774 return journal_file_find_data_object_with_hash(f,
775 data, size, hash,
776 ret, offset);
777}
778
48496df6
LP
779static int journal_file_append_data(
780 JournalFile *f,
781 const void *data, uint64_t size,
782 Object **ret, uint64_t *offset) {
783
de190aef
LP
784 uint64_t hash, p;
785 uint64_t osize;
786 Object *o;
787 int r;
807e17f0 788 bool compressed = false;
de190aef
LP
789
790 assert(f);
791 assert(data || size == 0);
792
793 hash = hash64(data, size);
794
795 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
796 if (r < 0)
797 return r;
798 else if (r > 0) {
799
800 if (ret)
801 *ret = o;
802
803 if (offset)
804 *offset = p;
805
806 return 0;
807 }
808
809 osize = offsetof(Object, data.payload) + size;
810 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
811 if (r < 0)
812 return r;
813
cec736d2 814 o->data.hash = htole64(hash);
807e17f0
LP
815
816#ifdef HAVE_XZ
817 if (f->compress &&
818 size >= COMPRESSION_SIZE_THRESHOLD) {
819 uint64_t rsize;
820
821 compressed = compress_blob(data, size, o->data.payload, &rsize);
822
823 if (compressed) {
824 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
825 o->object.flags |= OBJECT_COMPRESSED;
826
807e17f0
LP
827 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
828 }
829 }
830#endif
831
64825d3c 832 if (!compressed && size > 0)
807e17f0 833 memcpy(o->data.payload, data, size);
cec736d2 834
de190aef 835 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
836 if (r < 0)
837 return r;
838
b0af6f41
LP
839 r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
840 if (r < 0)
841 return r;
842
48496df6
LP
843 /* The linking might have altered the window, so let's
844 * refresh our pointer */
845 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
846 if (r < 0)
847 return r;
848
cec736d2
LP
849 if (ret)
850 *ret = o;
851
852 if (offset)
de190aef 853 *offset = p;
cec736d2
LP
854
855 return 0;
856}
857
858uint64_t journal_file_entry_n_items(Object *o) {
859 assert(o);
7be3aa17 860 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
861
862 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
863}
864
de190aef
LP
865static uint64_t journal_file_entry_array_n_items(Object *o) {
866 assert(o);
7be3aa17 867 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
868
869 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
870}
871
872static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
873 le64_t *first,
874 le64_t *idx,
de190aef 875 uint64_t p) {
cec736d2 876 int r;
de190aef
LP
877 uint64_t n = 0, ap = 0, q, i, a, hidx;
878 Object *o;
879
cec736d2 880 assert(f);
de190aef
LP
881 assert(first);
882 assert(idx);
883 assert(p > 0);
cec736d2 884
de190aef
LP
885 a = le64toh(*first);
886 i = hidx = le64toh(*idx);
887 while (a > 0) {
888
889 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
890 if (r < 0)
891 return r;
cec736d2 892
de190aef
LP
893 n = journal_file_entry_array_n_items(o);
894 if (i < n) {
895 o->entry_array.items[i] = htole64(p);
896 *idx = htole64(hidx + 1);
897 return 0;
898 }
cec736d2 899
de190aef
LP
900 i -= n;
901 ap = a;
902 a = le64toh(o->entry_array.next_entry_array_offset);
903 }
904
905 if (hidx > n)
906 n = (hidx+1) * 2;
907 else
908 n = n * 2;
909
910 if (n < 4)
911 n = 4;
912
913 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
914 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
915 &o, &q);
cec736d2
LP
916 if (r < 0)
917 return r;
918
b0af6f41
LP
919 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
920 if (r < 0)
921 return r;
922
de190aef 923 o->entry_array.items[i] = htole64(p);
cec736d2 924
de190aef 925 if (ap == 0)
7be3aa17 926 *first = htole64(q);
cec736d2 927 else {
de190aef 928 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
929 if (r < 0)
930 return r;
931
de190aef
LP
932 o->entry_array.next_entry_array_offset = htole64(q);
933 }
cec736d2 934
de190aef
LP
935 *idx = htole64(hidx + 1);
936
937 return 0;
938}
cec736d2 939
de190aef 940static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
941 le64_t *extra,
942 le64_t *first,
943 le64_t *idx,
de190aef
LP
944 uint64_t p) {
945
946 int r;
947
948 assert(f);
949 assert(extra);
950 assert(first);
951 assert(idx);
952 assert(p > 0);
953
954 if (*idx == 0)
955 *extra = htole64(p);
956 else {
4fd052ae 957 le64_t i;
de190aef 958
7be3aa17 959 i = htole64(le64toh(*idx) - 1);
de190aef
LP
960 r = link_entry_into_array(f, first, &i, p);
961 if (r < 0)
962 return r;
cec736d2
LP
963 }
964
de190aef
LP
965 *idx = htole64(le64toh(*idx) + 1);
966 return 0;
967}
968
969static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
970 uint64_t p;
971 int r;
972 assert(f);
973 assert(o);
974 assert(offset > 0);
975
976 p = le64toh(o->entry.items[i].object_offset);
977 if (p == 0)
978 return -EINVAL;
979
980 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
981 if (r < 0)
982 return r;
983
de190aef
LP
984 return link_entry_into_array_plus_one(f,
985 &o->data.entry_offset,
986 &o->data.entry_array_offset,
987 &o->data.n_entries,
988 offset);
cec736d2
LP
989}
990
991static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 992 uint64_t n, i;
cec736d2
LP
993 int r;
994
995 assert(f);
996 assert(o);
997 assert(offset > 0);
de190aef 998 assert(o->object.type == OBJECT_ENTRY);
cec736d2 999
b788cc23
LP
1000 __sync_synchronize();
1001
cec736d2 1002 /* Link up the entry itself */
de190aef
LP
1003 r = link_entry_into_array(f,
1004 &f->header->entry_array_offset,
1005 &f->header->n_entries,
1006 offset);
1007 if (r < 0)
1008 return r;
cec736d2 1009
aaf53376 1010 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 1011
de190aef 1012 if (f->header->head_entry_realtime == 0)
0ac38b70 1013 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 1014
0ac38b70 1015 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
1016 f->header->tail_entry_monotonic = o->entry.monotonic;
1017
1018 f->tail_entry_monotonic_valid = true;
cec736d2
LP
1019
1020 /* Link up the items */
1021 n = journal_file_entry_n_items(o);
1022 for (i = 0; i < n; i++) {
1023 r = journal_file_link_entry_item(f, o, offset, i);
1024 if (r < 0)
1025 return r;
1026 }
1027
cec736d2
LP
1028 return 0;
1029}
1030
1031static int journal_file_append_entry_internal(
1032 JournalFile *f,
1033 const dual_timestamp *ts,
1034 uint64_t xor_hash,
1035 const EntryItem items[], unsigned n_items,
de190aef 1036 uint64_t *seqnum,
cec736d2
LP
1037 Object **ret, uint64_t *offset) {
1038 uint64_t np;
1039 uint64_t osize;
1040 Object *o;
1041 int r;
1042
1043 assert(f);
1044 assert(items || n_items == 0);
de190aef 1045 assert(ts);
cec736d2
LP
1046
1047 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1048
de190aef 1049 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
1050 if (r < 0)
1051 return r;
1052
de190aef 1053 o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
cec736d2 1054 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
1055 o->entry.realtime = htole64(ts->realtime);
1056 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
1057 o->entry.xor_hash = htole64(xor_hash);
1058 o->entry.boot_id = f->header->boot_id;
1059
b0af6f41
LP
1060 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
1061 if (r < 0)
1062 return r;
1063
cec736d2
LP
1064 r = journal_file_link_entry(f, o, np);
1065 if (r < 0)
1066 return r;
1067
1068 if (ret)
1069 *ret = o;
1070
1071 if (offset)
1072 *offset = np;
1073
1074 return 0;
1075}
1076
cf244689 1077void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1078 assert(f);
1079
1080 /* inotify() does not receive IN_MODIFY events from file
1081 * accesses done via mmap(). After each access we hence
1082 * trigger IN_MODIFY by truncating the journal file to its
1083 * current size which triggers IN_MODIFY. */
1084
bc85bfee
LP
1085 __sync_synchronize();
1086
50f20cfd
LP
1087 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1088 log_error("Failed to to truncate file to its own size: %m");
1089}
1090
de190aef 1091int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1092 unsigned i;
1093 EntryItem *items;
1094 int r;
1095 uint64_t xor_hash = 0;
de190aef 1096 struct dual_timestamp _ts;
cec736d2
LP
1097
1098 assert(f);
1099 assert(iovec || n_iovec == 0);
1100
de190aef
LP
1101 if (!f->writable)
1102 return -EPERM;
1103
1104 if (!ts) {
1105 dual_timestamp_get(&_ts);
1106 ts = &_ts;
1107 }
1108
1109 if (f->tail_entry_monotonic_valid &&
1110 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1111 return -EINVAL;
1112
7560fffc
LP
1113 r = journal_file_maybe_append_tag(f, ts->realtime);
1114 if (r < 0)
1115 return r;
1116
64825d3c
LP
1117 /* alloca() can't take 0, hence let's allocate at least one */
1118 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1119
1120 for (i = 0; i < n_iovec; i++) {
1121 uint64_t p;
1122 Object *o;
1123
1124 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1125 if (r < 0)
cf244689 1126 return r;
cec736d2
LP
1127
1128 xor_hash ^= le64toh(o->data.hash);
1129 items[i].object_offset = htole64(p);
de7b95cd 1130 items[i].hash = o->data.hash;
cec736d2
LP
1131 }
1132
de190aef 1133 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1134
50f20cfd
LP
1135 journal_file_post_change(f);
1136
cec736d2
LP
1137 return r;
1138}
1139
de190aef
LP
1140static int generic_array_get(JournalFile *f,
1141 uint64_t first,
1142 uint64_t i,
1143 Object **ret, uint64_t *offset) {
1144
cec736d2 1145 Object *o;
6c8a39b8 1146 uint64_t p = 0, a;
cec736d2
LP
1147 int r;
1148
1149 assert(f);
1150
de190aef
LP
1151 a = first;
1152 while (a > 0) {
1153 uint64_t n;
cec736d2 1154
de190aef
LP
1155 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1156 if (r < 0)
1157 return r;
cec736d2 1158
de190aef
LP
1159 n = journal_file_entry_array_n_items(o);
1160 if (i < n) {
1161 p = le64toh(o->entry_array.items[i]);
1162 break;
cec736d2
LP
1163 }
1164
de190aef
LP
1165 i -= n;
1166 a = le64toh(o->entry_array.next_entry_array_offset);
1167 }
1168
1169 if (a <= 0 || p <= 0)
1170 return 0;
1171
1172 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1173 if (r < 0)
1174 return r;
1175
1176 if (ret)
1177 *ret = o;
1178
1179 if (offset)
1180 *offset = p;
1181
1182 return 1;
1183}
1184
1185static int generic_array_get_plus_one(JournalFile *f,
1186 uint64_t extra,
1187 uint64_t first,
1188 uint64_t i,
1189 Object **ret, uint64_t *offset) {
1190
1191 Object *o;
1192
1193 assert(f);
1194
1195 if (i == 0) {
1196 int r;
1197
1198 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1199 if (r < 0)
1200 return r;
1201
de190aef
LP
1202 if (ret)
1203 *ret = o;
cec736d2 1204
de190aef
LP
1205 if (offset)
1206 *offset = extra;
cec736d2 1207
de190aef 1208 return 1;
cec736d2
LP
1209 }
1210
de190aef
LP
1211 return generic_array_get(f, first, i-1, ret, offset);
1212}
cec736d2 1213
de190aef
LP
/* Results of the test_object() callbacks used by the bisection code */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object's value is smaller than the needle */
        TEST_RIGHT = 2  /* the tested object's value is larger than the needle */
};
cec736d2 1219
de190aef
LP
1220static int generic_array_bisect(JournalFile *f,
1221 uint64_t first,
1222 uint64_t n,
1223 uint64_t needle,
1224 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1225 direction_t direction,
1226 Object **ret,
1227 uint64_t *offset,
1228 uint64_t *idx) {
1229
1230 uint64_t a, p, t = 0, i = 0, last_p = 0;
1231 bool subtract_one = false;
1232 Object *o, *array = NULL;
1233 int r;
cec736d2 1234
de190aef
LP
1235 assert(f);
1236 assert(test_object);
cec736d2 1237
de190aef
LP
1238 a = first;
1239 while (a > 0) {
1240 uint64_t left, right, k, lp;
1241
1242 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1243 if (r < 0)
1244 return r;
1245
de190aef
LP
1246 k = journal_file_entry_array_n_items(array);
1247 right = MIN(k, n);
1248 if (right <= 0)
1249 return 0;
cec736d2 1250
de190aef
LP
1251 i = right - 1;
1252 lp = p = le64toh(array->entry_array.items[i]);
1253 if (p <= 0)
1254 return -EBADMSG;
cec736d2 1255
de190aef
LP
1256 r = test_object(f, p, needle);
1257 if (r < 0)
1258 return r;
cec736d2 1259
de190aef
LP
1260 if (r == TEST_FOUND)
1261 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1262
1263 if (r == TEST_RIGHT) {
1264 left = 0;
1265 right -= 1;
1266 for (;;) {
1267 if (left == right) {
1268 if (direction == DIRECTION_UP)
1269 subtract_one = true;
1270
1271 i = left;
1272 goto found;
1273 }
1274
1275 assert(left < right);
1276
1277 i = (left + right) / 2;
1278 p = le64toh(array->entry_array.items[i]);
1279 if (p <= 0)
1280 return -EBADMSG;
1281
1282 r = test_object(f, p, needle);
1283 if (r < 0)
1284 return r;
cec736d2 1285
de190aef
LP
1286 if (r == TEST_FOUND)
1287 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1288
1289 if (r == TEST_RIGHT)
1290 right = i;
1291 else
1292 left = i + 1;
1293 }
1294 }
1295
cbdca852
LP
1296 if (k > n) {
1297 if (direction == DIRECTION_UP) {
1298 i = n;
1299 subtract_one = true;
1300 goto found;
1301 }
1302
cec736d2 1303 return 0;
cbdca852 1304 }
cec736d2 1305
de190aef
LP
1306 last_p = lp;
1307
1308 n -= k;
1309 t += k;
1310 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1311 }
1312
1313 return 0;
de190aef
LP
1314
1315found:
1316 if (subtract_one && t == 0 && i == 0)
1317 return 0;
1318
1319 if (subtract_one && i == 0)
1320 p = last_p;
1321 else if (subtract_one)
1322 p = le64toh(array->entry_array.items[i-1]);
1323 else
1324 p = le64toh(array->entry_array.items[i]);
1325
1326 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1327 if (r < 0)
1328 return r;
1329
1330 if (ret)
1331 *ret = o;
1332
1333 if (offset)
1334 *offset = p;
1335
1336 if (idx)
cbdca852 1337 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1338
1339 return 1;
cec736d2
LP
1340}
1341
de190aef
LP
1342static int generic_array_bisect_plus_one(JournalFile *f,
1343 uint64_t extra,
1344 uint64_t first,
1345 uint64_t n,
1346 uint64_t needle,
1347 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1348 direction_t direction,
1349 Object **ret,
1350 uint64_t *offset,
1351 uint64_t *idx) {
1352
cec736d2 1353 int r;
cbdca852
LP
1354 bool step_back = false;
1355 Object *o;
cec736d2
LP
1356
1357 assert(f);
de190aef 1358 assert(test_object);
cec736d2 1359
de190aef
LP
1360 if (n <= 0)
1361 return 0;
cec736d2 1362
de190aef
LP
1363 /* This bisects the array in object 'first', but first checks
1364 * an extra */
de190aef
LP
1365 r = test_object(f, extra, needle);
1366 if (r < 0)
1367 return r;
a536e261
LP
1368
1369 if (r == TEST_FOUND)
1370 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1371
cbdca852
LP
1372 /* if we are looking with DIRECTION_UP then we need to first
1373 see if in the actual array there is a matching entry, and
1374 return the last one of that. But if there isn't any we need
1375 to return this one. Hence remember this, and return it
1376 below. */
1377 if (r == TEST_LEFT)
1378 step_back = direction == DIRECTION_UP;
de190aef 1379
cbdca852
LP
1380 if (r == TEST_RIGHT) {
1381 if (direction == DIRECTION_DOWN)
1382 goto found;
1383 else
1384 return 0;
a536e261 1385 }
cec736d2 1386
de190aef
LP
1387 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1388
cbdca852
LP
1389 if (r == 0 && step_back)
1390 goto found;
1391
ecf68b1d 1392 if (r > 0 && idx)
de190aef
LP
1393 (*idx) ++;
1394
1395 return r;
cbdca852
LP
1396
1397found:
1398 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1399 if (r < 0)
1400 return r;
1401
1402 if (ret)
1403 *ret = o;
1404
1405 if (offset)
1406 *offset = extra;
1407
1408 if (idx)
1409 *idx = 0;
1410
1411 return 1;
1412}
1413
1414static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1415 assert(f);
1416 assert(p > 0);
1417
1418 if (p == needle)
1419 return TEST_FOUND;
1420 else if (p < needle)
1421 return TEST_LEFT;
1422 else
1423 return TEST_RIGHT;
1424}
1425
1426int journal_file_move_to_entry_by_offset(
1427 JournalFile *f,
1428 uint64_t p,
1429 direction_t direction,
1430 Object **ret,
1431 uint64_t *offset) {
1432
1433 return generic_array_bisect(f,
1434 le64toh(f->header->entry_array_offset),
1435 le64toh(f->header->n_entries),
1436 p,
1437 test_object_offset,
1438 direction,
1439 ret, offset, NULL);
de190aef
LP
1440}
1441
cbdca852 1442
de190aef
LP
1443static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1444 Object *o;
1445 int r;
1446
1447 assert(f);
1448 assert(p > 0);
1449
1450 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1451 if (r < 0)
1452 return r;
1453
de190aef
LP
1454 if (le64toh(o->entry.seqnum) == needle)
1455 return TEST_FOUND;
1456 else if (le64toh(o->entry.seqnum) < needle)
1457 return TEST_LEFT;
1458 else
1459 return TEST_RIGHT;
1460}
cec736d2 1461
de190aef
LP
1462int journal_file_move_to_entry_by_seqnum(
1463 JournalFile *f,
1464 uint64_t seqnum,
1465 direction_t direction,
1466 Object **ret,
1467 uint64_t *offset) {
1468
1469 return generic_array_bisect(f,
1470 le64toh(f->header->entry_array_offset),
1471 le64toh(f->header->n_entries),
1472 seqnum,
1473 test_object_seqnum,
1474 direction,
1475 ret, offset, NULL);
1476}
cec736d2 1477
de190aef
LP
1478static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1479 Object *o;
1480 int r;
1481
1482 assert(f);
1483 assert(p > 0);
1484
1485 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1486 if (r < 0)
1487 return r;
1488
1489 if (le64toh(o->entry.realtime) == needle)
1490 return TEST_FOUND;
1491 else if (le64toh(o->entry.realtime) < needle)
1492 return TEST_LEFT;
1493 else
1494 return TEST_RIGHT;
cec736d2
LP
1495}
1496
de190aef
LP
1497int journal_file_move_to_entry_by_realtime(
1498 JournalFile *f,
1499 uint64_t realtime,
1500 direction_t direction,
1501 Object **ret,
1502 uint64_t *offset) {
1503
1504 return generic_array_bisect(f,
1505 le64toh(f->header->entry_array_offset),
1506 le64toh(f->header->n_entries),
1507 realtime,
1508 test_object_realtime,
1509 direction,
1510 ret, offset, NULL);
1511}
1512
1513static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1514 Object *o;
1515 int r;
1516
1517 assert(f);
1518 assert(p > 0);
1519
1520 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1521 if (r < 0)
1522 return r;
1523
1524 if (le64toh(o->entry.monotonic) == needle)
1525 return TEST_FOUND;
1526 else if (le64toh(o->entry.monotonic) < needle)
1527 return TEST_LEFT;
1528 else
1529 return TEST_RIGHT;
1530}
1531
1532int journal_file_move_to_entry_by_monotonic(
1533 JournalFile *f,
1534 sd_id128_t boot_id,
1535 uint64_t monotonic,
1536 direction_t direction,
1537 Object **ret,
1538 uint64_t *offset) {
1539
10b6f904 1540 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1541 Object *o;
1542 int r;
1543
cbdca852 1544 assert(f);
de190aef 1545
cbdca852 1546 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1547 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1548 if (r < 0)
1549 return r;
cbdca852 1550 if (r == 0)
de190aef
LP
1551 return -ENOENT;
1552
1553 return generic_array_bisect_plus_one(f,
1554 le64toh(o->data.entry_offset),
1555 le64toh(o->data.entry_array_offset),
1556 le64toh(o->data.n_entries),
1557 monotonic,
1558 test_object_monotonic,
1559 direction,
1560 ret, offset, NULL);
1561}
1562
de190aef
LP
1563int journal_file_next_entry(
1564 JournalFile *f,
1565 Object *o, uint64_t p,
1566 direction_t direction,
1567 Object **ret, uint64_t *offset) {
1568
1569 uint64_t i, n;
cec736d2
LP
1570 int r;
1571
1572 assert(f);
de190aef
LP
1573 assert(p > 0 || !o);
1574
1575 n = le64toh(f->header->n_entries);
1576 if (n <= 0)
1577 return 0;
cec736d2
LP
1578
1579 if (!o)
de190aef 1580 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1581 else {
de190aef 1582 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1583 return -EINVAL;
1584
de190aef
LP
1585 r = generic_array_bisect(f,
1586 le64toh(f->header->entry_array_offset),
1587 le64toh(f->header->n_entries),
1588 p,
1589 test_object_offset,
1590 DIRECTION_DOWN,
1591 NULL, NULL,
1592 &i);
1593 if (r <= 0)
1594 return r;
1595
1596 if (direction == DIRECTION_DOWN) {
1597 if (i >= n - 1)
1598 return 0;
1599
1600 i++;
1601 } else {
1602 if (i <= 0)
1603 return 0;
1604
1605 i--;
1606 }
cec736d2
LP
1607 }
1608
de190aef
LP
1609 /* And jump to it */
1610 return generic_array_get(f,
1611 le64toh(f->header->entry_array_offset),
1612 i,
1613 ret, offset);
1614}
cec736d2 1615
de190aef
LP
1616int journal_file_skip_entry(
1617 JournalFile *f,
1618 Object *o, uint64_t p,
1619 int64_t skip,
1620 Object **ret, uint64_t *offset) {
1621
1622 uint64_t i, n;
1623 int r;
1624
1625 assert(f);
1626 assert(o);
1627 assert(p > 0);
1628
1629 if (o->object.type != OBJECT_ENTRY)
1630 return -EINVAL;
1631
1632 r = generic_array_bisect(f,
1633 le64toh(f->header->entry_array_offset),
1634 le64toh(f->header->n_entries),
1635 p,
1636 test_object_offset,
1637 DIRECTION_DOWN,
1638 NULL, NULL,
1639 &i);
1640 if (r <= 0)
cec736d2
LP
1641 return r;
1642
de190aef
LP
1643 /* Calculate new index */
1644 if (skip < 0) {
1645 if ((uint64_t) -skip >= i)
1646 i = 0;
1647 else
1648 i = i - (uint64_t) -skip;
1649 } else
1650 i += (uint64_t) skip;
cec736d2 1651
de190aef
LP
1652 n = le64toh(f->header->n_entries);
1653 if (n <= 0)
1654 return -EBADMSG;
cec736d2 1655
de190aef
LP
1656 if (i >= n)
1657 i = n-1;
1658
1659 return generic_array_get(f,
1660 le64toh(f->header->entry_array_offset),
1661 i,
1662 ret, offset);
cec736d2
LP
1663}
1664
de190aef
LP
1665int journal_file_next_entry_for_data(
1666 JournalFile *f,
1667 Object *o, uint64_t p,
1668 uint64_t data_offset,
1669 direction_t direction,
1670 Object **ret, uint64_t *offset) {
1671
1672 uint64_t n, i;
cec736d2 1673 int r;
de190aef 1674 Object *d;
cec736d2
LP
1675
1676 assert(f);
de190aef 1677 assert(p > 0 || !o);
cec736d2 1678
de190aef 1679 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1680 if (r < 0)
de190aef 1681 return r;
cec736d2 1682
de190aef
LP
1683 n = le64toh(d->data.n_entries);
1684 if (n <= 0)
1685 return n;
cec736d2 1686
de190aef
LP
1687 if (!o)
1688 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1689 else {
1690 if (o->object.type != OBJECT_ENTRY)
1691 return -EINVAL;
cec736d2 1692
de190aef
LP
1693 r = generic_array_bisect_plus_one(f,
1694 le64toh(d->data.entry_offset),
1695 le64toh(d->data.entry_array_offset),
1696 le64toh(d->data.n_entries),
1697 p,
1698 test_object_offset,
1699 DIRECTION_DOWN,
1700 NULL, NULL,
1701 &i);
1702
1703 if (r <= 0)
cec736d2
LP
1704 return r;
1705
de190aef
LP
1706 if (direction == DIRECTION_DOWN) {
1707 if (i >= n - 1)
1708 return 0;
cec736d2 1709
de190aef
LP
1710 i++;
1711 } else {
1712 if (i <= 0)
1713 return 0;
cec736d2 1714
de190aef
LP
1715 i--;
1716 }
cec736d2 1717
de190aef 1718 }
cec736d2 1719
de190aef
LP
1720 return generic_array_get_plus_one(f,
1721 le64toh(d->data.entry_offset),
1722 le64toh(d->data.entry_array_offset),
1723 i,
1724 ret, offset);
1725}
cec736d2 1726
cbdca852
LP
1727int journal_file_move_to_entry_by_offset_for_data(
1728 JournalFile *f,
1729 uint64_t data_offset,
1730 uint64_t p,
1731 direction_t direction,
1732 Object **ret, uint64_t *offset) {
1733
1734 int r;
1735 Object *d;
1736
1737 assert(f);
1738
1739 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1740 if (r < 0)
1741 return r;
1742
1743 return generic_array_bisect_plus_one(f,
1744 le64toh(d->data.entry_offset),
1745 le64toh(d->data.entry_array_offset),
1746 le64toh(d->data.n_entries),
1747 p,
1748 test_object_offset,
1749 direction,
1750 ret, offset, NULL);
1751}
1752
1753int journal_file_move_to_entry_by_monotonic_for_data(
1754 JournalFile *f,
1755 uint64_t data_offset,
1756 sd_id128_t boot_id,
1757 uint64_t monotonic,
1758 direction_t direction,
1759 Object **ret, uint64_t *offset) {
1760
1761 char t[9+32+1] = "_BOOT_ID=";
1762 Object *o, *d;
1763 int r;
1764 uint64_t b, z;
1765
1766 assert(f);
1767
1768 /* First, seek by time */
1769 sd_id128_to_string(boot_id, t + 9);
1770 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1771 if (r < 0)
1772 return r;
1773 if (r == 0)
1774 return -ENOENT;
1775
1776 r = generic_array_bisect_plus_one(f,
1777 le64toh(o->data.entry_offset),
1778 le64toh(o->data.entry_array_offset),
1779 le64toh(o->data.n_entries),
1780 monotonic,
1781 test_object_monotonic,
1782 direction,
1783 NULL, &z, NULL);
1784 if (r <= 0)
1785 return r;
1786
1787 /* And now, continue seeking until we find an entry that
1788 * exists in both bisection arrays */
1789
1790 for (;;) {
1791 Object *qo;
1792 uint64_t p, q;
1793
1794 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1795 if (r < 0)
1796 return r;
1797
1798 r = generic_array_bisect_plus_one(f,
1799 le64toh(d->data.entry_offset),
1800 le64toh(d->data.entry_array_offset),
1801 le64toh(d->data.n_entries),
1802 z,
1803 test_object_offset,
1804 direction,
1805 NULL, &p, NULL);
1806 if (r <= 0)
1807 return r;
1808
1809 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1810 if (r < 0)
1811 return r;
1812
1813 r = generic_array_bisect_plus_one(f,
1814 le64toh(o->data.entry_offset),
1815 le64toh(o->data.entry_array_offset),
1816 le64toh(o->data.n_entries),
1817 p,
1818 test_object_offset,
1819 direction,
1820 &qo, &q, NULL);
1821
1822 if (r <= 0)
1823 return r;
1824
1825 if (p == q) {
1826 if (ret)
1827 *ret = qo;
1828 if (offset)
1829 *offset = q;
1830
1831 return 1;
1832 }
1833
1834 z = q;
1835 }
1836
1837 return 0;
1838}
1839
de190aef
LP
1840int journal_file_move_to_entry_by_seqnum_for_data(
1841 JournalFile *f,
1842 uint64_t data_offset,
1843 uint64_t seqnum,
1844 direction_t direction,
1845 Object **ret, uint64_t *offset) {
cec736d2 1846
de190aef
LP
1847 Object *d;
1848 int r;
cec736d2 1849
91a31dde
LP
1850 assert(f);
1851
de190aef 1852 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1853 if (r < 0)
de190aef 1854 return r;
cec736d2 1855
de190aef
LP
1856 return generic_array_bisect_plus_one(f,
1857 le64toh(d->data.entry_offset),
1858 le64toh(d->data.entry_array_offset),
1859 le64toh(d->data.n_entries),
1860 seqnum,
1861 test_object_seqnum,
1862 direction,
1863 ret, offset, NULL);
1864}
cec736d2 1865
de190aef
LP
1866int journal_file_move_to_entry_by_realtime_for_data(
1867 JournalFile *f,
1868 uint64_t data_offset,
1869 uint64_t realtime,
1870 direction_t direction,
1871 Object **ret, uint64_t *offset) {
1872
1873 Object *d;
1874 int r;
1875
91a31dde
LP
1876 assert(f);
1877
de190aef 1878 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1879 if (r < 0)
de190aef
LP
1880 return r;
1881
1882 return generic_array_bisect_plus_one(f,
1883 le64toh(d->data.entry_offset),
1884 le64toh(d->data.entry_array_offset),
1885 le64toh(d->data.n_entries),
1886 realtime,
1887 test_object_realtime,
1888 direction,
1889 ret, offset, NULL);
cec736d2
LP
1890}
1891
7560fffc
LP
1892static void *fsprg_state(JournalFile *f) {
1893 uint64_t a, b;
1894 assert(f);
1895
1896 if (!f->authenticate)
1897 return NULL;
1898
1899 a = le64toh(f->fsprg_header->header_size);
1900 b = le64toh(f->fsprg_header->state_size);
1901
1902 if (a + b > f->fsprg_size)
1903 return NULL;
1904
1905 return (uint8_t*) f->fsprg_header + a;
1906}
1907
b0af6f41 1908int journal_file_append_tag(JournalFile *f) {
7560fffc
LP
1909 Object *o;
1910 uint64_t p;
1911 int r;
1912
1913 assert(f);
1914
1915 if (!f->authenticate)
1916 return 0;
1917
1918 if (!f->hmac_running)
1919 return 0;
1920
1921 log_debug("Writing tag for epoch %llu\n", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
1922
1923 assert(f->hmac);
1924
1925 r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
1926 if (r < 0)
1927 return r;
1928
1929 /* Get the HMAC tag and store it in the object */
1930 memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
1931 f->hmac_running = false;
1932
1933 return 0;
1934}
1935
1936static int journal_file_hmac_start(JournalFile *f) {
1937 uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
1938
1939 assert(f);
1940
1941 if (!f->authenticate)
1942 return 0;
1943
1944 if (f->hmac_running)
1945 return 0;
1946
1947 /* Prepare HMAC for next cycle */
1948 gcry_md_reset(f->hmac);
1949 FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
1950 gcry_md_setkey(f->hmac, key, sizeof(key));
1951
1952 f->hmac_running = true;
1953
1954 return 0;
1955}
1956
1957static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
1958 uint64_t t;
1959
1960 assert(f);
1961 assert(epoch);
1962 assert(f->authenticate);
1963
1964 if (le64toh(f->fsprg_header->fsprg_start_usec) == 0 ||
1965 le64toh(f->fsprg_header->fsprg_interval_usec) == 0)
1966 return -ENOTSUP;
1967
1968 if (realtime < le64toh(f->fsprg_header->fsprg_start_usec))
1969 return -ESTALE;
1970
1971 t = realtime - le64toh(f->fsprg_header->fsprg_start_usec);
1972 t = t / le64toh(f->fsprg_header->fsprg_interval_usec);
1973
1974 *epoch = t;
1975 return 0;
1976}
1977
1978static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
1979 uint64_t goal, epoch;
1980 int r;
1981 assert(f);
1982
1983 if (!f->authenticate)
1984 return 0;
1985
1986 r = journal_file_get_epoch(f, realtime, &goal);
1987 if (r < 0)
1988 return r;
1989
1990 epoch = FSPRG_GetEpoch(fsprg_state(f));
1991 if (epoch > goal)
1992 return -ESTALE;
1993
1994 return epoch != goal;
1995}
1996
1997static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
1998 uint64_t goal, epoch;
1999 int r;
2000
2001 assert(f);
2002
2003 if (!f->authenticate)
2004 return 0;
2005
2006 r = journal_file_get_epoch(f, realtime, &goal);
2007 if (r < 0)
2008 return r;
2009
2010 epoch = FSPRG_GetEpoch(fsprg_state(f));
2011 if (epoch < goal)
2012 log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) epoch, (unsigned long long) goal);
2013
2014 for (;;) {
2015 if (epoch > goal)
2016 return -ESTALE;
2017 if (epoch == goal)
2018 return 0;
2019
2020 FSPRG_Evolve(fsprg_state(f));
2021 epoch = FSPRG_GetEpoch(fsprg_state(f));
2022 }
2023}
2024
2025static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
2026 int r;
2027
2028 assert(f);
2029
2030 if (!f->authenticate)
2031 return 0;
2032
2033 r = journal_file_need_evolve(f, realtime);
2034 if (r <= 0)
2035 return 0;
2036
2037 r = journal_file_append_tag(f);
2038 if (r < 0)
2039 return r;
2040
2041 r = journal_file_evolve(f, realtime);
2042 if (r < 0)
2043 return r;
2044
2045 r = journal_file_hmac_start(f);
2046 if (r < 0)
2047 return r;
2048
2049 return 0;
2050}
2051
2052static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
2053 int r;
2054 Object *o;
2055
2056 assert(f);
2057
2058 if (!f->authenticate)
2059 return 0;
2060
2061 r = journal_file_hmac_start(f);
2062 if (r < 0)
2063 return r;
2064
2065 r = journal_file_move_to_object(f, type, p, &o);
2066 if (r < 0)
2067 return r;
2068
2069 gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
2070
2071 switch (o->object.type) {
2072
2073 case OBJECT_DATA:
2074 /* All but: entry_array_offset, n_entries are mutable */
2075 gcry_md_write(f->hmac, &o->data.hash, offsetof(DataObject, entry_array_offset) - offsetof(DataObject, hash));
2076 gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
2077 break;
2078
2079 case OBJECT_ENTRY:
2080 /* All */
2081 gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
2082 break;
2083
2084 case OBJECT_FIELD_HASH_TABLE:
2085 case OBJECT_DATA_HASH_TABLE:
2086 case OBJECT_ENTRY_ARRAY:
2087 /* Nothing: everything is mutable */
2088 break;
2089
2090 case OBJECT_TAG:
2091 /* All */
2092 gcry_md_write(f->hmac, o->tag.tag, le64toh(o->object.size) - offsetof(TagObject, tag));
2093 break;
2094
2095 default:
2096 return -EINVAL;
2097 }
2098
2099 return 0;
2100}
2101
2102static int journal_file_hmac_put_header(JournalFile *f) {
2103 int r;
2104
2105 assert(f);
2106
2107 if (!f->authenticate)
2108 return 0;
2109
2110 r = journal_file_hmac_start(f);
2111 if (r < 0)
2112 return r;
2113
2114 /* All but state+reserved, boot_id, arena_size,
2115 * tail_object_offset, n_objects, n_entries, tail_seqnum,
2116 * head_entry_realtime, tail_entry_realtime,
2117 * tail_entry_monotonic, n_data, n_fields, header_tag */
2118
2119 gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
2120 gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
2121 gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
2122 gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
2123 gcry_md_write(f->hmac, &f->header->head_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_seqnum));
2124
2125 return 0;
2126}
2127
2128static int journal_file_load_fsprg(JournalFile *f) {
2129 int r, fd = -1;
2130 char *p = NULL;
2131 struct stat st;
2132 FSPRGHeader *m = NULL;
2133 sd_id128_t machine;
2134
2135 assert(f);
2136
2137 if (!f->authenticate)
2138 return 0;
2139
2140 r = sd_id128_get_machine(&machine);
2141 if (r < 0)
2142 return r;
2143
2144 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
2145 SD_ID128_FORMAT_VAL(machine)) < 0)
2146 return -ENOMEM;
2147
2148 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
2149 if (fd < 0) {
2150 log_error("Failed to open %s: %m", p);
2151 r = -errno;
2152 goto finish;
2153 }
2154
2155 if (fstat(fd, &st) < 0) {
2156 r = -errno;
2157 goto finish;
2158 }
2159
2160 if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
2161 r = -ENODATA;
2162 goto finish;
2163 }
2164
2165 m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
2166 if (m == MAP_FAILED) {
2167 m = NULL;
2168 r = -errno;
2169 goto finish;
2170 }
2171
2172 if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
2173 r = -EBADMSG;
2174 goto finish;
2175 }
2176
2177 if (m->incompatible_flags != 0) {
2178 r = -EPROTONOSUPPORT;
2179 goto finish;
2180 }
2181
2182 if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
2183 r = -EBADMSG;
2184 goto finish;
2185 }
2186
2187 if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) {
2188 r = -EBADMSG;
2189 goto finish;
2190 }
2191
2192 f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
2193 if ((uint64_t) st.st_size < f->fsprg_size) {
2194 r = -ENODATA;
2195 goto finish;
2196 }
2197
2198 if (!sd_id128_equal(machine, m->machine_id)) {
2199 r = -EHOSTDOWN;
2200 goto finish;
2201 }
2202
2203 if (le64toh(m->fsprg_start_usec) <= 0 ||
2204 le64toh(m->fsprg_interval_usec) <= 0) {
2205 r = -EBADMSG;
2206 goto finish;
2207 }
2208
2209 f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2210 if (f->fsprg_header == MAP_FAILED) {
2211 f->fsprg_header = NULL;
2212 r = -errno;
2213 goto finish;
2214 }
2215
2216 r = 0;
2217
2218finish:
2219 if (m)
2220 munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
2221
2222 if (fd >= 0)
2223 close_nointr_nofail(fd);
2224
2225 free(p);
2226 return r;
2227}
2228
2229static int journal_file_setup_hmac(JournalFile *f) {
2230 gcry_error_t e;
2231
2232 if (!f->authenticate)
2233 return 0;
2234
2235 e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
2236 if (e != 0)
2237 return -ENOTSUP;
2238
2239 return 0;
2240}
2241
2242static int journal_file_append_first_tag(JournalFile *f) {
2243 int r;
2244 uint64_t p;
2245
2246 if (!f->authenticate)
2247 return 0;
2248
2249 log_debug("Calculating first tag...");
2250
2251 r = journal_file_hmac_put_header(f);
2252 if (r < 0)
2253 return r;
2254
2255 p = le64toh(f->header->field_hash_table_offset);
2256 if (p < offsetof(Object, hash_table.items))
2257 return -EINVAL;
2258 p -= offsetof(Object, hash_table.items);
2259
2260 r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
2261 if (r < 0)
2262 return r;
2263
2264 p = le64toh(f->header->data_hash_table_offset);
2265 if (p < offsetof(Object, hash_table.items))
2266 return -EINVAL;
2267 p -= offsetof(Object, hash_table.items);
2268
2269 r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
2270 if (r < 0)
2271 return r;
2272
2273 r = journal_file_append_tag(f);
2274 if (r < 0)
2275 return r;
2276
2277 return 0;
2278}
2279
cec736d2 2280void journal_file_dump(JournalFile *f) {
cec736d2
LP
2281 Object *o;
2282 int r;
2283 uint64_t p;
2284
2285 assert(f);
2286
dca6219e 2287 journal_file_print_header(f);
cec736d2 2288
23b0b2b2 2289 p = le64toh(f->header->header_size);
cec736d2 2290 while (p != 0) {
de190aef 2291 r = journal_file_move_to_object(f, -1, p, &o);
cec736d2
LP
2292 if (r < 0)
2293 goto fail;
2294
2295 switch (o->object.type) {
2296
2297 case OBJECT_UNUSED:
2298 printf("Type: OBJECT_UNUSED\n");
2299 break;
2300
2301 case OBJECT_DATA:
2302 printf("Type: OBJECT_DATA\n");
2303 break;
2304
2305 case OBJECT_ENTRY:
3fbf9cbb
LP
2306 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
2307 (unsigned long long) le64toh(o->entry.seqnum),
2308 (unsigned long long) le64toh(o->entry.monotonic),
2309 (unsigned long long) le64toh(o->entry.realtime));
cec736d2
LP
2310 break;
2311
de190aef
LP
2312 case OBJECT_FIELD_HASH_TABLE:
2313 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
cec736d2
LP
2314 break;
2315
de190aef
LP
2316 case OBJECT_DATA_HASH_TABLE:
2317 printf("Type: OBJECT_DATA_HASH_TABLE\n");
2318 break;
2319
2320 case OBJECT_ENTRY_ARRAY:
2321 printf("Type: OBJECT_ENTRY_ARRAY\n");
cec736d2 2322 break;
8144056f 2323
7560fffc
LP
2324 case OBJECT_TAG:
2325 printf("Type: OBJECT_TAG\n");
8144056f 2326 break;
cec736d2
LP
2327 }
2328
807e17f0
LP
2329 if (o->object.flags & OBJECT_COMPRESSED)
2330 printf("Flags: COMPRESSED\n");
2331
cec736d2
LP
2332 if (p == le64toh(f->header->tail_object_offset))
2333 p = 0;
2334 else
2335 p = p + ALIGN64(le64toh(o->object.size));
2336 }
2337
2338 return;
2339fail:
2340 log_error("File corrupt");
2341}
2342
dca6219e
LP
2343void journal_file_print_header(JournalFile *f) {
2344 char a[33], b[33], c[33];
2345 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
2346
2347 assert(f);
2348
2349 printf("File Path: %s\n"
2350 "File ID: %s\n"
2351 "Machine ID: %s\n"
2352 "Boot ID: %s\n"
2353 "Sequential Number ID: %s\n"
dc36ac67
LP
2354 "State: %s\n"
2355 "Compatible Flags:%s%s\n"
2356 "Incompatible Flags:%s%s\n"
dca6219e
LP
2357 "Header size: %llu\n"
2358 "Arena size: %llu\n"
2359 "Data Hash Table Size: %llu\n"
2360 "Field Hash Table Size: %llu\n"
2361 "Objects: %llu\n"
2362 "Entry Objects: %llu\n"
2363 "Rotate Suggested: %s\n"
2364 "Head Sequential Number: %llu\n"
2365 "Tail Sequential Number: %llu\n"
2366 "Head Realtime Timestamp: %s\n"
2367 "Tail Realtime Timestamp: %s\n",
2368 f->path,
2369 sd_id128_to_string(f->header->file_id, a),
2370 sd_id128_to_string(f->header->machine_id, b),
2371 sd_id128_to_string(f->header->boot_id, c),
2372 sd_id128_to_string(f->header->seqnum_id, c),
dc36ac67
LP
2373 f->header->state == STATE_OFFLINE ? "offline" :
2374 f->header->state == STATE_ONLINE ? "online" :
2375 f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
7560fffc
LP
2376 (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
2377 (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
dc36ac67
LP
2378 (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
2379 (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
dca6219e
LP
2380 (unsigned long long) le64toh(f->header->header_size),
2381 (unsigned long long) le64toh(f->header->arena_size),
2382 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2383 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2384 (unsigned long long) le64toh(f->header->n_objects),
2385 (unsigned long long) le64toh(f->header->n_entries),
2386 yes_no(journal_file_rotate_suggested(f)),
2387 (unsigned long long) le64toh(f->header->head_seqnum),
2388 (unsigned long long) le64toh(f->header->tail_seqnum),
2389 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2390 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
2391
2392 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2393 printf("Data Objects: %llu\n"
2394 "Data Hash Table Fill: %.1f%%\n",
2395 (unsigned long long) le64toh(f->header->n_data),
2396 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2397
2398 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2399 printf("Field Objects: %llu\n"
2400 "Field Hash Table Fill: %.1f%%\n",
2401 (unsigned long long) le64toh(f->header->n_fields),
2402 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2403}
2404
cec736d2
LP
2405int journal_file_open(
2406 const char *fname,
2407 int flags,
2408 mode_t mode,
7560fffc
LP
2409 bool compress,
2410 bool authenticate,
4a92baf3 2411 JournalMetrics *metrics,
0ac38b70 2412 JournalFile *template,
cec736d2
LP
2413 JournalFile **ret) {
2414
2415 JournalFile *f;
2416 int r;
2417 bool newly_created = false;
2418
2419 assert(fname);
2420
2421 if ((flags & O_ACCMODE) != O_RDONLY &&
2422 (flags & O_ACCMODE) != O_RDWR)
2423 return -EINVAL;
2424
9447a7f1
LP
2425 if (!endswith(fname, ".journal"))
2426 return -EINVAL;
2427
cec736d2
LP
2428 f = new0(JournalFile, 1);
2429 if (!f)
2430 return -ENOMEM;
2431
0ac38b70 2432 f->fd = -1;
0ac38b70 2433 f->mode = mode;
cec736d2 2434
7560fffc
LP
2435 f->flags = flags;
2436 f->prot = prot_from_flags(flags);
2437 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2438 f->compress = compress;
2439 f->authenticate = authenticate;
15944db8 2440
cec736d2
LP
2441 f->path = strdup(fname);
2442 if (!f->path) {
2443 r = -ENOMEM;
2444 goto fail;
2445 }
2446
0ac38b70
LP
2447 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2448 if (f->fd < 0) {
2449 r = -errno;
2450 goto fail;
2451 }
2452
cec736d2
LP
2453 if (fstat(f->fd, &f->last_stat) < 0) {
2454 r = -errno;
2455 goto fail;
2456 }
2457
2458 if (f->last_stat.st_size == 0 && f->writable) {
2459 newly_created = true;
2460
7560fffc
LP
2461 /* Try to load the FSPRG state, and if we can't, then
2462 * just don't do authentication */
2463 r = journal_file_load_fsprg(f);
2464 if (r < 0)
2465 f->authenticate = false;
2466
0ac38b70 2467 r = journal_file_init_header(f, template);
cec736d2
LP
2468 if (r < 0)
2469 goto fail;
2470
2471 if (fstat(f->fd, &f->last_stat) < 0) {
2472 r = -errno;
2473 goto fail;
2474 }
2475 }
2476
dca6219e 2477 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
cec736d2
LP
2478 r = -EIO;
2479 goto fail;
2480 }
2481
2482 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2483 if (f->header == MAP_FAILED) {
2484 f->header = NULL;
2485 r = -errno;
2486 goto fail;
2487 }
2488
2489 if (!newly_created) {
2490 r = journal_file_verify_header(f);
2491 if (r < 0)
2492 goto fail;
b0af6f41 2493 }
7560fffc 2494
b0af6f41 2495 if (!newly_created && f->writable) {
7560fffc
LP
2496 r = journal_file_load_fsprg(f);
2497 if (r < 0)
2498 goto fail;
cec736d2
LP
2499 }
2500
2501 if (f->writable) {
4a92baf3
LP
2502 if (metrics) {
2503 journal_default_metrics(metrics, f->fd);
2504 f->metrics = *metrics;
2505 } else if (template)
2506 f->metrics = template->metrics;
2507
cec736d2
LP
2508 r = journal_file_refresh_header(f);
2509 if (r < 0)
2510 goto fail;
7560fffc
LP
2511
2512 r = journal_file_setup_hmac(f);
2513 if (r < 0)
2514 goto fail;
cec736d2
LP
2515 }
2516
2517 if (newly_created) {
de190aef 2518 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2519 if (r < 0)
2520 goto fail;
2521
de190aef 2522 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2523 if (r < 0)
2524 goto fail;
7560fffc
LP
2525
2526 r = journal_file_append_first_tag(f);
2527 if (r < 0)
2528 goto fail;
cec736d2
LP
2529 }
2530
de190aef 2531 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2532 if (r < 0)
2533 goto fail;
2534
de190aef 2535 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2536 if (r < 0)
2537 goto fail;
2538
2539 if (ret)
2540 *ret = f;
2541
2542 return 0;
2543
2544fail:
2545 journal_file_close(f);
2546
2547 return r;
2548}
0ac38b70 2549
7560fffc 2550int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
0ac38b70
LP
2551 char *p;
2552 size_t l;
2553 JournalFile *old_file, *new_file = NULL;
2554 int r;
2555
2556 assert(f);
2557 assert(*f);
2558
2559 old_file = *f;
2560
2561 if (!old_file->writable)
2562 return -EINVAL;
2563
2564 if (!endswith(old_file->path, ".journal"))
2565 return -EINVAL;
2566
2567 l = strlen(old_file->path);
2568
9447a7f1 2569 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2570 if (!p)
2571 return -ENOMEM;
2572
2573 memcpy(p, old_file->path, l - 8);
2574 p[l-8] = '@';
2575 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2576 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2577 "-%016llx-%016llx.journal",
dca6219e 2578 (unsigned long long) le64toh((*f)->header->tail_seqnum),
0ac38b70
LP
2579 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2580
2581 r = rename(old_file->path, p);
2582 free(p);
2583
2584 if (r < 0)
2585 return -errno;
2586
ccdbaf91 2587 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2588
7560fffc 2589 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file, &new_file);
0ac38b70
LP
2590 journal_file_close(old_file);
2591
2592 *f = new_file;
2593 return r;
2594}
2595
9447a7f1
LP
2596int journal_file_open_reliably(
2597 const char *fname,
2598 int flags,
2599 mode_t mode,
7560fffc
LP
2600 bool compress,
2601 bool authenticate,
4a92baf3 2602 JournalMetrics *metrics,
9447a7f1
LP
2603 JournalFile *template,
2604 JournalFile **ret) {
2605
2606 int r;
2607 size_t l;
2608 char *p;
2609
7560fffc 2610 r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, template, ret);
0071d9f1
LP
2611 if (r != -EBADMSG && /* corrupted */
2612 r != -ENODATA && /* truncated */
2613 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2614 r != -EPROTONOSUPPORT && /* incompatible feature */
2615 r != -EBUSY && /* unclean shutdown */
2616 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2617 return r;
2618
2619 if ((flags & O_ACCMODE) == O_RDONLY)
2620 return r;
2621
2622 if (!(flags & O_CREAT))
2623 return r;
2624
7560fffc
LP
2625 if (!endswith(fname, ".journal"))
2626 return r;
2627
5c70eab4
LP
2628 /* The file is corrupted. Rotate it away and try it again (but only once) */
2629
9447a7f1
LP
2630 l = strlen(fname);
2631 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2632 (int) (l-8), fname,
2633 (unsigned long long) now(CLOCK_REALTIME),
2634 random_ull()) < 0)
2635 return -ENOMEM;
2636
2637 r = rename(fname, p);
2638 free(p);
2639 if (r < 0)
2640 return -errno;
2641
a1a1898f 2642 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2643
7560fffc 2644 return journal_file_open(fname, flags, mode, compress, authenticate, metrics, template, ret);
9447a7f1
LP
2645}
2646
0ac38b70
LP
2647struct vacuum_info {
2648 off_t usage;
2649 char *filename;
2650
2651 uint64_t realtime;
2652 sd_id128_t seqnum_id;
2653 uint64_t seqnum;
5c70eab4
LP
2654
2655 bool have_seqnum;
0ac38b70
LP
2656};
2657
2658static int vacuum_compare(const void *_a, const void *_b) {
2659 const struct vacuum_info *a, *b;
2660
2661 a = _a;
2662 b = _b;
2663
5c70eab4
LP
2664 if (a->have_seqnum && b->have_seqnum &&
2665 sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
0ac38b70
LP
2666 if (a->seqnum < b->seqnum)
2667 return -1;
2668 else if (a->seqnum > b->seqnum)
2669 return 1;
2670 else
2671 return 0;
2672 }
2673
2674 if (a->realtime < b->realtime)
2675 return -1;
2676 else if (a->realtime > b->realtime)
2677 return 1;
5c70eab4 2678 else if (a->have_seqnum && b->have_seqnum)
0ac38b70 2679 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
5c70eab4
LP
2680 else
2681 return strcmp(a->filename, b->filename);
0ac38b70
LP
2682}
2683
2684int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
2685 DIR *d;
2686 int r = 0;
2687 struct vacuum_info *list = NULL;
2688 unsigned n_list = 0, n_allocated = 0, i;
2689 uint64_t sum = 0;
2690
2691 assert(directory);
2692
2693 if (max_use <= 0)
babfc091 2694 return 0;
0ac38b70
LP
2695
2696 d = opendir(directory);
2697 if (!d)
2698 return -errno;
2699
2700 for (;;) {
2701 int k;
2702 struct dirent buf, *de;
2703 size_t q;
2704 struct stat st;
2705 char *p;
7ea07dcd 2706 unsigned long long seqnum = 0, realtime;
0ac38b70 2707 sd_id128_t seqnum_id;
5c70eab4 2708 bool have_seqnum;
0ac38b70
LP
2709
2710 k = readdir_r(d, &buf, &de);
2711 if (k != 0) {
2712 r = -k;
2713 goto finish;
2714 }
2715
2716 if (!de)
2717 break;
2718
5c70eab4
LP
2719 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2720 continue;
2721
2722 if (!S_ISREG(st.st_mode))
0ac38b70
LP
2723 continue;
2724
2725 q = strlen(de->d_name);
2726
5c70eab4 2727 if (endswith(de->d_name, ".journal")) {
0ac38b70 2728
5c70eab4 2729 /* Vacuum archived files */
0ac38b70 2730
5c70eab4
LP
2731 if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2732 continue;
0ac38b70 2733
5c70eab4
LP
2734 if (de->d_name[q-8-16-1] != '-' ||
2735 de->d_name[q-8-16-1-16-1] != '-' ||
2736 de->d_name[q-8-16-1-16-1-32-1] != '@')
2737 continue;
0ac38b70 2738
5c70eab4
LP
2739 p = strdup(de->d_name);
2740 if (!p) {
2741 r = -ENOMEM;
2742 goto finish;
2743 }
0ac38b70 2744
5c70eab4
LP
2745 de->d_name[q-8-16-1-16-1] = 0;
2746 if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2747 free(p);
2748 continue;
2749 }
2750
2751 if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2752 free(p);
2753 continue;
2754 }
2755
2756 have_seqnum = true;
2757
2758 } else if (endswith(de->d_name, ".journal~")) {
2759 unsigned long long tmp;
2760
2761 /* Vacuum corrupted files */
2762
2763 if (q < 1 + 16 + 1 + 16 + 8 + 1)
2764 continue;
0ac38b70 2765
5c70eab4
LP
2766 if (de->d_name[q-1-8-16-1] != '-' ||
2767 de->d_name[q-1-8-16-1-16-1] != '@')
2768 continue;
2769
2770 p = strdup(de->d_name);
2771 if (!p) {
2772 r = -ENOMEM;
2773 goto finish;
2774 }
2775
2776 if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2777 free(p);
2778 continue;
2779 }
2780
2781 have_seqnum = false;
2782 } else
0ac38b70 2783 continue;
0ac38b70
LP
2784
2785 if (n_list >= n_allocated) {
2786 struct vacuum_info *j;
2787
2788 n_allocated = MAX(n_allocated * 2U, 8U);
2789 j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2790 if (!j) {
2791 free(p);
2792 r = -ENOMEM;
2793 goto finish;
2794 }
2795
2796 list = j;
2797 }
2798
2799 list[n_list].filename = p;
a3a52c0f 2800 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
0ac38b70
LP
2801 list[n_list].seqnum = seqnum;
2802 list[n_list].realtime = realtime;
2803 list[n_list].seqnum_id = seqnum_id;
5c70eab4 2804 list[n_list].have_seqnum = have_seqnum;
0ac38b70
LP
2805
2806 sum += list[n_list].usage;
2807
2808 n_list ++;
2809 }
2810
64825d3c
LP
2811 if (n_list > 0)
2812 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
0ac38b70
LP
2813
2814 for(i = 0; i < n_list; i++) {
2815 struct statvfs ss;
2816
2817 if (fstatvfs(dirfd(d), &ss) < 0) {
2818 r = -errno;
2819 goto finish;
2820 }
2821
2822 if (sum <= max_use &&
2823 (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2824 break;
2825
2826 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
e7bf07b3 2827 log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
0ac38b70
LP
2828 sum -= list[i].usage;
2829 } else if (errno != ENOENT)
2830 log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2831 }
2832
2833finish:
2834 for (i = 0; i < n_list; i++)
2835 free(list[i].filename);
2836
2837 free(list);
2838
de190aef
LP
2839 if (d)
2840 closedir(d);
2841
0ac38b70
LP
2842 return r;
2843}
cf244689
LP
2844
2845int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2846 uint64_t i, n;
2847 uint64_t q, xor_hash = 0;
2848 int r;
2849 EntryItem *items;
2850 dual_timestamp ts;
2851
2852 assert(from);
2853 assert(to);
2854 assert(o);
2855 assert(p);
2856
2857 if (!to->writable)
2858 return -EPERM;
2859
2860 ts.monotonic = le64toh(o->entry.monotonic);
2861 ts.realtime = le64toh(o->entry.realtime);
2862
2863 if (to->tail_entry_monotonic_valid &&
2864 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2865 return -EINVAL;
2866
cf244689
LP
2867 n = journal_file_entry_n_items(o);
2868 items = alloca(sizeof(EntryItem) * n);
2869
2870 for (i = 0; i < n; i++) {
4fd052ae
FC
2871 uint64_t l, h;
2872 le64_t le_hash;
cf244689
LP
2873 size_t t;
2874 void *data;
2875 Object *u;
2876
2877 q = le64toh(o->entry.items[i].object_offset);
2878 le_hash = o->entry.items[i].hash;
2879
2880 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2881 if (r < 0)
2882 return r;
2883
2884 if (le_hash != o->data.hash)
2885 return -EBADMSG;
2886
2887 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2888 t = (size_t) l;
2889
2890 /* We hit the limit on 32bit machines */
2891 if ((uint64_t) t != l)
2892 return -E2BIG;
2893
2894 if (o->object.flags & OBJECT_COMPRESSED) {
2895#ifdef HAVE_XZ
2896 uint64_t rsize;
2897
2898 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2899 return -EBADMSG;
2900
2901 data = from->compress_buffer;
2902 l = rsize;
2903#else
2904 return -EPROTONOSUPPORT;
2905#endif
2906 } else
2907 data = o->data.payload;
2908
2909 r = journal_file_append_data(to, data, l, &u, &h);
2910 if (r < 0)
2911 return r;
2912
2913 xor_hash ^= le64toh(u->data.hash);
2914 items[i].object_offset = htole64(h);
2915 items[i].hash = u->data.hash;
2916
2917 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2918 if (r < 0)
2919 return r;
2920 }
2921
2922 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2923}
babfc091
LP
2924
2925void journal_default_metrics(JournalMetrics *m, int fd) {
2926 uint64_t fs_size = 0;
2927 struct statvfs ss;
a7bc2c2a 2928 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2929
2930 assert(m);
2931 assert(fd >= 0);
2932
2933 if (fstatvfs(fd, &ss) >= 0)
2934 fs_size = ss.f_frsize * ss.f_blocks;
2935
2936 if (m->max_use == (uint64_t) -1) {
2937
2938 if (fs_size > 0) {
2939 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2940
2941 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2942 m->max_use = DEFAULT_MAX_USE_UPPER;
2943
2944 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2945 m->max_use = DEFAULT_MAX_USE_LOWER;
2946 } else
2947 m->max_use = DEFAULT_MAX_USE_LOWER;
2948 } else {
2949 m->max_use = PAGE_ALIGN(m->max_use);
2950
2951 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2952 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2953 }
2954
2955 if (m->max_size == (uint64_t) -1) {
2956 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2957
2958 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2959 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2960 } else
2961 m->max_size = PAGE_ALIGN(m->max_size);
2962
2963 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2964 m->max_size = JOURNAL_FILE_SIZE_MIN;
2965
2966 if (m->max_size*2 > m->max_use)
2967 m->max_use = m->max_size*2;
2968
2969 if (m->min_size == (uint64_t) -1)
2970 m->min_size = JOURNAL_FILE_SIZE_MIN;
2971 else {
2972 m->min_size = PAGE_ALIGN(m->min_size);
2973
2974 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2975 m->min_size = JOURNAL_FILE_SIZE_MIN;
2976
2977 if (m->min_size > m->max_size)
2978 m->max_size = m->min_size;
2979 }
2980
2981 if (m->keep_free == (uint64_t) -1) {
2982
2983 if (fs_size > 0) {
2984 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2985
2986 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2987 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2988
2989 } else
2990 m->keep_free = DEFAULT_KEEP_FREE;
2991 }
2992
e7bf07b3
LP
2993 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2994 format_bytes(a, sizeof(a), m->max_use),
2995 format_bytes(b, sizeof(b), m->max_size),
2996 format_bytes(c, sizeof(c), m->min_size),
2997 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2998}
08984293
LP
2999
3000int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
3001 assert(f);
3002 assert(from || to);
3003
3004 if (from) {
162566a4
LP
3005 if (f->header->head_entry_realtime == 0)
3006 return -ENOENT;
08984293 3007
162566a4 3008 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
3009 }
3010
3011 if (to) {
162566a4
LP
3012 if (f->header->tail_entry_realtime == 0)
3013 return -ENOENT;
08984293 3014
162566a4 3015 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
3016 }
3017
3018 return 1;
3019}
3020
3021int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
3022 char t[9+32+1] = "_BOOT_ID=";
3023 Object *o;
3024 uint64_t p;
3025 int r;
3026
3027 assert(f);
3028 assert(from || to);
3029
3030 sd_id128_to_string(boot_id, t + 9);
3031
3032 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
3033 if (r <= 0)
3034 return r;
3035
3036 if (le64toh(o->data.n_entries) <= 0)
3037 return 0;
3038
3039 if (from) {
3040 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
3041 if (r < 0)
3042 return r;
3043
3044 *from = le64toh(o->entry.monotonic);
3045 }
3046
3047 if (to) {
3048 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
3049 if (r < 0)
3050 return r;
3051
3052 r = generic_array_get_plus_one(f,
3053 le64toh(o->data.entry_offset),
3054 le64toh(o->data.entry_array_offset),
3055 le64toh(o->data.n_entries)-1,
3056 &o, NULL);
3057 if (r <= 0)
3058 return r;
3059
3060 *to = le64toh(o->entry.monotonic);
3061 }
3062
3063 return 1;
3064}
dca6219e
LP
3065
3066bool journal_file_rotate_suggested(JournalFile *f) {
3067 assert(f);
3068
3069 /* If we gained new header fields we gained new features,
3070 * hence suggest a rotation */
361f9cbc
LP
3071 if (le64toh(f->header->header_size) < sizeof(Header)) {
3072 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 3073 return true;
361f9cbc 3074 }
dca6219e
LP
3075
3076 /* Let's check if the hash tables grew over a certain fill
3077 * level (75%, borrowing this value from Java's hash table
3078 * implementation), and if so suggest a rotation. To calculate
3079 * the fill level we need the n_data field, which only exists
3080 * in newer versions. */
3081
3082 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
3083 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3084 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
3085 f->path,
3086 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
3087 (unsigned long long) le64toh(f->header->n_data),
3088 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
3089 (unsigned long long) (f->last_stat.st_size),
3090 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 3091 return true;
361f9cbc 3092 }
dca6219e
LP
3093
3094 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
3095 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3096 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
3097 f->path,
3098 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
3099 (unsigned long long) le64toh(f->header->n_fields),
3100 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 3101 return true;
361f9cbc 3102 }
dca6219e
LP
3103
3104 return false;
3105}