]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
conf-parser: make parsing exit status lists non-fatal
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2011 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
32#include "lookup3.h"
807e17f0 33#include "compress.h"
7560fffc 34#include "fsprg.h"
cec736d2 35
4a92baf3
LP
/* Default sizes, in bytes, of the data and field hash tables */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads smaller than this are stored uncompressed */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */

/* Rounds x up to the next multiple of 8 */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)

/* n_data was the first entry we added after the initial file format design */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))

/* True if the header_size recorded in header h is large enough to
 * fully contain the given field */
#define JOURNAL_HEADER_CONTAINS(h, field) \
        (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
67
7560fffc 68static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
b0af6f41 69static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
dca6219e 70
cec736d2 71void journal_file_close(JournalFile *f) {
de190aef 72 assert(f);
cec736d2 73
b0af6f41
LP
74 /* Write the final tag */
75 if (f->authenticate)
76 journal_file_append_tag(f);
77
7560fffc 78 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
79 if (f->mmap && f->fd >= 0)
80 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc
LP
81
82 if (f->writable && f->fd >= 0)
83 fdatasync(f->fd);
84
d384c7a8 85 if (f->header) {
cd96b3b8
LP
86 /* Mark the file offline. Don't override the archived state if it already is set */
87 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 88 f->header->state = STATE_OFFLINE;
cec736d2 89
d384c7a8
MS
90 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
91 }
cec736d2 92
0ac38b70
LP
93 if (f->fd >= 0)
94 close_nointr_nofail(f->fd);
95
cec736d2 96 free(f->path);
807e17f0 97
16e9f408
LP
98 if (f->mmap)
99 mmap_cache_unref(f->mmap);
100
807e17f0
LP
101#ifdef HAVE_XZ
102 free(f->compress_buffer);
103#endif
104
7560fffc
LP
105#ifdef HAVE_GCRYPT
106 if (f->fsprg_header)
107 munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
108
109 if (f->hmac)
110 gcry_md_close(f->hmac);
111#endif
112
cec736d2
LP
113 free(f);
114}
115
0ac38b70 116static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
117 Header h;
118 ssize_t k;
119 int r;
120
121 assert(f);
122
123 zero(h);
7560fffc 124 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 125 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 126
7560fffc
LP
127 h.incompatible_flags =
128 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
129
130 h.compatible_flags =
131 htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
132
cec736d2
LP
133 r = sd_id128_randomize(&h.file_id);
134 if (r < 0)
135 return r;
136
0ac38b70
LP
137 if (template) {
138 h.seqnum_id = template->header->seqnum_id;
dca6219e 139 h.tail_seqnum = template->header->tail_seqnum;
0ac38b70
LP
140 } else
141 h.seqnum_id = h.file_id;
cec736d2
LP
142
143 k = pwrite(f->fd, &h, sizeof(h), 0);
144 if (k < 0)
145 return -errno;
146
147 if (k != sizeof(h))
148 return -EIO;
149
150 return 0;
151}
152
153static int journal_file_refresh_header(JournalFile *f) {
154 int r;
de190aef 155 sd_id128_t boot_id;
cec736d2
LP
156
157 assert(f);
158
159 r = sd_id128_get_machine(&f->header->machine_id);
160 if (r < 0)
161 return r;
162
de190aef 163 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
164 if (r < 0)
165 return r;
166
de190aef
LP
167 if (sd_id128_equal(boot_id, f->header->boot_id))
168 f->tail_entry_monotonic_valid = true;
169
170 f->header->boot_id = boot_id;
171
172 f->header->state = STATE_ONLINE;
b788cc23 173
7560fffc
LP
174 /* Sync the online state to disk */
175 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
176 fdatasync(f->fd);
b788cc23 177
cec736d2
LP
178 return 0;
179}
180
181static int journal_file_verify_header(JournalFile *f) {
182 assert(f);
183
7560fffc 184 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
185 return -EBADMSG;
186
7560fffc
LP
187 /* In both read and write mode we refuse to open files with
188 * incompatible flags we don't know */
807e17f0 189#ifdef HAVE_XZ
7560fffc 190 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
191 return -EPROTONOSUPPORT;
192#else
cec736d2
LP
193 if (f->header->incompatible_flags != 0)
194 return -EPROTONOSUPPORT;
807e17f0 195#endif
cec736d2 196
7560fffc
LP
197 /* When open for writing we refuse to open files with
198 * compatible flags, too */
199 if (f->writable) {
200#ifdef HAVE_GCRYPT
201 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
202 return -EPROTONOSUPPORT;
203#else
204 if (f->header->compatible_flags != 0)
205 return -EPROTONOSUPPORT;
206#endif
207 }
208
dca6219e
LP
209 /* The first addition was n_data, so check that we are at least this large */
210 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
211 return -EBADMSG;
212
213 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
cec736d2
LP
214 return -ENODATA;
215
216 if (f->writable) {
ccdbaf91 217 uint8_t state;
cec736d2
LP
218 sd_id128_t machine_id;
219 int r;
220
221 r = sd_id128_get_machine(&machine_id);
222 if (r < 0)
223 return r;
224
225 if (!sd_id128_equal(machine_id, f->header->machine_id))
226 return -EHOSTDOWN;
227
de190aef 228 state = f->header->state;
cec736d2 229
71fa6f00
LP
230 if (state == STATE_ONLINE) {
231 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
232 return -EBUSY;
233 } else if (state == STATE_ARCHIVED)
cec736d2 234 return -ESHUTDOWN;
71fa6f00
LP
235 else if (state != STATE_OFFLINE) {
236 log_debug("Journal file %s has unknown state %u.", f->path, state);
237 return -EBUSY;
238 }
cec736d2
LP
239 }
240
7560fffc
LP
241 f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
242 f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
243
cec736d2
LP
244 return 0;
245}
246
247static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 248 uint64_t old_size, new_size;
fec2aa2f 249 int r;
cec736d2
LP
250
251 assert(f);
252
cec736d2 253 /* We assume that this file is not sparse, and we know that
38ac38b2 254 * for sure, since we always call posix_fallocate()
cec736d2
LP
255 * ourselves */
256
257 old_size =
23b0b2b2 258 le64toh(f->header->header_size) +
cec736d2
LP
259 le64toh(f->header->arena_size);
260
bc85bfee 261 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
262 if (new_size < le64toh(f->header->header_size))
263 new_size = le64toh(f->header->header_size);
bc85bfee
LP
264
265 if (new_size <= old_size)
cec736d2
LP
266 return 0;
267
bc85bfee
LP
268 if (f->metrics.max_size > 0 &&
269 new_size > f->metrics.max_size)
270 return -E2BIG;
cec736d2 271
bc85bfee
LP
272 if (new_size > f->metrics.min_size &&
273 f->metrics.keep_free > 0) {
cec736d2
LP
274 struct statvfs svfs;
275
276 if (fstatvfs(f->fd, &svfs) >= 0) {
277 uint64_t available;
278
279 available = svfs.f_bfree * svfs.f_bsize;
280
bc85bfee
LP
281 if (available >= f->metrics.keep_free)
282 available -= f->metrics.keep_free;
cec736d2
LP
283 else
284 available = 0;
285
286 if (new_size - old_size > available)
287 return -E2BIG;
288 }
289 }
290
bc85bfee
LP
291 /* Note that the glibc fallocate() fallback is very
292 inefficient, hence we try to minimize the allocation area
293 as we can. */
fec2aa2f
GV
294 r = posix_fallocate(f->fd, old_size, new_size - old_size);
295 if (r != 0)
296 return -r;
cec736d2
LP
297
298 if (fstat(f->fd, &f->last_stat) < 0)
299 return -errno;
300
23b0b2b2 301 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
302
303 return 0;
304}
305
16e9f408 306static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
cec736d2 307 assert(f);
cec736d2
LP
308 assert(ret);
309
2a59ea54 310 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
311 if (offset + size > (uint64_t) f->last_stat.st_size) {
312 /* Hmm, out of range? Let's refresh the fstat() data
313 * first, before we trust that check. */
314
315 if (fstat(f->fd, &f->last_stat) < 0 ||
316 offset + size > (uint64_t) f->last_stat.st_size)
317 return -EADDRNOTAVAIL;
318 }
319
16e9f408 320 return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
cec736d2
LP
321}
322
323static bool verify_hash(Object *o) {
de190aef 324 uint64_t h1, h2;
cec736d2
LP
325
326 assert(o);
327
807e17f0 328 if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
cec736d2 329 h1 = le64toh(o->data.hash);
de190aef
LP
330 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
331 } else if (o->object.type == OBJECT_FIELD) {
332 h1 = le64toh(o->field.hash);
333 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
334 } else
335 return true;
cec736d2 336
de190aef 337 return h1 == h2;
cec736d2
LP
338}
339
16e9f408
LP
340static uint64_t minimum_header_size(Object *o) {
341
342 static uint64_t table[] = {
343 [OBJECT_DATA] = sizeof(DataObject),
344 [OBJECT_FIELD] = sizeof(FieldObject),
345 [OBJECT_ENTRY] = sizeof(EntryObject),
346 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
347 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
348 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
349 [OBJECT_TAG] = sizeof(TagObject),
350 };
351
352 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
353 return sizeof(ObjectHeader);
354
355 return table[o->object.type];
356}
357
de190aef 358int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
359 int r;
360 void *t;
361 Object *o;
362 uint64_t s;
16e9f408 363 unsigned context;
cec736d2
LP
364
365 assert(f);
366 assert(ret);
367
16e9f408
LP
368 /* One context for each type, plus one catch-all for the rest */
369 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
370
371 r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
372 if (r < 0)
373 return r;
374
375 o = (Object*) t;
376 s = le64toh(o->object.size);
377
378 if (s < sizeof(ObjectHeader))
379 return -EBADMSG;
380
16e9f408
LP
381 if (o->object.type <= OBJECT_UNUSED)
382 return -EBADMSG;
383
384 if (s < minimum_header_size(o))
385 return -EBADMSG;
386
de190aef 387 if (type >= 0 && o->object.type != type)
cec736d2
LP
388 return -EBADMSG;
389
390 if (s > sizeof(ObjectHeader)) {
de190aef 391 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
392 if (r < 0)
393 return r;
394
395 o = (Object*) t;
396 }
397
398 if (!verify_hash(o))
399 return -EBADMSG;
400
401 *ret = o;
402 return 0;
403}
404
d98cc1f2 405static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
406 uint64_t r;
407
408 assert(f);
409
dca6219e 410 r = le64toh(f->header->tail_seqnum) + 1;
c2373f84
LP
411
412 if (seqnum) {
de190aef 413 /* If an external seqnum counter was passed, we update
c2373f84
LP
414 * both the local and the external one, and set it to
415 * the maximum of both */
416
417 if (*seqnum + 1 > r)
418 r = *seqnum + 1;
419
420 *seqnum = r;
421 }
422
dca6219e 423 f->header->tail_seqnum = htole64(r);
cec736d2 424
dca6219e
LP
425 if (f->header->head_seqnum == 0)
426 f->header->head_seqnum = htole64(r);
de190aef 427
cec736d2
LP
428 return r;
429}
430
de190aef 431static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
432 int r;
433 uint64_t p;
434 Object *tail, *o;
435 void *t;
436
437 assert(f);
16e9f408 438 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
439 assert(size >= sizeof(ObjectHeader));
440 assert(offset);
441 assert(ret);
442
443 p = le64toh(f->header->tail_object_offset);
cec736d2 444 if (p == 0)
23b0b2b2 445 p = le64toh(f->header->header_size);
cec736d2 446 else {
de190aef 447 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
448 if (r < 0)
449 return r;
450
451 p += ALIGN64(le64toh(tail->object.size));
452 }
453
454 r = journal_file_allocate(f, p, size);
455 if (r < 0)
456 return r;
457
de190aef 458 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
459 if (r < 0)
460 return r;
461
462 o = (Object*) t;
463
464 zero(o->object);
de190aef 465 o->object.type = type;
cec736d2
LP
466 o->object.size = htole64(size);
467
468 f->header->tail_object_offset = htole64(p);
cec736d2
LP
469 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
470
471 *ret = o;
472 *offset = p;
473
474 return 0;
475}
476
de190aef 477static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
478 uint64_t s, p;
479 Object *o;
480 int r;
481
482 assert(f);
483
dfabe643 484 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
485 journal file and we want to make sure we never get beyond
486 75% fill level. Calculate the hash table size for the
487 maximum file size based on these metrics. */
488
dfabe643 489 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
490 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
491 s = DEFAULT_DATA_HASH_TABLE_SIZE;
492
dfabe643 493 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 494
de190aef
LP
495 r = journal_file_append_object(f,
496 OBJECT_DATA_HASH_TABLE,
497 offsetof(Object, hash_table.items) + s,
498 &o, &p);
cec736d2
LP
499 if (r < 0)
500 return r;
501
de190aef 502 memset(o->hash_table.items, 0, s);
cec736d2 503
de190aef
LP
504 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
505 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
506
507 return 0;
508}
509
de190aef 510static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
511 uint64_t s, p;
512 Object *o;
513 int r;
514
515 assert(f);
516
de190aef
LP
517 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
518 r = journal_file_append_object(f,
519 OBJECT_FIELD_HASH_TABLE,
520 offsetof(Object, hash_table.items) + s,
521 &o, &p);
cec736d2
LP
522 if (r < 0)
523 return r;
524
de190aef 525 memset(o->hash_table.items, 0, s);
cec736d2 526
de190aef
LP
527 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
528 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
529
530 return 0;
531}
532
de190aef 533static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
534 uint64_t s, p;
535 void *t;
536 int r;
537
538 assert(f);
539
de190aef
LP
540 p = le64toh(f->header->data_hash_table_offset);
541 s = le64toh(f->header->data_hash_table_size);
cec736d2 542
de190aef 543 r = journal_file_move_to(f,
16e9f408 544 OBJECT_DATA_HASH_TABLE,
de190aef
LP
545 p, s,
546 &t);
cec736d2
LP
547 if (r < 0)
548 return r;
549
de190aef 550 f->data_hash_table = t;
cec736d2
LP
551 return 0;
552}
553
de190aef 554static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
555 uint64_t s, p;
556 void *t;
557 int r;
558
559 assert(f);
560
de190aef
LP
561 p = le64toh(f->header->field_hash_table_offset);
562 s = le64toh(f->header->field_hash_table_size);
cec736d2 563
de190aef 564 r = journal_file_move_to(f,
16e9f408 565 OBJECT_FIELD_HASH_TABLE,
de190aef
LP
566 p, s,
567 &t);
cec736d2
LP
568 if (r < 0)
569 return r;
570
de190aef 571 f->field_hash_table = t;
cec736d2
LP
572 return 0;
573}
574
de190aef
LP
575static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
576 uint64_t p, h;
cec736d2
LP
577 int r;
578
579 assert(f);
580 assert(o);
581 assert(offset > 0);
de190aef 582 assert(o->object.type == OBJECT_DATA);
cec736d2 583
48496df6
LP
584 /* This might alter the window we are looking at */
585
de190aef
LP
586 o->data.next_hash_offset = o->data.next_field_offset = 0;
587 o->data.entry_offset = o->data.entry_array_offset = 0;
588 o->data.n_entries = 0;
cec736d2 589
de190aef 590 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 591 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
592 if (p == 0) {
593 /* Only entry in the hash table is easy */
de190aef 594 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 595 } else {
48496df6
LP
596 /* Move back to the previous data object, to patch in
597 * pointer */
cec736d2 598
de190aef 599 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
600 if (r < 0)
601 return r;
602
de190aef 603 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
604 }
605
de190aef 606 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 607
dca6219e
LP
608 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
609 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
610
cec736d2
LP
611 return 0;
612}
613
de190aef
LP
614int journal_file_find_data_object_with_hash(
615 JournalFile *f,
616 const void *data, uint64_t size, uint64_t hash,
617 Object **ret, uint64_t *offset) {
48496df6 618
de190aef 619 uint64_t p, osize, h;
cec736d2
LP
620 int r;
621
622 assert(f);
623 assert(data || size == 0);
624
625 osize = offsetof(Object, data.payload) + size;
626
bc85bfee
LP
627 if (f->header->data_hash_table_size == 0)
628 return -EBADMSG;
629
de190aef
LP
630 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
631 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 632
de190aef
LP
633 while (p > 0) {
634 Object *o;
cec736d2 635
de190aef 636 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
637 if (r < 0)
638 return r;
639
807e17f0 640 if (le64toh(o->data.hash) != hash)
85a131e8 641 goto next;
807e17f0
LP
642
643 if (o->object.flags & OBJECT_COMPRESSED) {
644#ifdef HAVE_XZ
b785c858 645 uint64_t l, rsize;
cec736d2 646
807e17f0
LP
647 l = le64toh(o->object.size);
648 if (l <= offsetof(Object, data.payload))
cec736d2
LP
649 return -EBADMSG;
650
807e17f0
LP
651 l -= offsetof(Object, data.payload);
652
653 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
654 return -EBADMSG;
655
b785c858 656 if (rsize == size &&
807e17f0
LP
657 memcmp(f->compress_buffer, data, size) == 0) {
658
659 if (ret)
660 *ret = o;
661
662 if (offset)
663 *offset = p;
664
665 return 1;
666 }
667#else
668 return -EPROTONOSUPPORT;
669#endif
670
671 } else if (le64toh(o->object.size) == osize &&
672 memcmp(o->data.payload, data, size) == 0) {
673
cec736d2
LP
674 if (ret)
675 *ret = o;
676
677 if (offset)
678 *offset = p;
679
de190aef 680 return 1;
cec736d2
LP
681 }
682
85a131e8 683 next:
cec736d2
LP
684 p = le64toh(o->data.next_hash_offset);
685 }
686
de190aef
LP
687 return 0;
688}
689
690int journal_file_find_data_object(
691 JournalFile *f,
692 const void *data, uint64_t size,
693 Object **ret, uint64_t *offset) {
694
695 uint64_t hash;
696
697 assert(f);
698 assert(data || size == 0);
699
700 hash = hash64(data, size);
701
702 return journal_file_find_data_object_with_hash(f,
703 data, size, hash,
704 ret, offset);
705}
706
48496df6
LP
707static int journal_file_append_data(
708 JournalFile *f,
709 const void *data, uint64_t size,
710 Object **ret, uint64_t *offset) {
711
de190aef
LP
712 uint64_t hash, p;
713 uint64_t osize;
714 Object *o;
715 int r;
807e17f0 716 bool compressed = false;
de190aef
LP
717
718 assert(f);
719 assert(data || size == 0);
720
721 hash = hash64(data, size);
722
723 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
724 if (r < 0)
725 return r;
726 else if (r > 0) {
727
728 if (ret)
729 *ret = o;
730
731 if (offset)
732 *offset = p;
733
734 return 0;
735 }
736
737 osize = offsetof(Object, data.payload) + size;
738 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
739 if (r < 0)
740 return r;
741
cec736d2 742 o->data.hash = htole64(hash);
807e17f0
LP
743
744#ifdef HAVE_XZ
745 if (f->compress &&
746 size >= COMPRESSION_SIZE_THRESHOLD) {
747 uint64_t rsize;
748
749 compressed = compress_blob(data, size, o->data.payload, &rsize);
750
751 if (compressed) {
752 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
753 o->object.flags |= OBJECT_COMPRESSED;
754
807e17f0
LP
755 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
756 }
757 }
758#endif
759
64825d3c 760 if (!compressed && size > 0)
807e17f0 761 memcpy(o->data.payload, data, size);
cec736d2 762
de190aef 763 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
764 if (r < 0)
765 return r;
766
b0af6f41
LP
767 r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
768 if (r < 0)
769 return r;
770
48496df6
LP
771 /* The linking might have altered the window, so let's
772 * refresh our pointer */
773 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
774 if (r < 0)
775 return r;
776
cec736d2
LP
777 if (ret)
778 *ret = o;
779
780 if (offset)
de190aef 781 *offset = p;
cec736d2
LP
782
783 return 0;
784}
785
786uint64_t journal_file_entry_n_items(Object *o) {
787 assert(o);
7be3aa17 788 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
789
790 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
791}
792
de190aef
LP
793static uint64_t journal_file_entry_array_n_items(Object *o) {
794 assert(o);
7be3aa17 795 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
796
797 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
798}
799
800static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
801 le64_t *first,
802 le64_t *idx,
de190aef 803 uint64_t p) {
cec736d2 804 int r;
de190aef
LP
805 uint64_t n = 0, ap = 0, q, i, a, hidx;
806 Object *o;
807
cec736d2 808 assert(f);
de190aef
LP
809 assert(first);
810 assert(idx);
811 assert(p > 0);
cec736d2 812
de190aef
LP
813 a = le64toh(*first);
814 i = hidx = le64toh(*idx);
815 while (a > 0) {
816
817 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
818 if (r < 0)
819 return r;
cec736d2 820
de190aef
LP
821 n = journal_file_entry_array_n_items(o);
822 if (i < n) {
823 o->entry_array.items[i] = htole64(p);
824 *idx = htole64(hidx + 1);
825 return 0;
826 }
cec736d2 827
de190aef
LP
828 i -= n;
829 ap = a;
830 a = le64toh(o->entry_array.next_entry_array_offset);
831 }
832
833 if (hidx > n)
834 n = (hidx+1) * 2;
835 else
836 n = n * 2;
837
838 if (n < 4)
839 n = 4;
840
841 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
842 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
843 &o, &q);
cec736d2
LP
844 if (r < 0)
845 return r;
846
b0af6f41
LP
847 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
848 if (r < 0)
849 return r;
850
de190aef 851 o->entry_array.items[i] = htole64(p);
cec736d2 852
de190aef 853 if (ap == 0)
7be3aa17 854 *first = htole64(q);
cec736d2 855 else {
de190aef 856 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
857 if (r < 0)
858 return r;
859
de190aef
LP
860 o->entry_array.next_entry_array_offset = htole64(q);
861 }
cec736d2 862
de190aef
LP
863 *idx = htole64(hidx + 1);
864
865 return 0;
866}
cec736d2 867
de190aef 868static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
869 le64_t *extra,
870 le64_t *first,
871 le64_t *idx,
de190aef
LP
872 uint64_t p) {
873
874 int r;
875
876 assert(f);
877 assert(extra);
878 assert(first);
879 assert(idx);
880 assert(p > 0);
881
882 if (*idx == 0)
883 *extra = htole64(p);
884 else {
4fd052ae 885 le64_t i;
de190aef 886
7be3aa17 887 i = htole64(le64toh(*idx) - 1);
de190aef
LP
888 r = link_entry_into_array(f, first, &i, p);
889 if (r < 0)
890 return r;
cec736d2
LP
891 }
892
de190aef
LP
893 *idx = htole64(le64toh(*idx) + 1);
894 return 0;
895}
896
897static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
898 uint64_t p;
899 int r;
900 assert(f);
901 assert(o);
902 assert(offset > 0);
903
904 p = le64toh(o->entry.items[i].object_offset);
905 if (p == 0)
906 return -EINVAL;
907
908 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
909 if (r < 0)
910 return r;
911
de190aef
LP
912 return link_entry_into_array_plus_one(f,
913 &o->data.entry_offset,
914 &o->data.entry_array_offset,
915 &o->data.n_entries,
916 offset);
cec736d2
LP
917}
918
919static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 920 uint64_t n, i;
cec736d2
LP
921 int r;
922
923 assert(f);
924 assert(o);
925 assert(offset > 0);
de190aef 926 assert(o->object.type == OBJECT_ENTRY);
cec736d2 927
b788cc23
LP
928 __sync_synchronize();
929
cec736d2 930 /* Link up the entry itself */
de190aef
LP
931 r = link_entry_into_array(f,
932 &f->header->entry_array_offset,
933 &f->header->n_entries,
934 offset);
935 if (r < 0)
936 return r;
cec736d2 937
aaf53376 938 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 939
de190aef 940 if (f->header->head_entry_realtime == 0)
0ac38b70 941 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 942
0ac38b70 943 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
944 f->header->tail_entry_monotonic = o->entry.monotonic;
945
946 f->tail_entry_monotonic_valid = true;
cec736d2
LP
947
948 /* Link up the items */
949 n = journal_file_entry_n_items(o);
950 for (i = 0; i < n; i++) {
951 r = journal_file_link_entry_item(f, o, offset, i);
952 if (r < 0)
953 return r;
954 }
955
cec736d2
LP
956 return 0;
957}
958
959static int journal_file_append_entry_internal(
960 JournalFile *f,
961 const dual_timestamp *ts,
962 uint64_t xor_hash,
963 const EntryItem items[], unsigned n_items,
de190aef 964 uint64_t *seqnum,
cec736d2
LP
965 Object **ret, uint64_t *offset) {
966 uint64_t np;
967 uint64_t osize;
968 Object *o;
969 int r;
970
971 assert(f);
972 assert(items || n_items == 0);
de190aef 973 assert(ts);
cec736d2
LP
974
975 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
976
de190aef 977 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
978 if (r < 0)
979 return r;
980
d98cc1f2 981 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 982 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
983 o->entry.realtime = htole64(ts->realtime);
984 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
985 o->entry.xor_hash = htole64(xor_hash);
986 o->entry.boot_id = f->header->boot_id;
987
b0af6f41
LP
988 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
989 if (r < 0)
990 return r;
991
cec736d2
LP
992 r = journal_file_link_entry(f, o, np);
993 if (r < 0)
994 return r;
995
996 if (ret)
997 *ret = o;
998
999 if (offset)
1000 *offset = np;
1001
1002 return 0;
1003}
1004
cf244689 1005void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1006 assert(f);
1007
1008 /* inotify() does not receive IN_MODIFY events from file
1009 * accesses done via mmap(). After each access we hence
1010 * trigger IN_MODIFY by truncating the journal file to its
1011 * current size which triggers IN_MODIFY. */
1012
bc85bfee
LP
1013 __sync_synchronize();
1014
50f20cfd
LP
1015 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1016 log_error("Failed to to truncate file to its own size: %m");
1017}
1018
de190aef 1019int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1020 unsigned i;
1021 EntryItem *items;
1022 int r;
1023 uint64_t xor_hash = 0;
de190aef 1024 struct dual_timestamp _ts;
cec736d2
LP
1025
1026 assert(f);
1027 assert(iovec || n_iovec == 0);
1028
de190aef
LP
1029 if (!f->writable)
1030 return -EPERM;
1031
1032 if (!ts) {
1033 dual_timestamp_get(&_ts);
1034 ts = &_ts;
1035 }
1036
1037 if (f->tail_entry_monotonic_valid &&
1038 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1039 return -EINVAL;
1040
7560fffc
LP
1041 r = journal_file_maybe_append_tag(f, ts->realtime);
1042 if (r < 0)
1043 return r;
1044
64825d3c
LP
1045 /* alloca() can't take 0, hence let's allocate at least one */
1046 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1047
1048 for (i = 0; i < n_iovec; i++) {
1049 uint64_t p;
1050 Object *o;
1051
1052 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1053 if (r < 0)
cf244689 1054 return r;
cec736d2
LP
1055
1056 xor_hash ^= le64toh(o->data.hash);
1057 items[i].object_offset = htole64(p);
de7b95cd 1058 items[i].hash = o->data.hash;
cec736d2
LP
1059 }
1060
de190aef 1061 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1062
50f20cfd
LP
1063 journal_file_post_change(f);
1064
cec736d2
LP
1065 return r;
1066}
1067
de190aef
LP
1068static int generic_array_get(JournalFile *f,
1069 uint64_t first,
1070 uint64_t i,
1071 Object **ret, uint64_t *offset) {
1072
cec736d2 1073 Object *o;
6c8a39b8 1074 uint64_t p = 0, a;
cec736d2
LP
1075 int r;
1076
1077 assert(f);
1078
de190aef
LP
1079 a = first;
1080 while (a > 0) {
1081 uint64_t n;
cec736d2 1082
de190aef
LP
1083 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1084 if (r < 0)
1085 return r;
cec736d2 1086
de190aef
LP
1087 n = journal_file_entry_array_n_items(o);
1088 if (i < n) {
1089 p = le64toh(o->entry_array.items[i]);
1090 break;
cec736d2
LP
1091 }
1092
de190aef
LP
1093 i -= n;
1094 a = le64toh(o->entry_array.next_entry_array_offset);
1095 }
1096
1097 if (a <= 0 || p <= 0)
1098 return 0;
1099
1100 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1101 if (r < 0)
1102 return r;
1103
1104 if (ret)
1105 *ret = o;
1106
1107 if (offset)
1108 *offset = p;
1109
1110 return 1;
1111}
1112
1113static int generic_array_get_plus_one(JournalFile *f,
1114 uint64_t extra,
1115 uint64_t first,
1116 uint64_t i,
1117 Object **ret, uint64_t *offset) {
1118
1119 Object *o;
1120
1121 assert(f);
1122
1123 if (i == 0) {
1124 int r;
1125
1126 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1127 if (r < 0)
1128 return r;
1129
de190aef
LP
1130 if (ret)
1131 *ret = o;
cec736d2 1132
de190aef
LP
1133 if (offset)
1134 *offset = extra;
cec736d2 1135
de190aef 1136 return 1;
cec736d2
LP
1137 }
1138
de190aef
LP
1139 return generic_array_get(f, first, i-1, ret, offset);
1140}
cec736d2 1141
de190aef
LP
/* Results returned by the test_object() callbacks that are handed to
 * the bisection helpers */
enum {
        TEST_FOUND = 0, /* the tested entry matches the needle */
        TEST_LEFT  = 1, /* the tested entry compares lower than the needle */
        TEST_RIGHT = 2  /* the tested entry compares higher than the needle */
};
cec736d2 1147
de190aef
LP
1148static int generic_array_bisect(JournalFile *f,
1149 uint64_t first,
1150 uint64_t n,
1151 uint64_t needle,
1152 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1153 direction_t direction,
1154 Object **ret,
1155 uint64_t *offset,
1156 uint64_t *idx) {
1157
1158 uint64_t a, p, t = 0, i = 0, last_p = 0;
1159 bool subtract_one = false;
1160 Object *o, *array = NULL;
1161 int r;
cec736d2 1162
de190aef
LP
1163 assert(f);
1164 assert(test_object);
cec736d2 1165
de190aef
LP
1166 a = first;
1167 while (a > 0) {
1168 uint64_t left, right, k, lp;
1169
1170 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1171 if (r < 0)
1172 return r;
1173
de190aef
LP
1174 k = journal_file_entry_array_n_items(array);
1175 right = MIN(k, n);
1176 if (right <= 0)
1177 return 0;
cec736d2 1178
de190aef
LP
1179 i = right - 1;
1180 lp = p = le64toh(array->entry_array.items[i]);
1181 if (p <= 0)
1182 return -EBADMSG;
cec736d2 1183
de190aef
LP
1184 r = test_object(f, p, needle);
1185 if (r < 0)
1186 return r;
cec736d2 1187
de190aef
LP
1188 if (r == TEST_FOUND)
1189 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1190
1191 if (r == TEST_RIGHT) {
1192 left = 0;
1193 right -= 1;
1194 for (;;) {
1195 if (left == right) {
1196 if (direction == DIRECTION_UP)
1197 subtract_one = true;
1198
1199 i = left;
1200 goto found;
1201 }
1202
1203 assert(left < right);
1204
1205 i = (left + right) / 2;
1206 p = le64toh(array->entry_array.items[i]);
1207 if (p <= 0)
1208 return -EBADMSG;
1209
1210 r = test_object(f, p, needle);
1211 if (r < 0)
1212 return r;
cec736d2 1213
de190aef
LP
1214 if (r == TEST_FOUND)
1215 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1216
1217 if (r == TEST_RIGHT)
1218 right = i;
1219 else
1220 left = i + 1;
1221 }
1222 }
1223
cbdca852
LP
1224 if (k > n) {
1225 if (direction == DIRECTION_UP) {
1226 i = n;
1227 subtract_one = true;
1228 goto found;
1229 }
1230
cec736d2 1231 return 0;
cbdca852 1232 }
cec736d2 1233
de190aef
LP
1234 last_p = lp;
1235
1236 n -= k;
1237 t += k;
1238 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1239 }
1240
1241 return 0;
de190aef
LP
1242
1243found:
1244 if (subtract_one && t == 0 && i == 0)
1245 return 0;
1246
1247 if (subtract_one && i == 0)
1248 p = last_p;
1249 else if (subtract_one)
1250 p = le64toh(array->entry_array.items[i-1]);
1251 else
1252 p = le64toh(array->entry_array.items[i]);
1253
1254 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1255 if (r < 0)
1256 return r;
1257
1258 if (ret)
1259 *ret = o;
1260
1261 if (offset)
1262 *offset = p;
1263
1264 if (idx)
cbdca852 1265 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1266
1267 return 1;
cec736d2
LP
1268}
1269
de190aef
LP
1270static int generic_array_bisect_plus_one(JournalFile *f,
1271 uint64_t extra,
1272 uint64_t first,
1273 uint64_t n,
1274 uint64_t needle,
1275 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1276 direction_t direction,
1277 Object **ret,
1278 uint64_t *offset,
1279 uint64_t *idx) {
1280
cec736d2 1281 int r;
cbdca852
LP
1282 bool step_back = false;
1283 Object *o;
cec736d2
LP
1284
1285 assert(f);
de190aef 1286 assert(test_object);
cec736d2 1287
de190aef
LP
1288 if (n <= 0)
1289 return 0;
cec736d2 1290
de190aef
LP
1291 /* This bisects the array in object 'first', but first checks
1292 * an extra */
de190aef
LP
1293 r = test_object(f, extra, needle);
1294 if (r < 0)
1295 return r;
a536e261
LP
1296
1297 if (r == TEST_FOUND)
1298 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1299
cbdca852
LP
1300 /* if we are looking with DIRECTION_UP then we need to first
1301 see if in the actual array there is a matching entry, and
1302 return the last one of that. But if there isn't any we need
1303 to return this one. Hence remember this, and return it
1304 below. */
1305 if (r == TEST_LEFT)
1306 step_back = direction == DIRECTION_UP;
de190aef 1307
cbdca852
LP
1308 if (r == TEST_RIGHT) {
1309 if (direction == DIRECTION_DOWN)
1310 goto found;
1311 else
1312 return 0;
a536e261 1313 }
cec736d2 1314
de190aef
LP
1315 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1316
cbdca852
LP
1317 if (r == 0 && step_back)
1318 goto found;
1319
ecf68b1d 1320 if (r > 0 && idx)
de190aef
LP
1321 (*idx) ++;
1322
1323 return r;
cbdca852
LP
1324
1325found:
1326 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1327 if (r < 0)
1328 return r;
1329
1330 if (ret)
1331 *ret = o;
1332
1333 if (offset)
1334 *offset = extra;
1335
1336 if (idx)
1337 *idx = 0;
1338
1339 return 1;
1340}
1341
1342static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1343 assert(f);
1344 assert(p > 0);
1345
1346 if (p == needle)
1347 return TEST_FOUND;
1348 else if (p < needle)
1349 return TEST_LEFT;
1350 else
1351 return TEST_RIGHT;
1352}
1353
1354int journal_file_move_to_entry_by_offset(
1355 JournalFile *f,
1356 uint64_t p,
1357 direction_t direction,
1358 Object **ret,
1359 uint64_t *offset) {
1360
1361 return generic_array_bisect(f,
1362 le64toh(f->header->entry_array_offset),
1363 le64toh(f->header->n_entries),
1364 p,
1365 test_object_offset,
1366 direction,
1367 ret, offset, NULL);
de190aef
LP
1368}
1369
cbdca852 1370
de190aef
LP
1371static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1372 Object *o;
1373 int r;
1374
1375 assert(f);
1376 assert(p > 0);
1377
1378 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1379 if (r < 0)
1380 return r;
1381
de190aef
LP
1382 if (le64toh(o->entry.seqnum) == needle)
1383 return TEST_FOUND;
1384 else if (le64toh(o->entry.seqnum) < needle)
1385 return TEST_LEFT;
1386 else
1387 return TEST_RIGHT;
1388}
cec736d2 1389
de190aef
LP
1390int journal_file_move_to_entry_by_seqnum(
1391 JournalFile *f,
1392 uint64_t seqnum,
1393 direction_t direction,
1394 Object **ret,
1395 uint64_t *offset) {
1396
1397 return generic_array_bisect(f,
1398 le64toh(f->header->entry_array_offset),
1399 le64toh(f->header->n_entries),
1400 seqnum,
1401 test_object_seqnum,
1402 direction,
1403 ret, offset, NULL);
1404}
cec736d2 1405
de190aef
LP
1406static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1407 Object *o;
1408 int r;
1409
1410 assert(f);
1411 assert(p > 0);
1412
1413 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1414 if (r < 0)
1415 return r;
1416
1417 if (le64toh(o->entry.realtime) == needle)
1418 return TEST_FOUND;
1419 else if (le64toh(o->entry.realtime) < needle)
1420 return TEST_LEFT;
1421 else
1422 return TEST_RIGHT;
cec736d2
LP
1423}
1424
de190aef
LP
1425int journal_file_move_to_entry_by_realtime(
1426 JournalFile *f,
1427 uint64_t realtime,
1428 direction_t direction,
1429 Object **ret,
1430 uint64_t *offset) {
1431
1432 return generic_array_bisect(f,
1433 le64toh(f->header->entry_array_offset),
1434 le64toh(f->header->n_entries),
1435 realtime,
1436 test_object_realtime,
1437 direction,
1438 ret, offset, NULL);
1439}
1440
1441static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1442 Object *o;
1443 int r;
1444
1445 assert(f);
1446 assert(p > 0);
1447
1448 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1449 if (r < 0)
1450 return r;
1451
1452 if (le64toh(o->entry.monotonic) == needle)
1453 return TEST_FOUND;
1454 else if (le64toh(o->entry.monotonic) < needle)
1455 return TEST_LEFT;
1456 else
1457 return TEST_RIGHT;
1458}
1459
1460int journal_file_move_to_entry_by_monotonic(
1461 JournalFile *f,
1462 sd_id128_t boot_id,
1463 uint64_t monotonic,
1464 direction_t direction,
1465 Object **ret,
1466 uint64_t *offset) {
1467
10b6f904 1468 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1469 Object *o;
1470 int r;
1471
cbdca852 1472 assert(f);
de190aef 1473
cbdca852 1474 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1475 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1476 if (r < 0)
1477 return r;
cbdca852 1478 if (r == 0)
de190aef
LP
1479 return -ENOENT;
1480
1481 return generic_array_bisect_plus_one(f,
1482 le64toh(o->data.entry_offset),
1483 le64toh(o->data.entry_array_offset),
1484 le64toh(o->data.n_entries),
1485 monotonic,
1486 test_object_monotonic,
1487 direction,
1488 ret, offset, NULL);
1489}
1490
de190aef
LP
1491int journal_file_next_entry(
1492 JournalFile *f,
1493 Object *o, uint64_t p,
1494 direction_t direction,
1495 Object **ret, uint64_t *offset) {
1496
1497 uint64_t i, n;
cec736d2
LP
1498 int r;
1499
1500 assert(f);
de190aef
LP
1501 assert(p > 0 || !o);
1502
1503 n = le64toh(f->header->n_entries);
1504 if (n <= 0)
1505 return 0;
cec736d2
LP
1506
1507 if (!o)
de190aef 1508 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1509 else {
de190aef 1510 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1511 return -EINVAL;
1512
de190aef
LP
1513 r = generic_array_bisect(f,
1514 le64toh(f->header->entry_array_offset),
1515 le64toh(f->header->n_entries),
1516 p,
1517 test_object_offset,
1518 DIRECTION_DOWN,
1519 NULL, NULL,
1520 &i);
1521 if (r <= 0)
1522 return r;
1523
1524 if (direction == DIRECTION_DOWN) {
1525 if (i >= n - 1)
1526 return 0;
1527
1528 i++;
1529 } else {
1530 if (i <= 0)
1531 return 0;
1532
1533 i--;
1534 }
cec736d2
LP
1535 }
1536
de190aef
LP
1537 /* And jump to it */
1538 return generic_array_get(f,
1539 le64toh(f->header->entry_array_offset),
1540 i,
1541 ret, offset);
1542}
cec736d2 1543
de190aef
LP
1544int journal_file_skip_entry(
1545 JournalFile *f,
1546 Object *o, uint64_t p,
1547 int64_t skip,
1548 Object **ret, uint64_t *offset) {
1549
1550 uint64_t i, n;
1551 int r;
1552
1553 assert(f);
1554 assert(o);
1555 assert(p > 0);
1556
1557 if (o->object.type != OBJECT_ENTRY)
1558 return -EINVAL;
1559
1560 r = generic_array_bisect(f,
1561 le64toh(f->header->entry_array_offset),
1562 le64toh(f->header->n_entries),
1563 p,
1564 test_object_offset,
1565 DIRECTION_DOWN,
1566 NULL, NULL,
1567 &i);
1568 if (r <= 0)
cec736d2
LP
1569 return r;
1570
de190aef
LP
1571 /* Calculate new index */
1572 if (skip < 0) {
1573 if ((uint64_t) -skip >= i)
1574 i = 0;
1575 else
1576 i = i - (uint64_t) -skip;
1577 } else
1578 i += (uint64_t) skip;
cec736d2 1579
de190aef
LP
1580 n = le64toh(f->header->n_entries);
1581 if (n <= 0)
1582 return -EBADMSG;
cec736d2 1583
de190aef
LP
1584 if (i >= n)
1585 i = n-1;
1586
1587 return generic_array_get(f,
1588 le64toh(f->header->entry_array_offset),
1589 i,
1590 ret, offset);
cec736d2
LP
1591}
1592
de190aef
LP
1593int journal_file_next_entry_for_data(
1594 JournalFile *f,
1595 Object *o, uint64_t p,
1596 uint64_t data_offset,
1597 direction_t direction,
1598 Object **ret, uint64_t *offset) {
1599
1600 uint64_t n, i;
cec736d2 1601 int r;
de190aef 1602 Object *d;
cec736d2
LP
1603
1604 assert(f);
de190aef 1605 assert(p > 0 || !o);
cec736d2 1606
de190aef 1607 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1608 if (r < 0)
de190aef 1609 return r;
cec736d2 1610
de190aef
LP
1611 n = le64toh(d->data.n_entries);
1612 if (n <= 0)
1613 return n;
cec736d2 1614
de190aef
LP
1615 if (!o)
1616 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1617 else {
1618 if (o->object.type != OBJECT_ENTRY)
1619 return -EINVAL;
cec736d2 1620
de190aef
LP
1621 r = generic_array_bisect_plus_one(f,
1622 le64toh(d->data.entry_offset),
1623 le64toh(d->data.entry_array_offset),
1624 le64toh(d->data.n_entries),
1625 p,
1626 test_object_offset,
1627 DIRECTION_DOWN,
1628 NULL, NULL,
1629 &i);
1630
1631 if (r <= 0)
cec736d2
LP
1632 return r;
1633
de190aef
LP
1634 if (direction == DIRECTION_DOWN) {
1635 if (i >= n - 1)
1636 return 0;
cec736d2 1637
de190aef
LP
1638 i++;
1639 } else {
1640 if (i <= 0)
1641 return 0;
cec736d2 1642
de190aef
LP
1643 i--;
1644 }
cec736d2 1645
de190aef 1646 }
cec736d2 1647
de190aef
LP
1648 return generic_array_get_plus_one(f,
1649 le64toh(d->data.entry_offset),
1650 le64toh(d->data.entry_array_offset),
1651 i,
1652 ret, offset);
1653}
cec736d2 1654
cbdca852
LP
1655int journal_file_move_to_entry_by_offset_for_data(
1656 JournalFile *f,
1657 uint64_t data_offset,
1658 uint64_t p,
1659 direction_t direction,
1660 Object **ret, uint64_t *offset) {
1661
1662 int r;
1663 Object *d;
1664
1665 assert(f);
1666
1667 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1668 if (r < 0)
1669 return r;
1670
1671 return generic_array_bisect_plus_one(f,
1672 le64toh(d->data.entry_offset),
1673 le64toh(d->data.entry_array_offset),
1674 le64toh(d->data.n_entries),
1675 p,
1676 test_object_offset,
1677 direction,
1678 ret, offset, NULL);
1679}
1680
1681int journal_file_move_to_entry_by_monotonic_for_data(
1682 JournalFile *f,
1683 uint64_t data_offset,
1684 sd_id128_t boot_id,
1685 uint64_t monotonic,
1686 direction_t direction,
1687 Object **ret, uint64_t *offset) {
1688
1689 char t[9+32+1] = "_BOOT_ID=";
1690 Object *o, *d;
1691 int r;
1692 uint64_t b, z;
1693
1694 assert(f);
1695
1696 /* First, seek by time */
1697 sd_id128_to_string(boot_id, t + 9);
1698 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1699 if (r < 0)
1700 return r;
1701 if (r == 0)
1702 return -ENOENT;
1703
1704 r = generic_array_bisect_plus_one(f,
1705 le64toh(o->data.entry_offset),
1706 le64toh(o->data.entry_array_offset),
1707 le64toh(o->data.n_entries),
1708 monotonic,
1709 test_object_monotonic,
1710 direction,
1711 NULL, &z, NULL);
1712 if (r <= 0)
1713 return r;
1714
1715 /* And now, continue seeking until we find an entry that
1716 * exists in both bisection arrays */
1717
1718 for (;;) {
1719 Object *qo;
1720 uint64_t p, q;
1721
1722 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1723 if (r < 0)
1724 return r;
1725
1726 r = generic_array_bisect_plus_one(f,
1727 le64toh(d->data.entry_offset),
1728 le64toh(d->data.entry_array_offset),
1729 le64toh(d->data.n_entries),
1730 z,
1731 test_object_offset,
1732 direction,
1733 NULL, &p, NULL);
1734 if (r <= 0)
1735 return r;
1736
1737 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1738 if (r < 0)
1739 return r;
1740
1741 r = generic_array_bisect_plus_one(f,
1742 le64toh(o->data.entry_offset),
1743 le64toh(o->data.entry_array_offset),
1744 le64toh(o->data.n_entries),
1745 p,
1746 test_object_offset,
1747 direction,
1748 &qo, &q, NULL);
1749
1750 if (r <= 0)
1751 return r;
1752
1753 if (p == q) {
1754 if (ret)
1755 *ret = qo;
1756 if (offset)
1757 *offset = q;
1758
1759 return 1;
1760 }
1761
1762 z = q;
1763 }
1764
1765 return 0;
1766}
1767
de190aef
LP
1768int journal_file_move_to_entry_by_seqnum_for_data(
1769 JournalFile *f,
1770 uint64_t data_offset,
1771 uint64_t seqnum,
1772 direction_t direction,
1773 Object **ret, uint64_t *offset) {
cec736d2 1774
de190aef
LP
1775 Object *d;
1776 int r;
cec736d2 1777
91a31dde
LP
1778 assert(f);
1779
de190aef 1780 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1781 if (r < 0)
de190aef 1782 return r;
cec736d2 1783
de190aef
LP
1784 return generic_array_bisect_plus_one(f,
1785 le64toh(d->data.entry_offset),
1786 le64toh(d->data.entry_array_offset),
1787 le64toh(d->data.n_entries),
1788 seqnum,
1789 test_object_seqnum,
1790 direction,
1791 ret, offset, NULL);
1792}
cec736d2 1793
de190aef
LP
1794int journal_file_move_to_entry_by_realtime_for_data(
1795 JournalFile *f,
1796 uint64_t data_offset,
1797 uint64_t realtime,
1798 direction_t direction,
1799 Object **ret, uint64_t *offset) {
1800
1801 Object *d;
1802 int r;
1803
91a31dde
LP
1804 assert(f);
1805
de190aef 1806 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1807 if (r < 0)
de190aef
LP
1808 return r;
1809
1810 return generic_array_bisect_plus_one(f,
1811 le64toh(d->data.entry_offset),
1812 le64toh(d->data.entry_array_offset),
1813 le64toh(d->data.n_entries),
1814 realtime,
1815 test_object_realtime,
1816 direction,
1817 ret, offset, NULL);
cec736d2
LP
1818}
1819
7560fffc
LP
1820static void *fsprg_state(JournalFile *f) {
1821 uint64_t a, b;
1822 assert(f);
1823
1824 if (!f->authenticate)
1825 return NULL;
1826
1827 a = le64toh(f->fsprg_header->header_size);
1828 b = le64toh(f->fsprg_header->state_size);
1829
1830 if (a + b > f->fsprg_size)
1831 return NULL;
1832
1833 return (uint8_t*) f->fsprg_header + a;
1834}
1835
d98cc1f2
LP
1836static uint64_t journal_file_tag_seqnum(JournalFile *f) {
1837 uint64_t r;
1838
1839 assert(f);
1840
1841 r = le64toh(f->header->n_tags) + 1;
1842 f->header->n_tags = htole64(r);
1843
1844 return r;
1845}
1846
b0af6f41 1847int journal_file_append_tag(JournalFile *f) {
7560fffc
LP
1848 Object *o;
1849 uint64_t p;
1850 int r;
1851
1852 assert(f);
1853
1854 if (!f->authenticate)
1855 return 0;
1856
1857 if (!f->hmac_running)
1858 return 0;
1859
1860 log_debug("Writing tag for epoch %llu\n", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
1861
1862 assert(f->hmac);
1863
1864 r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
1865 if (r < 0)
1866 return r;
1867
d98cc1f2
LP
1868 o->tag.seqnum = htole64(journal_file_tag_seqnum(f));
1869
1870 /* Add the tag object itself, so that we can protect its
1871 * header. This will exclude the actual hash value in it */
1872 r = journal_file_hmac_put_object(f, OBJECT_TAG, p);
1873 if (r < 0)
1874 return r;
1875
7560fffc
LP
1876 /* Get the HMAC tag and store it in the object */
1877 memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
1878 f->hmac_running = false;
1879
1880 return 0;
1881}
1882
1883static int journal_file_hmac_start(JournalFile *f) {
1884 uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
1885
1886 assert(f);
1887
1888 if (!f->authenticate)
1889 return 0;
1890
1891 if (f->hmac_running)
1892 return 0;
1893
1894 /* Prepare HMAC for next cycle */
1895 gcry_md_reset(f->hmac);
1896 FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
1897 gcry_md_setkey(f->hmac, key, sizeof(key));
1898
1899 f->hmac_running = true;
1900
1901 return 0;
1902}
1903
1904static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
1905 uint64_t t;
1906
1907 assert(f);
1908 assert(epoch);
1909 assert(f->authenticate);
1910
1911 if (le64toh(f->fsprg_header->fsprg_start_usec) == 0 ||
1912 le64toh(f->fsprg_header->fsprg_interval_usec) == 0)
1913 return -ENOTSUP;
1914
1915 if (realtime < le64toh(f->fsprg_header->fsprg_start_usec))
1916 return -ESTALE;
1917
1918 t = realtime - le64toh(f->fsprg_header->fsprg_start_usec);
1919 t = t / le64toh(f->fsprg_header->fsprg_interval_usec);
1920
1921 *epoch = t;
1922 return 0;
1923}
1924
1925static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
1926 uint64_t goal, epoch;
1927 int r;
1928 assert(f);
1929
1930 if (!f->authenticate)
1931 return 0;
1932
1933 r = journal_file_get_epoch(f, realtime, &goal);
1934 if (r < 0)
1935 return r;
1936
1937 epoch = FSPRG_GetEpoch(fsprg_state(f));
1938 if (epoch > goal)
1939 return -ESTALE;
1940
1941 return epoch != goal;
1942}
1943
1944static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
1945 uint64_t goal, epoch;
1946 int r;
1947
1948 assert(f);
1949
1950 if (!f->authenticate)
1951 return 0;
1952
1953 r = journal_file_get_epoch(f, realtime, &goal);
1954 if (r < 0)
1955 return r;
1956
1957 epoch = FSPRG_GetEpoch(fsprg_state(f));
1958 if (epoch < goal)
1959 log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) epoch, (unsigned long long) goal);
1960
1961 for (;;) {
1962 if (epoch > goal)
1963 return -ESTALE;
1964 if (epoch == goal)
1965 return 0;
1966
1967 FSPRG_Evolve(fsprg_state(f));
1968 epoch = FSPRG_GetEpoch(fsprg_state(f));
1969 }
1970}
1971
1972static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
1973 int r;
1974
1975 assert(f);
1976
1977 if (!f->authenticate)
1978 return 0;
1979
1980 r = journal_file_need_evolve(f, realtime);
1981 if (r <= 0)
1982 return 0;
1983
1984 r = journal_file_append_tag(f);
1985 if (r < 0)
1986 return r;
1987
1988 r = journal_file_evolve(f, realtime);
1989 if (r < 0)
1990 return r;
1991
1992 r = journal_file_hmac_start(f);
1993 if (r < 0)
1994 return r;
1995
1996 return 0;
1997}
1998
1999static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
2000 int r;
2001 Object *o;
2002
2003 assert(f);
2004
2005 if (!f->authenticate)
2006 return 0;
2007
2008 r = journal_file_hmac_start(f);
2009 if (r < 0)
2010 return r;
2011
2012 r = journal_file_move_to_object(f, type, p, &o);
2013 if (r < 0)
2014 return r;
2015
2016 gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
2017
2018 switch (o->object.type) {
2019
2020 case OBJECT_DATA:
d98cc1f2
LP
2021 /* All but: hash and payload are mutable */
2022 gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
7560fffc
LP
2023 gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
2024 break;
2025
2026 case OBJECT_ENTRY:
2027 /* All */
2028 gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
2029 break;
2030
2031 case OBJECT_FIELD_HASH_TABLE:
2032 case OBJECT_DATA_HASH_TABLE:
2033 case OBJECT_ENTRY_ARRAY:
2034 /* Nothing: everything is mutable */
2035 break;
2036
2037 case OBJECT_TAG:
d98cc1f2
LP
2038 /* All but the tag itself */
2039 gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
7560fffc 2040 break;
7560fffc
LP
2041 default:
2042 return -EINVAL;
2043 }
2044
2045 return 0;
2046}
2047
2048static int journal_file_hmac_put_header(JournalFile *f) {
2049 int r;
2050
2051 assert(f);
2052
2053 if (!f->authenticate)
2054 return 0;
2055
2056 r = journal_file_hmac_start(f);
2057 if (r < 0)
2058 return r;
2059
2060 /* All but state+reserved, boot_id, arena_size,
2061 * tail_object_offset, n_objects, n_entries, tail_seqnum,
2062 * head_entry_realtime, tail_entry_realtime,
2063 * tail_entry_monotonic, n_data, n_fields, header_tag */
2064
2065 gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
2066 gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
2067 gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
2068 gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
2069 gcry_md_write(f->hmac, &f->header->head_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_seqnum));
2070
2071 return 0;
2072}
2073
2074static int journal_file_load_fsprg(JournalFile *f) {
2075 int r, fd = -1;
2076 char *p = NULL;
2077 struct stat st;
2078 FSPRGHeader *m = NULL;
2079 sd_id128_t machine;
2080
2081 assert(f);
2082
2083 if (!f->authenticate)
2084 return 0;
2085
2086 r = sd_id128_get_machine(&machine);
2087 if (r < 0)
2088 return r;
2089
2090 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
2091 SD_ID128_FORMAT_VAL(machine)) < 0)
2092 return -ENOMEM;
2093
2094 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
2095 if (fd < 0) {
2096 log_error("Failed to open %s: %m", p);
2097 r = -errno;
2098 goto finish;
2099 }
2100
2101 if (fstat(fd, &st) < 0) {
2102 r = -errno;
2103 goto finish;
2104 }
2105
2106 if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
2107 r = -ENODATA;
2108 goto finish;
2109 }
2110
2111 m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
2112 if (m == MAP_FAILED) {
2113 m = NULL;
2114 r = -errno;
2115 goto finish;
2116 }
2117
2118 if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
2119 r = -EBADMSG;
2120 goto finish;
2121 }
2122
2123 if (m->incompatible_flags != 0) {
2124 r = -EPROTONOSUPPORT;
2125 goto finish;
2126 }
2127
2128 if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
2129 r = -EBADMSG;
2130 goto finish;
2131 }
2132
2133 if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) {
2134 r = -EBADMSG;
2135 goto finish;
2136 }
2137
2138 f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
2139 if ((uint64_t) st.st_size < f->fsprg_size) {
2140 r = -ENODATA;
2141 goto finish;
2142 }
2143
2144 if (!sd_id128_equal(machine, m->machine_id)) {
2145 r = -EHOSTDOWN;
2146 goto finish;
2147 }
2148
2149 if (le64toh(m->fsprg_start_usec) <= 0 ||
2150 le64toh(m->fsprg_interval_usec) <= 0) {
2151 r = -EBADMSG;
2152 goto finish;
2153 }
2154
2155 f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2156 if (f->fsprg_header == MAP_FAILED) {
2157 f->fsprg_header = NULL;
2158 r = -errno;
2159 goto finish;
2160 }
2161
2162 r = 0;
2163
2164finish:
2165 if (m)
2166 munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
2167
2168 if (fd >= 0)
2169 close_nointr_nofail(fd);
2170
2171 free(p);
2172 return r;
2173}
2174
2175static int journal_file_setup_hmac(JournalFile *f) {
2176 gcry_error_t e;
2177
2178 if (!f->authenticate)
2179 return 0;
2180
2181 e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
2182 if (e != 0)
2183 return -ENOTSUP;
2184
2185 return 0;
2186}
2187
2188static int journal_file_append_first_tag(JournalFile *f) {
2189 int r;
2190 uint64_t p;
2191
2192 if (!f->authenticate)
2193 return 0;
2194
2195 log_debug("Calculating first tag...");
2196
2197 r = journal_file_hmac_put_header(f);
2198 if (r < 0)
2199 return r;
2200
2201 p = le64toh(f->header->field_hash_table_offset);
2202 if (p < offsetof(Object, hash_table.items))
2203 return -EINVAL;
2204 p -= offsetof(Object, hash_table.items);
2205
2206 r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
2207 if (r < 0)
2208 return r;
2209
2210 p = le64toh(f->header->data_hash_table_offset);
2211 if (p < offsetof(Object, hash_table.items))
2212 return -EINVAL;
2213 p -= offsetof(Object, hash_table.items);
2214
2215 r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
2216 if (r < 0)
2217 return r;
2218
2219 r = journal_file_append_tag(f);
2220 if (r < 0)
2221 return r;
2222
2223 return 0;
2224}
2225
cec736d2 2226void journal_file_dump(JournalFile *f) {
cec736d2
LP
2227 Object *o;
2228 int r;
2229 uint64_t p;
2230
2231 assert(f);
2232
dca6219e 2233 journal_file_print_header(f);
cec736d2 2234
23b0b2b2 2235 p = le64toh(f->header->header_size);
cec736d2 2236 while (p != 0) {
de190aef 2237 r = journal_file_move_to_object(f, -1, p, &o);
cec736d2
LP
2238 if (r < 0)
2239 goto fail;
2240
2241 switch (o->object.type) {
2242
2243 case OBJECT_UNUSED:
2244 printf("Type: OBJECT_UNUSED\n");
2245 break;
2246
2247 case OBJECT_DATA:
2248 printf("Type: OBJECT_DATA\n");
2249 break;
2250
2251 case OBJECT_ENTRY:
3fbf9cbb
LP
2252 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
2253 (unsigned long long) le64toh(o->entry.seqnum),
2254 (unsigned long long) le64toh(o->entry.monotonic),
2255 (unsigned long long) le64toh(o->entry.realtime));
cec736d2
LP
2256 break;
2257
de190aef
LP
2258 case OBJECT_FIELD_HASH_TABLE:
2259 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
cec736d2
LP
2260 break;
2261
de190aef
LP
2262 case OBJECT_DATA_HASH_TABLE:
2263 printf("Type: OBJECT_DATA_HASH_TABLE\n");
2264 break;
2265
2266 case OBJECT_ENTRY_ARRAY:
2267 printf("Type: OBJECT_ENTRY_ARRAY\n");
cec736d2 2268 break;
8144056f 2269
7560fffc 2270 case OBJECT_TAG:
d98cc1f2
LP
2271 printf("Type: OBJECT_TAG %llu\n",
2272 (unsigned long long) le64toh(o->tag.seqnum));
8144056f 2273 break;
cec736d2
LP
2274 }
2275
807e17f0
LP
2276 if (o->object.flags & OBJECT_COMPRESSED)
2277 printf("Flags: COMPRESSED\n");
2278
cec736d2
LP
2279 if (p == le64toh(f->header->tail_object_offset))
2280 p = 0;
2281 else
2282 p = p + ALIGN64(le64toh(o->object.size));
2283 }
2284
2285 return;
2286fail:
2287 log_error("File corrupt");
2288}
2289
dca6219e
LP
2290void journal_file_print_header(JournalFile *f) {
2291 char a[33], b[33], c[33];
2292 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
2293
2294 assert(f);
2295
2296 printf("File Path: %s\n"
2297 "File ID: %s\n"
2298 "Machine ID: %s\n"
2299 "Boot ID: %s\n"
2300 "Sequential Number ID: %s\n"
dc36ac67
LP
2301 "State: %s\n"
2302 "Compatible Flags:%s%s\n"
2303 "Incompatible Flags:%s%s\n"
dca6219e
LP
2304 "Header size: %llu\n"
2305 "Arena size: %llu\n"
2306 "Data Hash Table Size: %llu\n"
2307 "Field Hash Table Size: %llu\n"
2308 "Objects: %llu\n"
2309 "Entry Objects: %llu\n"
2310 "Rotate Suggested: %s\n"
2311 "Head Sequential Number: %llu\n"
2312 "Tail Sequential Number: %llu\n"
2313 "Head Realtime Timestamp: %s\n"
2314 "Tail Realtime Timestamp: %s\n",
2315 f->path,
2316 sd_id128_to_string(f->header->file_id, a),
2317 sd_id128_to_string(f->header->machine_id, b),
2318 sd_id128_to_string(f->header->boot_id, c),
2319 sd_id128_to_string(f->header->seqnum_id, c),
dc36ac67
LP
2320 f->header->state == STATE_OFFLINE ? "offline" :
2321 f->header->state == STATE_ONLINE ? "online" :
2322 f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
7560fffc
LP
2323 (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
2324 (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
dc36ac67
LP
2325 (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
2326 (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
dca6219e
LP
2327 (unsigned long long) le64toh(f->header->header_size),
2328 (unsigned long long) le64toh(f->header->arena_size),
2329 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2330 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2331 (unsigned long long) le64toh(f->header->n_objects),
2332 (unsigned long long) le64toh(f->header->n_entries),
2333 yes_no(journal_file_rotate_suggested(f)),
2334 (unsigned long long) le64toh(f->header->head_seqnum),
2335 (unsigned long long) le64toh(f->header->tail_seqnum),
2336 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2337 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
2338
2339 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2340 printf("Data Objects: %llu\n"
2341 "Data Hash Table Fill: %.1f%%\n",
2342 (unsigned long long) le64toh(f->header->n_data),
2343 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2344
2345 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2346 printf("Field Objects: %llu\n"
2347 "Field Hash Table Fill: %.1f%%\n",
2348 (unsigned long long) le64toh(f->header->n_fields),
2349 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2350}
2351
cec736d2
LP
2352int journal_file_open(
2353 const char *fname,
2354 int flags,
2355 mode_t mode,
7560fffc
LP
2356 bool compress,
2357 bool authenticate,
4a92baf3 2358 JournalMetrics *metrics,
16e9f408 2359 MMapCache *mmap_cache,
0ac38b70 2360 JournalFile *template,
cec736d2
LP
2361 JournalFile **ret) {
2362
2363 JournalFile *f;
2364 int r;
2365 bool newly_created = false;
2366
2367 assert(fname);
2368
2369 if ((flags & O_ACCMODE) != O_RDONLY &&
2370 (flags & O_ACCMODE) != O_RDWR)
2371 return -EINVAL;
2372
9447a7f1
LP
2373 if (!endswith(fname, ".journal"))
2374 return -EINVAL;
2375
cec736d2
LP
2376 f = new0(JournalFile, 1);
2377 if (!f)
2378 return -ENOMEM;
2379
0ac38b70 2380 f->fd = -1;
0ac38b70 2381 f->mode = mode;
cec736d2 2382
7560fffc
LP
2383 f->flags = flags;
2384 f->prot = prot_from_flags(flags);
2385 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2386 f->compress = compress;
2387 f->authenticate = authenticate;
15944db8 2388
16e9f408
LP
2389 if (mmap_cache)
2390 f->mmap = mmap_cache_ref(mmap_cache);
2391 else {
2392 /* One context for each type, plus the zeroth catchall
2393 * context. One fd for the file plus one for each type
2394 * (which we need during verification */
2395 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
2396 if (!f->mmap) {
2397 r = -ENOMEM;
2398 goto fail;
2399 }
2400 }
2401
cec736d2
LP
2402 f->path = strdup(fname);
2403 if (!f->path) {
2404 r = -ENOMEM;
2405 goto fail;
2406 }
2407
0ac38b70
LP
2408 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2409 if (f->fd < 0) {
2410 r = -errno;
2411 goto fail;
2412 }
2413
cec736d2
LP
2414 if (fstat(f->fd, &f->last_stat) < 0) {
2415 r = -errno;
2416 goto fail;
2417 }
2418
2419 if (f->last_stat.st_size == 0 && f->writable) {
2420 newly_created = true;
2421
7560fffc
LP
2422 /* Try to load the FSPRG state, and if we can't, then
2423 * just don't do authentication */
2424 r = journal_file_load_fsprg(f);
2425 if (r < 0)
2426 f->authenticate = false;
2427
0ac38b70 2428 r = journal_file_init_header(f, template);
cec736d2
LP
2429 if (r < 0)
2430 goto fail;
2431
2432 if (fstat(f->fd, &f->last_stat) < 0) {
2433 r = -errno;
2434 goto fail;
2435 }
2436 }
2437
dca6219e 2438 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
cec736d2
LP
2439 r = -EIO;
2440 goto fail;
2441 }
2442
2443 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2444 if (f->header == MAP_FAILED) {
2445 f->header = NULL;
2446 r = -errno;
2447 goto fail;
2448 }
2449
2450 if (!newly_created) {
2451 r = journal_file_verify_header(f);
2452 if (r < 0)
2453 goto fail;
b0af6f41 2454 }
7560fffc 2455
b0af6f41 2456 if (!newly_created && f->writable) {
7560fffc
LP
2457 r = journal_file_load_fsprg(f);
2458 if (r < 0)
2459 goto fail;
cec736d2
LP
2460 }
2461
2462 if (f->writable) {
4a92baf3
LP
2463 if (metrics) {
2464 journal_default_metrics(metrics, f->fd);
2465 f->metrics = *metrics;
2466 } else if (template)
2467 f->metrics = template->metrics;
2468
cec736d2
LP
2469 r = journal_file_refresh_header(f);
2470 if (r < 0)
2471 goto fail;
7560fffc
LP
2472
2473 r = journal_file_setup_hmac(f);
2474 if (r < 0)
2475 goto fail;
cec736d2
LP
2476 }
2477
2478 if (newly_created) {
de190aef 2479 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2480 if (r < 0)
2481 goto fail;
2482
de190aef 2483 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2484 if (r < 0)
2485 goto fail;
7560fffc
LP
2486
2487 r = journal_file_append_first_tag(f);
2488 if (r < 0)
2489 goto fail;
cec736d2
LP
2490 }
2491
de190aef 2492 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2493 if (r < 0)
2494 goto fail;
2495
de190aef 2496 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2497 if (r < 0)
2498 goto fail;
2499
2500 if (ret)
2501 *ret = f;
2502
2503 return 0;
2504
2505fail:
2506 journal_file_close(f);
2507
2508 return r;
2509}
0ac38b70 2510
7560fffc 2511int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
0ac38b70
LP
2512 char *p;
2513 size_t l;
2514 JournalFile *old_file, *new_file = NULL;
2515 int r;
2516
2517 assert(f);
2518 assert(*f);
2519
2520 old_file = *f;
2521
2522 if (!old_file->writable)
2523 return -EINVAL;
2524
2525 if (!endswith(old_file->path, ".journal"))
2526 return -EINVAL;
2527
2528 l = strlen(old_file->path);
2529
9447a7f1 2530 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2531 if (!p)
2532 return -ENOMEM;
2533
2534 memcpy(p, old_file->path, l - 8);
2535 p[l-8] = '@';
2536 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2537 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2538 "-%016llx-%016llx.journal",
dca6219e 2539 (unsigned long long) le64toh((*f)->header->tail_seqnum),
0ac38b70
LP
2540 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2541
2542 r = rename(old_file->path, p);
2543 free(p);
2544
2545 if (r < 0)
2546 return -errno;
2547
ccdbaf91 2548 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2549
16e9f408 2550 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2551 journal_file_close(old_file);
2552
2553 *f = new_file;
2554 return r;
2555}
2556
9447a7f1
LP
2557int journal_file_open_reliably(
2558 const char *fname,
2559 int flags,
2560 mode_t mode,
7560fffc
LP
2561 bool compress,
2562 bool authenticate,
4a92baf3 2563 JournalMetrics *metrics,
16e9f408 2564 MMapCache *mmap,
9447a7f1
LP
2565 JournalFile *template,
2566 JournalFile **ret) {
2567
2568 int r;
2569 size_t l;
2570 char *p;
2571
16e9f408 2572 r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
0071d9f1
LP
2573 if (r != -EBADMSG && /* corrupted */
2574 r != -ENODATA && /* truncated */
2575 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2576 r != -EPROTONOSUPPORT && /* incompatible feature */
2577 r != -EBUSY && /* unclean shutdown */
2578 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2579 return r;
2580
2581 if ((flags & O_ACCMODE) == O_RDONLY)
2582 return r;
2583
2584 if (!(flags & O_CREAT))
2585 return r;
2586
7560fffc
LP
2587 if (!endswith(fname, ".journal"))
2588 return r;
2589
5c70eab4
LP
2590 /* The file is corrupted. Rotate it away and try it again (but only once) */
2591
9447a7f1
LP
2592 l = strlen(fname);
2593 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2594 (int) (l-8), fname,
2595 (unsigned long long) now(CLOCK_REALTIME),
2596 random_ull()) < 0)
2597 return -ENOMEM;
2598
2599 r = rename(fname, p);
2600 free(p);
2601 if (r < 0)
2602 return -errno;
2603
a1a1898f 2604 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2605
16e9f408 2606 return journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
9447a7f1
LP
2607}
2608
0ac38b70
LP
2609struct vacuum_info {
2610 off_t usage;
2611 char *filename;
2612
2613 uint64_t realtime;
2614 sd_id128_t seqnum_id;
2615 uint64_t seqnum;
5c70eab4
LP
2616
2617 bool have_seqnum;
0ac38b70
LP
2618};
2619
2620static int vacuum_compare(const void *_a, const void *_b) {
2621 const struct vacuum_info *a, *b;
2622
2623 a = _a;
2624 b = _b;
2625
5c70eab4
LP
2626 if (a->have_seqnum && b->have_seqnum &&
2627 sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
0ac38b70
LP
2628 if (a->seqnum < b->seqnum)
2629 return -1;
2630 else if (a->seqnum > b->seqnum)
2631 return 1;
2632 else
2633 return 0;
2634 }
2635
2636 if (a->realtime < b->realtime)
2637 return -1;
2638 else if (a->realtime > b->realtime)
2639 return 1;
5c70eab4 2640 else if (a->have_seqnum && b->have_seqnum)
0ac38b70 2641 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
5c70eab4
LP
2642 else
2643 return strcmp(a->filename, b->filename);
0ac38b70
LP
2644}
2645
2646int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
2647 DIR *d;
2648 int r = 0;
2649 struct vacuum_info *list = NULL;
2650 unsigned n_list = 0, n_allocated = 0, i;
2651 uint64_t sum = 0;
2652
2653 assert(directory);
2654
2655 if (max_use <= 0)
babfc091 2656 return 0;
0ac38b70
LP
2657
2658 d = opendir(directory);
2659 if (!d)
2660 return -errno;
2661
2662 for (;;) {
2663 int k;
2664 struct dirent buf, *de;
2665 size_t q;
2666 struct stat st;
2667 char *p;
7ea07dcd 2668 unsigned long long seqnum = 0, realtime;
0ac38b70 2669 sd_id128_t seqnum_id;
5c70eab4 2670 bool have_seqnum;
0ac38b70
LP
2671
2672 k = readdir_r(d, &buf, &de);
2673 if (k != 0) {
2674 r = -k;
2675 goto finish;
2676 }
2677
2678 if (!de)
2679 break;
2680
5c70eab4
LP
2681 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2682 continue;
2683
2684 if (!S_ISREG(st.st_mode))
0ac38b70
LP
2685 continue;
2686
2687 q = strlen(de->d_name);
2688
5c70eab4 2689 if (endswith(de->d_name, ".journal")) {
0ac38b70 2690
5c70eab4 2691 /* Vacuum archived files */
0ac38b70 2692
5c70eab4
LP
2693 if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2694 continue;
0ac38b70 2695
5c70eab4
LP
2696 if (de->d_name[q-8-16-1] != '-' ||
2697 de->d_name[q-8-16-1-16-1] != '-' ||
2698 de->d_name[q-8-16-1-16-1-32-1] != '@')
2699 continue;
0ac38b70 2700
5c70eab4
LP
2701 p = strdup(de->d_name);
2702 if (!p) {
2703 r = -ENOMEM;
2704 goto finish;
2705 }
0ac38b70 2706
5c70eab4
LP
2707 de->d_name[q-8-16-1-16-1] = 0;
2708 if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2709 free(p);
2710 continue;
2711 }
2712
2713 if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2714 free(p);
2715 continue;
2716 }
2717
2718 have_seqnum = true;
2719
2720 } else if (endswith(de->d_name, ".journal~")) {
2721 unsigned long long tmp;
2722
2723 /* Vacuum corrupted files */
2724
2725 if (q < 1 + 16 + 1 + 16 + 8 + 1)
2726 continue;
0ac38b70 2727
5c70eab4
LP
2728 if (de->d_name[q-1-8-16-1] != '-' ||
2729 de->d_name[q-1-8-16-1-16-1] != '@')
2730 continue;
2731
2732 p = strdup(de->d_name);
2733 if (!p) {
2734 r = -ENOMEM;
2735 goto finish;
2736 }
2737
2738 if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2739 free(p);
2740 continue;
2741 }
2742
2743 have_seqnum = false;
2744 } else
0ac38b70 2745 continue;
0ac38b70
LP
2746
2747 if (n_list >= n_allocated) {
2748 struct vacuum_info *j;
2749
2750 n_allocated = MAX(n_allocated * 2U, 8U);
2751 j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2752 if (!j) {
2753 free(p);
2754 r = -ENOMEM;
2755 goto finish;
2756 }
2757
2758 list = j;
2759 }
2760
2761 list[n_list].filename = p;
a3a52c0f 2762 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
0ac38b70
LP
2763 list[n_list].seqnum = seqnum;
2764 list[n_list].realtime = realtime;
2765 list[n_list].seqnum_id = seqnum_id;
5c70eab4 2766 list[n_list].have_seqnum = have_seqnum;
0ac38b70
LP
2767
2768 sum += list[n_list].usage;
2769
2770 n_list ++;
2771 }
2772
64825d3c
LP
2773 if (n_list > 0)
2774 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
0ac38b70
LP
2775
2776 for(i = 0; i < n_list; i++) {
2777 struct statvfs ss;
2778
2779 if (fstatvfs(dirfd(d), &ss) < 0) {
2780 r = -errno;
2781 goto finish;
2782 }
2783
2784 if (sum <= max_use &&
2785 (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2786 break;
2787
2788 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
e7bf07b3 2789 log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
0ac38b70
LP
2790 sum -= list[i].usage;
2791 } else if (errno != ENOENT)
2792 log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2793 }
2794
2795finish:
2796 for (i = 0; i < n_list; i++)
2797 free(list[i].filename);
2798
2799 free(list);
2800
de190aef
LP
2801 if (d)
2802 closedir(d);
2803
0ac38b70
LP
2804 return r;
2805}
cf244689
LP
2806
2807int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2808 uint64_t i, n;
2809 uint64_t q, xor_hash = 0;
2810 int r;
2811 EntryItem *items;
2812 dual_timestamp ts;
2813
2814 assert(from);
2815 assert(to);
2816 assert(o);
2817 assert(p);
2818
2819 if (!to->writable)
2820 return -EPERM;
2821
2822 ts.monotonic = le64toh(o->entry.monotonic);
2823 ts.realtime = le64toh(o->entry.realtime);
2824
2825 if (to->tail_entry_monotonic_valid &&
2826 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2827 return -EINVAL;
2828
cf244689
LP
2829 n = journal_file_entry_n_items(o);
2830 items = alloca(sizeof(EntryItem) * n);
2831
2832 for (i = 0; i < n; i++) {
4fd052ae
FC
2833 uint64_t l, h;
2834 le64_t le_hash;
cf244689
LP
2835 size_t t;
2836 void *data;
2837 Object *u;
2838
2839 q = le64toh(o->entry.items[i].object_offset);
2840 le_hash = o->entry.items[i].hash;
2841
2842 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2843 if (r < 0)
2844 return r;
2845
2846 if (le_hash != o->data.hash)
2847 return -EBADMSG;
2848
2849 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2850 t = (size_t) l;
2851
2852 /* We hit the limit on 32bit machines */
2853 if ((uint64_t) t != l)
2854 return -E2BIG;
2855
2856 if (o->object.flags & OBJECT_COMPRESSED) {
2857#ifdef HAVE_XZ
2858 uint64_t rsize;
2859
2860 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2861 return -EBADMSG;
2862
2863 data = from->compress_buffer;
2864 l = rsize;
2865#else
2866 return -EPROTONOSUPPORT;
2867#endif
2868 } else
2869 data = o->data.payload;
2870
2871 r = journal_file_append_data(to, data, l, &u, &h);
2872 if (r < 0)
2873 return r;
2874
2875 xor_hash ^= le64toh(u->data.hash);
2876 items[i].object_offset = htole64(h);
2877 items[i].hash = u->data.hash;
2878
2879 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2880 if (r < 0)
2881 return r;
2882 }
2883
2884 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2885}
babfc091
LP
2886
2887void journal_default_metrics(JournalMetrics *m, int fd) {
2888 uint64_t fs_size = 0;
2889 struct statvfs ss;
a7bc2c2a 2890 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2891
2892 assert(m);
2893 assert(fd >= 0);
2894
2895 if (fstatvfs(fd, &ss) >= 0)
2896 fs_size = ss.f_frsize * ss.f_blocks;
2897
2898 if (m->max_use == (uint64_t) -1) {
2899
2900 if (fs_size > 0) {
2901 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2902
2903 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2904 m->max_use = DEFAULT_MAX_USE_UPPER;
2905
2906 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2907 m->max_use = DEFAULT_MAX_USE_LOWER;
2908 } else
2909 m->max_use = DEFAULT_MAX_USE_LOWER;
2910 } else {
2911 m->max_use = PAGE_ALIGN(m->max_use);
2912
2913 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2914 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2915 }
2916
2917 if (m->max_size == (uint64_t) -1) {
2918 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2919
2920 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2921 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2922 } else
2923 m->max_size = PAGE_ALIGN(m->max_size);
2924
2925 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2926 m->max_size = JOURNAL_FILE_SIZE_MIN;
2927
2928 if (m->max_size*2 > m->max_use)
2929 m->max_use = m->max_size*2;
2930
2931 if (m->min_size == (uint64_t) -1)
2932 m->min_size = JOURNAL_FILE_SIZE_MIN;
2933 else {
2934 m->min_size = PAGE_ALIGN(m->min_size);
2935
2936 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2937 m->min_size = JOURNAL_FILE_SIZE_MIN;
2938
2939 if (m->min_size > m->max_size)
2940 m->max_size = m->min_size;
2941 }
2942
2943 if (m->keep_free == (uint64_t) -1) {
2944
2945 if (fs_size > 0) {
2946 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2947
2948 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2949 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2950
2951 } else
2952 m->keep_free = DEFAULT_KEEP_FREE;
2953 }
2954
e7bf07b3
LP
2955 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2956 format_bytes(a, sizeof(a), m->max_use),
2957 format_bytes(b, sizeof(b), m->max_size),
2958 format_bytes(c, sizeof(c), m->min_size),
2959 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2960}
08984293
LP
2961
2962int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2963 assert(f);
2964 assert(from || to);
2965
2966 if (from) {
162566a4
LP
2967 if (f->header->head_entry_realtime == 0)
2968 return -ENOENT;
08984293 2969
162566a4 2970 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2971 }
2972
2973 if (to) {
162566a4
LP
2974 if (f->header->tail_entry_realtime == 0)
2975 return -ENOENT;
08984293 2976
162566a4 2977 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2978 }
2979
2980 return 1;
2981}
2982
2983int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2984 char t[9+32+1] = "_BOOT_ID=";
2985 Object *o;
2986 uint64_t p;
2987 int r;
2988
2989 assert(f);
2990 assert(from || to);
2991
2992 sd_id128_to_string(boot_id, t + 9);
2993
2994 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2995 if (r <= 0)
2996 return r;
2997
2998 if (le64toh(o->data.n_entries) <= 0)
2999 return 0;
3000
3001 if (from) {
3002 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
3003 if (r < 0)
3004 return r;
3005
3006 *from = le64toh(o->entry.monotonic);
3007 }
3008
3009 if (to) {
3010 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
3011 if (r < 0)
3012 return r;
3013
3014 r = generic_array_get_plus_one(f,
3015 le64toh(o->data.entry_offset),
3016 le64toh(o->data.entry_array_offset),
3017 le64toh(o->data.n_entries)-1,
3018 &o, NULL);
3019 if (r <= 0)
3020 return r;
3021
3022 *to = le64toh(o->entry.monotonic);
3023 }
3024
3025 return 1;
3026}
dca6219e
LP
3027
3028bool journal_file_rotate_suggested(JournalFile *f) {
3029 assert(f);
3030
3031 /* If we gained new header fields we gained new features,
3032 * hence suggest a rotation */
361f9cbc
LP
3033 if (le64toh(f->header->header_size) < sizeof(Header)) {
3034 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 3035 return true;
361f9cbc 3036 }
dca6219e
LP
3037
3038 /* Let's check if the hash tables grew over a certain fill
3039 * level (75%, borrowing this value from Java's hash table
3040 * implementation), and if so suggest a rotation. To calculate
3041 * the fill level we need the n_data field, which only exists
3042 * in newer versions. */
3043
3044 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
3045 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3046 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
3047 f->path,
3048 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
3049 (unsigned long long) le64toh(f->header->n_data),
3050 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
3051 (unsigned long long) (f->last_stat.st_size),
3052 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 3053 return true;
361f9cbc 3054 }
dca6219e
LP
3055
3056 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
3057 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3058 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
3059 f->path,
3060 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
3061 (unsigned long long) le64toh(f->header->n_fields),
3062 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 3063 return true;
361f9cbc 3064 }
dca6219e
LP
3065
3066 return false;
3067}