]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journal: use a macro to check for file header flags
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
0284adc6 32#include "journal-authenticate.h"
cec736d2 33#include "lookup3.h"
807e17f0 34#include "compress.h"
7560fffc 35#include "fsprg.h"
cec736d2 36
4a92baf3
LP
/* Default sizes, in bytes, of the data and field hash tables: 2047 and
 * 333 HashItem buckets respectively */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads of at least this many bytes are candidates for
 * compression when compression is enabled for the file */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                      /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)               /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)       /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)            /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)     /* 4 GiB */

/* This is the keep_free value when we can't determine the file system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                        /* 1 MiB */

/* n_data was the first entry we added after the initial file format
 * design, hence a valid header is at least large enough to reach it */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 63
cec736d2 64void journal_file_close(JournalFile *f) {
de190aef 65 assert(f);
cec736d2 66
b0af6f41 67 /* Write the final tag */
c586dbf1 68 if (f->seal && f->writable)
b0af6f41
LP
69 journal_file_append_tag(f);
70
7560fffc 71 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
72 if (f->mmap && f->fd >= 0)
73 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc
LP
74
75 if (f->writable && f->fd >= 0)
76 fdatasync(f->fd);
77
d384c7a8 78 if (f->header) {
cd96b3b8
LP
79 /* Mark the file offline. Don't override the archived state if it already is set */
80 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 81 f->header->state = STATE_OFFLINE;
cec736d2 82
d384c7a8
MS
83 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84 }
cec736d2 85
0ac38b70
LP
86 if (f->fd >= 0)
87 close_nointr_nofail(f->fd);
88
cec736d2 89 free(f->path);
807e17f0 90
16e9f408
LP
91 if (f->mmap)
92 mmap_cache_unref(f->mmap);
93
807e17f0
LP
94#ifdef HAVE_XZ
95 free(f->compress_buffer);
96#endif
97
7560fffc 98#ifdef HAVE_GCRYPT
baed47c3
LP
99 if (f->fss_file)
100 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
101 else if (f->fsprg_state)
102 free(f->fsprg_state);
103
104 free(f->fsprg_seed);
7560fffc
LP
105
106 if (f->hmac)
107 gcry_md_close(f->hmac);
108#endif
109
cec736d2
LP
110 free(f);
111}
112
0ac38b70 113static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
114 Header h;
115 ssize_t k;
116 int r;
117
118 assert(f);
119
120 zero(h);
7560fffc 121 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 122 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 123
7560fffc
LP
124 h.incompatible_flags =
125 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127 h.compatible_flags =
baed47c3 128 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 129
cec736d2
LP
130 r = sd_id128_randomize(&h.file_id);
131 if (r < 0)
132 return r;
133
0ac38b70
LP
134 if (template) {
135 h.seqnum_id = template->header->seqnum_id;
beec0085 136 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
137 } else
138 h.seqnum_id = h.file_id;
cec736d2
LP
139
140 k = pwrite(f->fd, &h, sizeof(h), 0);
141 if (k < 0)
142 return -errno;
143
144 if (k != sizeof(h))
145 return -EIO;
146
147 return 0;
148}
149
150static int journal_file_refresh_header(JournalFile *f) {
151 int r;
de190aef 152 sd_id128_t boot_id;
cec736d2
LP
153
154 assert(f);
155
156 r = sd_id128_get_machine(&f->header->machine_id);
157 if (r < 0)
158 return r;
159
de190aef 160 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
161 if (r < 0)
162 return r;
163
de190aef
LP
164 if (sd_id128_equal(boot_id, f->header->boot_id))
165 f->tail_entry_monotonic_valid = true;
166
167 f->header->boot_id = boot_id;
168
169 f->header->state = STATE_ONLINE;
b788cc23 170
7560fffc
LP
171 /* Sync the online state to disk */
172 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173 fdatasync(f->fd);
b788cc23 174
cec736d2
LP
175 return 0;
176}
177
178static int journal_file_verify_header(JournalFile *f) {
179 assert(f);
180
7560fffc 181 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
182 return -EBADMSG;
183
7560fffc
LP
184 /* In both read and write mode we refuse to open files with
185 * incompatible flags we don't know */
807e17f0 186#ifdef HAVE_XZ
7560fffc 187 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
188 return -EPROTONOSUPPORT;
189#else
cec736d2
LP
190 if (f->header->incompatible_flags != 0)
191 return -EPROTONOSUPPORT;
807e17f0 192#endif
cec736d2 193
7560fffc
LP
194 /* When open for writing we refuse to open files with
195 * compatible flags, too */
196 if (f->writable) {
197#ifdef HAVE_GCRYPT
baed47c3 198 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
199 return -EPROTONOSUPPORT;
200#else
201 if (f->header->compatible_flags != 0)
202 return -EPROTONOSUPPORT;
203#endif
204 }
205
db11ac1a
LP
206 if (f->header->state >= _STATE_MAX)
207 return -EBADMSG;
208
dca6219e
LP
209 /* The first addition was n_data, so check that we are at least this large */
210 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
211 return -EBADMSG;
212
8088cbd3 213 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
214 return -EBADMSG;
215
db11ac1a
LP
216 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
217 return -ENODATA;
218
219 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
220 return -ENODATA;
221
222 if (!VALID64(f->header->data_hash_table_offset) ||
223 !VALID64(f->header->field_hash_table_offset) ||
224 !VALID64(f->header->tail_object_offset) ||
225 !VALID64(f->header->entry_array_offset))
cec736d2
LP
226 return -ENODATA;
227
228 if (f->writable) {
ccdbaf91 229 uint8_t state;
cec736d2
LP
230 sd_id128_t machine_id;
231 int r;
232
233 r = sd_id128_get_machine(&machine_id);
234 if (r < 0)
235 return r;
236
237 if (!sd_id128_equal(machine_id, f->header->machine_id))
238 return -EHOSTDOWN;
239
de190aef 240 state = f->header->state;
cec736d2 241
71fa6f00
LP
242 if (state == STATE_ONLINE) {
243 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
244 return -EBUSY;
245 } else if (state == STATE_ARCHIVED)
cec736d2 246 return -ESHUTDOWN;
71fa6f00
LP
247 else if (state != STATE_OFFLINE) {
248 log_debug("Journal file %s has unknown state %u.", f->path, state);
249 return -EBUSY;
250 }
cec736d2
LP
251 }
252
8088cbd3 253 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
c586dbf1
LP
254
255 if (f->writable)
8088cbd3 256 f->seal = JOURNAL_HEADER_SEALED(f->header);
7560fffc 257
cec736d2
LP
258 return 0;
259}
260
261static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 262 uint64_t old_size, new_size;
fec2aa2f 263 int r;
cec736d2
LP
264
265 assert(f);
266
cec736d2 267 /* We assume that this file is not sparse, and we know that
38ac38b2 268 * for sure, since we always call posix_fallocate()
cec736d2
LP
269 * ourselves */
270
271 old_size =
23b0b2b2 272 le64toh(f->header->header_size) +
cec736d2
LP
273 le64toh(f->header->arena_size);
274
bc85bfee 275 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
276 if (new_size < le64toh(f->header->header_size))
277 new_size = le64toh(f->header->header_size);
bc85bfee
LP
278
279 if (new_size <= old_size)
cec736d2
LP
280 return 0;
281
bc85bfee
LP
282 if (f->metrics.max_size > 0 &&
283 new_size > f->metrics.max_size)
284 return -E2BIG;
cec736d2 285
bc85bfee
LP
286 if (new_size > f->metrics.min_size &&
287 f->metrics.keep_free > 0) {
cec736d2
LP
288 struct statvfs svfs;
289
290 if (fstatvfs(f->fd, &svfs) >= 0) {
291 uint64_t available;
292
293 available = svfs.f_bfree * svfs.f_bsize;
294
bc85bfee
LP
295 if (available >= f->metrics.keep_free)
296 available -= f->metrics.keep_free;
cec736d2
LP
297 else
298 available = 0;
299
300 if (new_size - old_size > available)
301 return -E2BIG;
302 }
303 }
304
bc85bfee
LP
305 /* Note that the glibc fallocate() fallback is very
306 inefficient, hence we try to minimize the allocation area
307 as we can. */
fec2aa2f
GV
308 r = posix_fallocate(f->fd, old_size, new_size - old_size);
309 if (r != 0)
310 return -r;
cec736d2 311
f65425cb
LP
312 mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
313
cec736d2
LP
314 if (fstat(f->fd, &f->last_stat) < 0)
315 return -errno;
316
23b0b2b2 317 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
318
319 return 0;
320}
321
16e9f408 322static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
cec736d2 323 assert(f);
cec736d2
LP
324 assert(ret);
325
2a59ea54 326 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
327 if (offset + size > (uint64_t) f->last_stat.st_size) {
328 /* Hmm, out of range? Let's refresh the fstat() data
329 * first, before we trust that check. */
330
331 if (fstat(f->fd, &f->last_stat) < 0 ||
332 offset + size > (uint64_t) f->last_stat.st_size)
333 return -EADDRNOTAVAIL;
334 }
335
16e9f408 336 return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
cec736d2
LP
337}
338
16e9f408
LP
339static uint64_t minimum_header_size(Object *o) {
340
341 static uint64_t table[] = {
342 [OBJECT_DATA] = sizeof(DataObject),
343 [OBJECT_FIELD] = sizeof(FieldObject),
344 [OBJECT_ENTRY] = sizeof(EntryObject),
345 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
346 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
347 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
348 [OBJECT_TAG] = sizeof(TagObject),
349 };
350
351 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
352 return sizeof(ObjectHeader);
353
354 return table[o->object.type];
355}
356
de190aef 357int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
358 int r;
359 void *t;
360 Object *o;
361 uint64_t s;
16e9f408 362 unsigned context;
cec736d2
LP
363
364 assert(f);
365 assert(ret);
366
db11ac1a
LP
367 /* Objects may only be located at multiple of 64 bit */
368 if (!VALID64(offset))
369 return -EFAULT;
370
16e9f408
LP
371 /* One context for each type, plus one catch-all for the rest */
372 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
373
374 r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
375 if (r < 0)
376 return r;
377
378 o = (Object*) t;
379 s = le64toh(o->object.size);
380
381 if (s < sizeof(ObjectHeader))
382 return -EBADMSG;
383
16e9f408
LP
384 if (o->object.type <= OBJECT_UNUSED)
385 return -EBADMSG;
386
387 if (s < minimum_header_size(o))
388 return -EBADMSG;
389
de190aef 390 if (type >= 0 && o->object.type != type)
cec736d2
LP
391 return -EBADMSG;
392
393 if (s > sizeof(ObjectHeader)) {
de190aef 394 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
395 if (r < 0)
396 return r;
397
398 o = (Object*) t;
399 }
400
cec736d2
LP
401 *ret = o;
402 return 0;
403}
404
d98cc1f2 405static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
406 uint64_t r;
407
408 assert(f);
409
beec0085 410 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
411
412 if (seqnum) {
de190aef 413 /* If an external seqnum counter was passed, we update
c2373f84
LP
414 * both the local and the external one, and set it to
415 * the maximum of both */
416
417 if (*seqnum + 1 > r)
418 r = *seqnum + 1;
419
420 *seqnum = r;
421 }
422
beec0085 423 f->header->tail_entry_seqnum = htole64(r);
cec736d2 424
beec0085
LP
425 if (f->header->head_entry_seqnum == 0)
426 f->header->head_entry_seqnum = htole64(r);
de190aef 427
cec736d2
LP
428 return r;
429}
430
0284adc6 431int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
432 int r;
433 uint64_t p;
434 Object *tail, *o;
435 void *t;
436
437 assert(f);
16e9f408 438 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
439 assert(size >= sizeof(ObjectHeader));
440 assert(offset);
441 assert(ret);
442
443 p = le64toh(f->header->tail_object_offset);
cec736d2 444 if (p == 0)
23b0b2b2 445 p = le64toh(f->header->header_size);
cec736d2 446 else {
de190aef 447 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
448 if (r < 0)
449 return r;
450
451 p += ALIGN64(le64toh(tail->object.size));
452 }
453
454 r = journal_file_allocate(f, p, size);
455 if (r < 0)
456 return r;
457
de190aef 458 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
459 if (r < 0)
460 return r;
461
462 o = (Object*) t;
463
464 zero(o->object);
de190aef 465 o->object.type = type;
cec736d2
LP
466 o->object.size = htole64(size);
467
468 f->header->tail_object_offset = htole64(p);
cec736d2
LP
469 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
470
471 *ret = o;
472 *offset = p;
473
474 return 0;
475}
476
de190aef 477static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
478 uint64_t s, p;
479 Object *o;
480 int r;
481
482 assert(f);
483
dfabe643 484 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
485 journal file and we want to make sure we never get beyond
486 75% fill level. Calculate the hash table size for the
487 maximum file size based on these metrics. */
488
dfabe643 489 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
490 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
491 s = DEFAULT_DATA_HASH_TABLE_SIZE;
492
dfabe643 493 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 494
de190aef
LP
495 r = journal_file_append_object(f,
496 OBJECT_DATA_HASH_TABLE,
497 offsetof(Object, hash_table.items) + s,
498 &o, &p);
cec736d2
LP
499 if (r < 0)
500 return r;
501
de190aef 502 memset(o->hash_table.items, 0, s);
cec736d2 503
de190aef
LP
504 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
505 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
506
507 return 0;
508}
509
de190aef 510static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
511 uint64_t s, p;
512 Object *o;
513 int r;
514
515 assert(f);
516
de190aef
LP
517 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
518 r = journal_file_append_object(f,
519 OBJECT_FIELD_HASH_TABLE,
520 offsetof(Object, hash_table.items) + s,
521 &o, &p);
cec736d2
LP
522 if (r < 0)
523 return r;
524
de190aef 525 memset(o->hash_table.items, 0, s);
cec736d2 526
de190aef
LP
527 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
528 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
529
530 return 0;
531}
532
de190aef 533static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
534 uint64_t s, p;
535 void *t;
536 int r;
537
538 assert(f);
539
de190aef
LP
540 p = le64toh(f->header->data_hash_table_offset);
541 s = le64toh(f->header->data_hash_table_size);
cec736d2 542
de190aef 543 r = journal_file_move_to(f,
16e9f408 544 OBJECT_DATA_HASH_TABLE,
de190aef
LP
545 p, s,
546 &t);
cec736d2
LP
547 if (r < 0)
548 return r;
549
de190aef 550 f->data_hash_table = t;
cec736d2
LP
551 return 0;
552}
553
de190aef 554static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
555 uint64_t s, p;
556 void *t;
557 int r;
558
559 assert(f);
560
de190aef
LP
561 p = le64toh(f->header->field_hash_table_offset);
562 s = le64toh(f->header->field_hash_table_size);
cec736d2 563
de190aef 564 r = journal_file_move_to(f,
16e9f408 565 OBJECT_FIELD_HASH_TABLE,
de190aef
LP
566 p, s,
567 &t);
cec736d2
LP
568 if (r < 0)
569 return r;
570
de190aef 571 f->field_hash_table = t;
cec736d2
LP
572 return 0;
573}
574
de190aef
LP
575static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
576 uint64_t p, h;
cec736d2
LP
577 int r;
578
579 assert(f);
580 assert(o);
581 assert(offset > 0);
de190aef 582 assert(o->object.type == OBJECT_DATA);
cec736d2 583
48496df6
LP
584 /* This might alter the window we are looking at */
585
de190aef
LP
586 o->data.next_hash_offset = o->data.next_field_offset = 0;
587 o->data.entry_offset = o->data.entry_array_offset = 0;
588 o->data.n_entries = 0;
cec736d2 589
de190aef 590 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 591 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
592 if (p == 0) {
593 /* Only entry in the hash table is easy */
de190aef 594 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 595 } else {
48496df6
LP
596 /* Move back to the previous data object, to patch in
597 * pointer */
cec736d2 598
de190aef 599 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
600 if (r < 0)
601 return r;
602
de190aef 603 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
604 }
605
de190aef 606 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 607
dca6219e
LP
608 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
609 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
610
cec736d2
LP
611 return 0;
612}
613
de190aef
LP
614int journal_file_find_data_object_with_hash(
615 JournalFile *f,
616 const void *data, uint64_t size, uint64_t hash,
617 Object **ret, uint64_t *offset) {
48496df6 618
de190aef 619 uint64_t p, osize, h;
cec736d2
LP
620 int r;
621
622 assert(f);
623 assert(data || size == 0);
624
625 osize = offsetof(Object, data.payload) + size;
626
bc85bfee
LP
627 if (f->header->data_hash_table_size == 0)
628 return -EBADMSG;
629
de190aef
LP
630 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
631 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 632
de190aef
LP
633 while (p > 0) {
634 Object *o;
cec736d2 635
de190aef 636 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
637 if (r < 0)
638 return r;
639
807e17f0 640 if (le64toh(o->data.hash) != hash)
85a131e8 641 goto next;
807e17f0
LP
642
643 if (o->object.flags & OBJECT_COMPRESSED) {
644#ifdef HAVE_XZ
b785c858 645 uint64_t l, rsize;
cec736d2 646
807e17f0
LP
647 l = le64toh(o->object.size);
648 if (l <= offsetof(Object, data.payload))
cec736d2
LP
649 return -EBADMSG;
650
807e17f0
LP
651 l -= offsetof(Object, data.payload);
652
653 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
654 return -EBADMSG;
655
b785c858 656 if (rsize == size &&
807e17f0
LP
657 memcmp(f->compress_buffer, data, size) == 0) {
658
659 if (ret)
660 *ret = o;
661
662 if (offset)
663 *offset = p;
664
665 return 1;
666 }
667#else
668 return -EPROTONOSUPPORT;
669#endif
670
671 } else if (le64toh(o->object.size) == osize &&
672 memcmp(o->data.payload, data, size) == 0) {
673
cec736d2
LP
674 if (ret)
675 *ret = o;
676
677 if (offset)
678 *offset = p;
679
de190aef 680 return 1;
cec736d2
LP
681 }
682
85a131e8 683 next:
cec736d2
LP
684 p = le64toh(o->data.next_hash_offset);
685 }
686
de190aef
LP
687 return 0;
688}
689
690int journal_file_find_data_object(
691 JournalFile *f,
692 const void *data, uint64_t size,
693 Object **ret, uint64_t *offset) {
694
695 uint64_t hash;
696
697 assert(f);
698 assert(data || size == 0);
699
700 hash = hash64(data, size);
701
702 return journal_file_find_data_object_with_hash(f,
703 data, size, hash,
704 ret, offset);
705}
706
48496df6
LP
707static int journal_file_append_data(
708 JournalFile *f,
709 const void *data, uint64_t size,
710 Object **ret, uint64_t *offset) {
711
de190aef
LP
712 uint64_t hash, p;
713 uint64_t osize;
714 Object *o;
715 int r;
807e17f0 716 bool compressed = false;
de190aef
LP
717
718 assert(f);
719 assert(data || size == 0);
720
721 hash = hash64(data, size);
722
723 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
724 if (r < 0)
725 return r;
726 else if (r > 0) {
727
728 if (ret)
729 *ret = o;
730
731 if (offset)
732 *offset = p;
733
734 return 0;
735 }
736
737 osize = offsetof(Object, data.payload) + size;
738 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
739 if (r < 0)
740 return r;
741
cec736d2 742 o->data.hash = htole64(hash);
807e17f0
LP
743
744#ifdef HAVE_XZ
745 if (f->compress &&
746 size >= COMPRESSION_SIZE_THRESHOLD) {
747 uint64_t rsize;
748
749 compressed = compress_blob(data, size, o->data.payload, &rsize);
750
751 if (compressed) {
752 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
753 o->object.flags |= OBJECT_COMPRESSED;
754
807e17f0
LP
755 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
756 }
757 }
758#endif
759
64825d3c 760 if (!compressed && size > 0)
807e17f0 761 memcpy(o->data.payload, data, size);
cec736d2 762
de190aef 763 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
764 if (r < 0)
765 return r;
766
b0af6f41
LP
767 r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
768 if (r < 0)
769 return r;
770
48496df6
LP
771 /* The linking might have altered the window, so let's
772 * refresh our pointer */
773 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
774 if (r < 0)
775 return r;
776
cec736d2
LP
777 if (ret)
778 *ret = o;
779
780 if (offset)
de190aef 781 *offset = p;
cec736d2
LP
782
783 return 0;
784}
785
786uint64_t journal_file_entry_n_items(Object *o) {
787 assert(o);
7be3aa17 788 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
789
790 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
791}
792
0284adc6 793uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 794 assert(o);
7be3aa17 795 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
796
797 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
798}
799
fb9a24b6
LP
800uint64_t journal_file_hash_table_n_items(Object *o) {
801 assert(o);
802 assert(o->object.type == OBJECT_DATA_HASH_TABLE ||
803 o->object.type == OBJECT_FIELD_HASH_TABLE);
804
805 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
806}
807
de190aef 808static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
809 le64_t *first,
810 le64_t *idx,
de190aef 811 uint64_t p) {
cec736d2 812 int r;
de190aef
LP
813 uint64_t n = 0, ap = 0, q, i, a, hidx;
814 Object *o;
815
cec736d2 816 assert(f);
de190aef
LP
817 assert(first);
818 assert(idx);
819 assert(p > 0);
cec736d2 820
de190aef
LP
821 a = le64toh(*first);
822 i = hidx = le64toh(*idx);
823 while (a > 0) {
824
825 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
826 if (r < 0)
827 return r;
cec736d2 828
de190aef
LP
829 n = journal_file_entry_array_n_items(o);
830 if (i < n) {
831 o->entry_array.items[i] = htole64(p);
832 *idx = htole64(hidx + 1);
833 return 0;
834 }
cec736d2 835
de190aef
LP
836 i -= n;
837 ap = a;
838 a = le64toh(o->entry_array.next_entry_array_offset);
839 }
840
841 if (hidx > n)
842 n = (hidx+1) * 2;
843 else
844 n = n * 2;
845
846 if (n < 4)
847 n = 4;
848
849 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
850 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
851 &o, &q);
cec736d2
LP
852 if (r < 0)
853 return r;
854
b0af6f41
LP
855 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
856 if (r < 0)
857 return r;
858
de190aef 859 o->entry_array.items[i] = htole64(p);
cec736d2 860
de190aef 861 if (ap == 0)
7be3aa17 862 *first = htole64(q);
cec736d2 863 else {
de190aef 864 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
865 if (r < 0)
866 return r;
867
de190aef
LP
868 o->entry_array.next_entry_array_offset = htole64(q);
869 }
cec736d2 870
2dee23eb
LP
871 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
872 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
873
de190aef
LP
874 *idx = htole64(hidx + 1);
875
876 return 0;
877}
cec736d2 878
de190aef 879static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
880 le64_t *extra,
881 le64_t *first,
882 le64_t *idx,
de190aef
LP
883 uint64_t p) {
884
885 int r;
886
887 assert(f);
888 assert(extra);
889 assert(first);
890 assert(idx);
891 assert(p > 0);
892
893 if (*idx == 0)
894 *extra = htole64(p);
895 else {
4fd052ae 896 le64_t i;
de190aef 897
7be3aa17 898 i = htole64(le64toh(*idx) - 1);
de190aef
LP
899 r = link_entry_into_array(f, first, &i, p);
900 if (r < 0)
901 return r;
cec736d2
LP
902 }
903
de190aef
LP
904 *idx = htole64(le64toh(*idx) + 1);
905 return 0;
906}
907
908static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
909 uint64_t p;
910 int r;
911 assert(f);
912 assert(o);
913 assert(offset > 0);
914
915 p = le64toh(o->entry.items[i].object_offset);
916 if (p == 0)
917 return -EINVAL;
918
919 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
920 if (r < 0)
921 return r;
922
de190aef
LP
923 return link_entry_into_array_plus_one(f,
924 &o->data.entry_offset,
925 &o->data.entry_array_offset,
926 &o->data.n_entries,
927 offset);
cec736d2
LP
928}
929
930static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 931 uint64_t n, i;
cec736d2
LP
932 int r;
933
934 assert(f);
935 assert(o);
936 assert(offset > 0);
de190aef 937 assert(o->object.type == OBJECT_ENTRY);
cec736d2 938
b788cc23
LP
939 __sync_synchronize();
940
cec736d2 941 /* Link up the entry itself */
de190aef
LP
942 r = link_entry_into_array(f,
943 &f->header->entry_array_offset,
944 &f->header->n_entries,
945 offset);
946 if (r < 0)
947 return r;
cec736d2 948
aaf53376 949 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 950
de190aef 951 if (f->header->head_entry_realtime == 0)
0ac38b70 952 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 953
0ac38b70 954 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
955 f->header->tail_entry_monotonic = o->entry.monotonic;
956
957 f->tail_entry_monotonic_valid = true;
cec736d2
LP
958
959 /* Link up the items */
960 n = journal_file_entry_n_items(o);
961 for (i = 0; i < n; i++) {
962 r = journal_file_link_entry_item(f, o, offset, i);
963 if (r < 0)
964 return r;
965 }
966
cec736d2
LP
967 return 0;
968}
969
970static int journal_file_append_entry_internal(
971 JournalFile *f,
972 const dual_timestamp *ts,
973 uint64_t xor_hash,
974 const EntryItem items[], unsigned n_items,
de190aef 975 uint64_t *seqnum,
cec736d2
LP
976 Object **ret, uint64_t *offset) {
977 uint64_t np;
978 uint64_t osize;
979 Object *o;
980 int r;
981
982 assert(f);
983 assert(items || n_items == 0);
de190aef 984 assert(ts);
cec736d2
LP
985
986 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
987
de190aef 988 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
989 if (r < 0)
990 return r;
991
d98cc1f2 992 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 993 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
994 o->entry.realtime = htole64(ts->realtime);
995 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
996 o->entry.xor_hash = htole64(xor_hash);
997 o->entry.boot_id = f->header->boot_id;
998
b0af6f41
LP
999 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
1000 if (r < 0)
1001 return r;
1002
cec736d2
LP
1003 r = journal_file_link_entry(f, o, np);
1004 if (r < 0)
1005 return r;
1006
1007 if (ret)
1008 *ret = o;
1009
1010 if (offset)
1011 *offset = np;
1012
1013 return 0;
1014}
1015
cf244689 1016void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1017 assert(f);
1018
1019 /* inotify() does not receive IN_MODIFY events from file
1020 * accesses done via mmap(). After each access we hence
1021 * trigger IN_MODIFY by truncating the journal file to its
1022 * current size which triggers IN_MODIFY. */
1023
bc85bfee
LP
1024 __sync_synchronize();
1025
50f20cfd
LP
1026 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1027 log_error("Failed to to truncate file to its own size: %m");
1028}
1029
de190aef 1030int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1031 unsigned i;
1032 EntryItem *items;
1033 int r;
1034 uint64_t xor_hash = 0;
de190aef 1035 struct dual_timestamp _ts;
cec736d2
LP
1036
1037 assert(f);
1038 assert(iovec || n_iovec == 0);
1039
de190aef
LP
1040 if (!f->writable)
1041 return -EPERM;
1042
1043 if (!ts) {
1044 dual_timestamp_get(&_ts);
1045 ts = &_ts;
1046 }
1047
1048 if (f->tail_entry_monotonic_valid &&
1049 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1050 return -EINVAL;
1051
7560fffc
LP
1052 r = journal_file_maybe_append_tag(f, ts->realtime);
1053 if (r < 0)
1054 return r;
1055
64825d3c
LP
1056 /* alloca() can't take 0, hence let's allocate at least one */
1057 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1058
1059 for (i = 0; i < n_iovec; i++) {
1060 uint64_t p;
1061 Object *o;
1062
1063 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1064 if (r < 0)
cf244689 1065 return r;
cec736d2
LP
1066
1067 xor_hash ^= le64toh(o->data.hash);
1068 items[i].object_offset = htole64(p);
de7b95cd 1069 items[i].hash = o->data.hash;
cec736d2
LP
1070 }
1071
de190aef 1072 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1073
50f20cfd
LP
1074 journal_file_post_change(f);
1075
cec736d2
LP
1076 return r;
1077}
1078
de190aef
LP
1079static int generic_array_get(JournalFile *f,
1080 uint64_t first,
1081 uint64_t i,
1082 Object **ret, uint64_t *offset) {
1083
cec736d2 1084 Object *o;
6c8a39b8 1085 uint64_t p = 0, a;
cec736d2
LP
1086 int r;
1087
1088 assert(f);
1089
de190aef
LP
1090 a = first;
1091 while (a > 0) {
1092 uint64_t n;
cec736d2 1093
de190aef
LP
1094 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1095 if (r < 0)
1096 return r;
cec736d2 1097
de190aef
LP
1098 n = journal_file_entry_array_n_items(o);
1099 if (i < n) {
1100 p = le64toh(o->entry_array.items[i]);
1101 break;
cec736d2
LP
1102 }
1103
de190aef
LP
1104 i -= n;
1105 a = le64toh(o->entry_array.next_entry_array_offset);
1106 }
1107
1108 if (a <= 0 || p <= 0)
1109 return 0;
1110
1111 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1112 if (r < 0)
1113 return r;
1114
1115 if (ret)
1116 *ret = o;
1117
1118 if (offset)
1119 *offset = p;
1120
1121 return 1;
1122}
1123
1124static int generic_array_get_plus_one(JournalFile *f,
1125 uint64_t extra,
1126 uint64_t first,
1127 uint64_t i,
1128 Object **ret, uint64_t *offset) {
1129
1130 Object *o;
1131
1132 assert(f);
1133
1134 if (i == 0) {
1135 int r;
1136
1137 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1138 if (r < 0)
1139 return r;
1140
de190aef
LP
1141 if (ret)
1142 *ret = o;
cec736d2 1143
de190aef
LP
1144 if (offset)
1145 *offset = extra;
cec736d2 1146
de190aef 1147 return 1;
cec736d2
LP
1148 }
1149
de190aef
LP
1150 return generic_array_get(f, first, i-1, ret, offset);
1151}
cec736d2 1152
de190aef
LP
/* Verdicts returned by the test_object callbacks used for bisection; they
 * describe how the tested object relates to the needle. */
enum {
        TEST_FOUND,     /* the tested object matches the needle exactly */
        TEST_LEFT,      /* the tested object's value is smaller than the needle */
        TEST_RIGHT      /* the tested object's value is larger than the needle */
};
cec736d2 1158
de190aef
LP
1159static int generic_array_bisect(JournalFile *f,
1160 uint64_t first,
1161 uint64_t n,
1162 uint64_t needle,
1163 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1164 direction_t direction,
1165 Object **ret,
1166 uint64_t *offset,
1167 uint64_t *idx) {
1168
1169 uint64_t a, p, t = 0, i = 0, last_p = 0;
1170 bool subtract_one = false;
1171 Object *o, *array = NULL;
1172 int r;
cec736d2 1173
de190aef
LP
1174 assert(f);
1175 assert(test_object);
cec736d2 1176
de190aef
LP
1177 a = first;
1178 while (a > 0) {
1179 uint64_t left, right, k, lp;
1180
1181 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1182 if (r < 0)
1183 return r;
1184
de190aef
LP
1185 k = journal_file_entry_array_n_items(array);
1186 right = MIN(k, n);
1187 if (right <= 0)
1188 return 0;
cec736d2 1189
de190aef
LP
1190 i = right - 1;
1191 lp = p = le64toh(array->entry_array.items[i]);
1192 if (p <= 0)
1193 return -EBADMSG;
cec736d2 1194
de190aef
LP
1195 r = test_object(f, p, needle);
1196 if (r < 0)
1197 return r;
cec736d2 1198
de190aef
LP
1199 if (r == TEST_FOUND)
1200 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1201
1202 if (r == TEST_RIGHT) {
1203 left = 0;
1204 right -= 1;
1205 for (;;) {
1206 if (left == right) {
1207 if (direction == DIRECTION_UP)
1208 subtract_one = true;
1209
1210 i = left;
1211 goto found;
1212 }
1213
1214 assert(left < right);
1215
1216 i = (left + right) / 2;
1217 p = le64toh(array->entry_array.items[i]);
1218 if (p <= 0)
1219 return -EBADMSG;
1220
1221 r = test_object(f, p, needle);
1222 if (r < 0)
1223 return r;
cec736d2 1224
de190aef
LP
1225 if (r == TEST_FOUND)
1226 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1227
1228 if (r == TEST_RIGHT)
1229 right = i;
1230 else
1231 left = i + 1;
1232 }
1233 }
1234
cbdca852
LP
1235 if (k > n) {
1236 if (direction == DIRECTION_UP) {
1237 i = n;
1238 subtract_one = true;
1239 goto found;
1240 }
1241
cec736d2 1242 return 0;
cbdca852 1243 }
cec736d2 1244
de190aef
LP
1245 last_p = lp;
1246
1247 n -= k;
1248 t += k;
1249 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1250 }
1251
1252 return 0;
de190aef
LP
1253
1254found:
1255 if (subtract_one && t == 0 && i == 0)
1256 return 0;
1257
1258 if (subtract_one && i == 0)
1259 p = last_p;
1260 else if (subtract_one)
1261 p = le64toh(array->entry_array.items[i-1]);
1262 else
1263 p = le64toh(array->entry_array.items[i]);
1264
1265 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1266 if (r < 0)
1267 return r;
1268
1269 if (ret)
1270 *ret = o;
1271
1272 if (offset)
1273 *offset = p;
1274
1275 if (idx)
cbdca852 1276 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1277
1278 return 1;
cec736d2
LP
1279}
1280
de190aef
LP
1281static int generic_array_bisect_plus_one(JournalFile *f,
1282 uint64_t extra,
1283 uint64_t first,
1284 uint64_t n,
1285 uint64_t needle,
1286 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1287 direction_t direction,
1288 Object **ret,
1289 uint64_t *offset,
1290 uint64_t *idx) {
1291
cec736d2 1292 int r;
cbdca852
LP
1293 bool step_back = false;
1294 Object *o;
cec736d2
LP
1295
1296 assert(f);
de190aef 1297 assert(test_object);
cec736d2 1298
de190aef
LP
1299 if (n <= 0)
1300 return 0;
cec736d2 1301
de190aef
LP
1302 /* This bisects the array in object 'first', but first checks
1303 * an extra */
de190aef
LP
1304 r = test_object(f, extra, needle);
1305 if (r < 0)
1306 return r;
a536e261
LP
1307
1308 if (r == TEST_FOUND)
1309 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1310
cbdca852
LP
1311 /* if we are looking with DIRECTION_UP then we need to first
1312 see if in the actual array there is a matching entry, and
1313 return the last one of that. But if there isn't any we need
1314 to return this one. Hence remember this, and return it
1315 below. */
1316 if (r == TEST_LEFT)
1317 step_back = direction == DIRECTION_UP;
de190aef 1318
cbdca852
LP
1319 if (r == TEST_RIGHT) {
1320 if (direction == DIRECTION_DOWN)
1321 goto found;
1322 else
1323 return 0;
a536e261 1324 }
cec736d2 1325
de190aef
LP
1326 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1327
cbdca852
LP
1328 if (r == 0 && step_back)
1329 goto found;
1330
ecf68b1d 1331 if (r > 0 && idx)
de190aef
LP
1332 (*idx) ++;
1333
1334 return r;
cbdca852
LP
1335
1336found:
1337 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1338 if (r < 0)
1339 return r;
1340
1341 if (ret)
1342 *ret = o;
1343
1344 if (offset)
1345 *offset = extra;
1346
1347 if (idx)
1348 *idx = 0;
1349
1350 return 1;
1351}
1352
1353static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1354 assert(f);
1355 assert(p > 0);
1356
1357 if (p == needle)
1358 return TEST_FOUND;
1359 else if (p < needle)
1360 return TEST_LEFT;
1361 else
1362 return TEST_RIGHT;
1363}
1364
1365int journal_file_move_to_entry_by_offset(
1366 JournalFile *f,
1367 uint64_t p,
1368 direction_t direction,
1369 Object **ret,
1370 uint64_t *offset) {
1371
1372 return generic_array_bisect(f,
1373 le64toh(f->header->entry_array_offset),
1374 le64toh(f->header->n_entries),
1375 p,
1376 test_object_offset,
1377 direction,
1378 ret, offset, NULL);
de190aef
LP
1379}
1380
cbdca852 1381
de190aef
LP
1382static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1383 Object *o;
1384 int r;
1385
1386 assert(f);
1387 assert(p > 0);
1388
1389 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1390 if (r < 0)
1391 return r;
1392
de190aef
LP
1393 if (le64toh(o->entry.seqnum) == needle)
1394 return TEST_FOUND;
1395 else if (le64toh(o->entry.seqnum) < needle)
1396 return TEST_LEFT;
1397 else
1398 return TEST_RIGHT;
1399}
cec736d2 1400
de190aef
LP
1401int journal_file_move_to_entry_by_seqnum(
1402 JournalFile *f,
1403 uint64_t seqnum,
1404 direction_t direction,
1405 Object **ret,
1406 uint64_t *offset) {
1407
1408 return generic_array_bisect(f,
1409 le64toh(f->header->entry_array_offset),
1410 le64toh(f->header->n_entries),
1411 seqnum,
1412 test_object_seqnum,
1413 direction,
1414 ret, offset, NULL);
1415}
cec736d2 1416
de190aef
LP
1417static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1418 Object *o;
1419 int r;
1420
1421 assert(f);
1422 assert(p > 0);
1423
1424 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1425 if (r < 0)
1426 return r;
1427
1428 if (le64toh(o->entry.realtime) == needle)
1429 return TEST_FOUND;
1430 else if (le64toh(o->entry.realtime) < needle)
1431 return TEST_LEFT;
1432 else
1433 return TEST_RIGHT;
cec736d2
LP
1434}
1435
de190aef
LP
1436int journal_file_move_to_entry_by_realtime(
1437 JournalFile *f,
1438 uint64_t realtime,
1439 direction_t direction,
1440 Object **ret,
1441 uint64_t *offset) {
1442
1443 return generic_array_bisect(f,
1444 le64toh(f->header->entry_array_offset),
1445 le64toh(f->header->n_entries),
1446 realtime,
1447 test_object_realtime,
1448 direction,
1449 ret, offset, NULL);
1450}
1451
1452static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1453 Object *o;
1454 int r;
1455
1456 assert(f);
1457 assert(p > 0);
1458
1459 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1460 if (r < 0)
1461 return r;
1462
1463 if (le64toh(o->entry.monotonic) == needle)
1464 return TEST_FOUND;
1465 else if (le64toh(o->entry.monotonic) < needle)
1466 return TEST_LEFT;
1467 else
1468 return TEST_RIGHT;
1469}
1470
1471int journal_file_move_to_entry_by_monotonic(
1472 JournalFile *f,
1473 sd_id128_t boot_id,
1474 uint64_t monotonic,
1475 direction_t direction,
1476 Object **ret,
1477 uint64_t *offset) {
1478
10b6f904 1479 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1480 Object *o;
1481 int r;
1482
cbdca852 1483 assert(f);
de190aef 1484
cbdca852 1485 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1486 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1487 if (r < 0)
1488 return r;
cbdca852 1489 if (r == 0)
de190aef
LP
1490 return -ENOENT;
1491
1492 return generic_array_bisect_plus_one(f,
1493 le64toh(o->data.entry_offset),
1494 le64toh(o->data.entry_array_offset),
1495 le64toh(o->data.n_entries),
1496 monotonic,
1497 test_object_monotonic,
1498 direction,
1499 ret, offset, NULL);
1500}
1501
de190aef
LP
1502int journal_file_next_entry(
1503 JournalFile *f,
1504 Object *o, uint64_t p,
1505 direction_t direction,
1506 Object **ret, uint64_t *offset) {
1507
1508 uint64_t i, n;
cec736d2
LP
1509 int r;
1510
1511 assert(f);
de190aef
LP
1512 assert(p > 0 || !o);
1513
1514 n = le64toh(f->header->n_entries);
1515 if (n <= 0)
1516 return 0;
cec736d2
LP
1517
1518 if (!o)
de190aef 1519 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1520 else {
de190aef 1521 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1522 return -EINVAL;
1523
de190aef
LP
1524 r = generic_array_bisect(f,
1525 le64toh(f->header->entry_array_offset),
1526 le64toh(f->header->n_entries),
1527 p,
1528 test_object_offset,
1529 DIRECTION_DOWN,
1530 NULL, NULL,
1531 &i);
1532 if (r <= 0)
1533 return r;
1534
1535 if (direction == DIRECTION_DOWN) {
1536 if (i >= n - 1)
1537 return 0;
1538
1539 i++;
1540 } else {
1541 if (i <= 0)
1542 return 0;
1543
1544 i--;
1545 }
cec736d2
LP
1546 }
1547
de190aef
LP
1548 /* And jump to it */
1549 return generic_array_get(f,
1550 le64toh(f->header->entry_array_offset),
1551 i,
1552 ret, offset);
1553}
cec736d2 1554
de190aef
LP
1555int journal_file_skip_entry(
1556 JournalFile *f,
1557 Object *o, uint64_t p,
1558 int64_t skip,
1559 Object **ret, uint64_t *offset) {
1560
1561 uint64_t i, n;
1562 int r;
1563
1564 assert(f);
1565 assert(o);
1566 assert(p > 0);
1567
1568 if (o->object.type != OBJECT_ENTRY)
1569 return -EINVAL;
1570
1571 r = generic_array_bisect(f,
1572 le64toh(f->header->entry_array_offset),
1573 le64toh(f->header->n_entries),
1574 p,
1575 test_object_offset,
1576 DIRECTION_DOWN,
1577 NULL, NULL,
1578 &i);
1579 if (r <= 0)
cec736d2
LP
1580 return r;
1581
de190aef
LP
1582 /* Calculate new index */
1583 if (skip < 0) {
1584 if ((uint64_t) -skip >= i)
1585 i = 0;
1586 else
1587 i = i - (uint64_t) -skip;
1588 } else
1589 i += (uint64_t) skip;
cec736d2 1590
de190aef
LP
1591 n = le64toh(f->header->n_entries);
1592 if (n <= 0)
1593 return -EBADMSG;
cec736d2 1594
de190aef
LP
1595 if (i >= n)
1596 i = n-1;
1597
1598 return generic_array_get(f,
1599 le64toh(f->header->entry_array_offset),
1600 i,
1601 ret, offset);
cec736d2
LP
1602}
1603
de190aef
LP
1604int journal_file_next_entry_for_data(
1605 JournalFile *f,
1606 Object *o, uint64_t p,
1607 uint64_t data_offset,
1608 direction_t direction,
1609 Object **ret, uint64_t *offset) {
1610
1611 uint64_t n, i;
cec736d2 1612 int r;
de190aef 1613 Object *d;
cec736d2
LP
1614
1615 assert(f);
de190aef 1616 assert(p > 0 || !o);
cec736d2 1617
de190aef 1618 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1619 if (r < 0)
de190aef 1620 return r;
cec736d2 1621
de190aef
LP
1622 n = le64toh(d->data.n_entries);
1623 if (n <= 0)
1624 return n;
cec736d2 1625
de190aef
LP
1626 if (!o)
1627 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1628 else {
1629 if (o->object.type != OBJECT_ENTRY)
1630 return -EINVAL;
cec736d2 1631
de190aef
LP
1632 r = generic_array_bisect_plus_one(f,
1633 le64toh(d->data.entry_offset),
1634 le64toh(d->data.entry_array_offset),
1635 le64toh(d->data.n_entries),
1636 p,
1637 test_object_offset,
1638 DIRECTION_DOWN,
1639 NULL, NULL,
1640 &i);
1641
1642 if (r <= 0)
cec736d2
LP
1643 return r;
1644
de190aef
LP
1645 if (direction == DIRECTION_DOWN) {
1646 if (i >= n - 1)
1647 return 0;
cec736d2 1648
de190aef
LP
1649 i++;
1650 } else {
1651 if (i <= 0)
1652 return 0;
cec736d2 1653
de190aef
LP
1654 i--;
1655 }
cec736d2 1656
de190aef 1657 }
cec736d2 1658
de190aef
LP
1659 return generic_array_get_plus_one(f,
1660 le64toh(d->data.entry_offset),
1661 le64toh(d->data.entry_array_offset),
1662 i,
1663 ret, offset);
1664}
cec736d2 1665
cbdca852
LP
1666int journal_file_move_to_entry_by_offset_for_data(
1667 JournalFile *f,
1668 uint64_t data_offset,
1669 uint64_t p,
1670 direction_t direction,
1671 Object **ret, uint64_t *offset) {
1672
1673 int r;
1674 Object *d;
1675
1676 assert(f);
1677
1678 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1679 if (r < 0)
1680 return r;
1681
1682 return generic_array_bisect_plus_one(f,
1683 le64toh(d->data.entry_offset),
1684 le64toh(d->data.entry_array_offset),
1685 le64toh(d->data.n_entries),
1686 p,
1687 test_object_offset,
1688 direction,
1689 ret, offset, NULL);
1690}
1691
1692int journal_file_move_to_entry_by_monotonic_for_data(
1693 JournalFile *f,
1694 uint64_t data_offset,
1695 sd_id128_t boot_id,
1696 uint64_t monotonic,
1697 direction_t direction,
1698 Object **ret, uint64_t *offset) {
1699
1700 char t[9+32+1] = "_BOOT_ID=";
1701 Object *o, *d;
1702 int r;
1703 uint64_t b, z;
1704
1705 assert(f);
1706
1707 /* First, seek by time */
1708 sd_id128_to_string(boot_id, t + 9);
1709 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1710 if (r < 0)
1711 return r;
1712 if (r == 0)
1713 return -ENOENT;
1714
1715 r = generic_array_bisect_plus_one(f,
1716 le64toh(o->data.entry_offset),
1717 le64toh(o->data.entry_array_offset),
1718 le64toh(o->data.n_entries),
1719 monotonic,
1720 test_object_monotonic,
1721 direction,
1722 NULL, &z, NULL);
1723 if (r <= 0)
1724 return r;
1725
1726 /* And now, continue seeking until we find an entry that
1727 * exists in both bisection arrays */
1728
1729 for (;;) {
1730 Object *qo;
1731 uint64_t p, q;
1732
1733 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1734 if (r < 0)
1735 return r;
1736
1737 r = generic_array_bisect_plus_one(f,
1738 le64toh(d->data.entry_offset),
1739 le64toh(d->data.entry_array_offset),
1740 le64toh(d->data.n_entries),
1741 z,
1742 test_object_offset,
1743 direction,
1744 NULL, &p, NULL);
1745 if (r <= 0)
1746 return r;
1747
1748 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1749 if (r < 0)
1750 return r;
1751
1752 r = generic_array_bisect_plus_one(f,
1753 le64toh(o->data.entry_offset),
1754 le64toh(o->data.entry_array_offset),
1755 le64toh(o->data.n_entries),
1756 p,
1757 test_object_offset,
1758 direction,
1759 &qo, &q, NULL);
1760
1761 if (r <= 0)
1762 return r;
1763
1764 if (p == q) {
1765 if (ret)
1766 *ret = qo;
1767 if (offset)
1768 *offset = q;
1769
1770 return 1;
1771 }
1772
1773 z = q;
1774 }
1775
1776 return 0;
1777}
1778
de190aef
LP
1779int journal_file_move_to_entry_by_seqnum_for_data(
1780 JournalFile *f,
1781 uint64_t data_offset,
1782 uint64_t seqnum,
1783 direction_t direction,
1784 Object **ret, uint64_t *offset) {
cec736d2 1785
de190aef
LP
1786 Object *d;
1787 int r;
cec736d2 1788
91a31dde
LP
1789 assert(f);
1790
de190aef 1791 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1792 if (r < 0)
de190aef 1793 return r;
cec736d2 1794
de190aef
LP
1795 return generic_array_bisect_plus_one(f,
1796 le64toh(d->data.entry_offset),
1797 le64toh(d->data.entry_array_offset),
1798 le64toh(d->data.n_entries),
1799 seqnum,
1800 test_object_seqnum,
1801 direction,
1802 ret, offset, NULL);
1803}
cec736d2 1804
de190aef
LP
1805int journal_file_move_to_entry_by_realtime_for_data(
1806 JournalFile *f,
1807 uint64_t data_offset,
1808 uint64_t realtime,
1809 direction_t direction,
1810 Object **ret, uint64_t *offset) {
1811
1812 Object *d;
1813 int r;
1814
91a31dde
LP
1815 assert(f);
1816
de190aef 1817 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1818 if (r < 0)
de190aef
LP
1819 return r;
1820
1821 return generic_array_bisect_plus_one(f,
1822 le64toh(d->data.entry_offset),
1823 le64toh(d->data.entry_array_offset),
1824 le64toh(d->data.n_entries),
1825 realtime,
1826 test_object_realtime,
1827 direction,
1828 ret, offset, NULL);
cec736d2
LP
1829}
1830
0284adc6 1831void journal_file_dump(JournalFile *f) {
7560fffc 1832 Object *o;
7560fffc 1833 int r;
0284adc6 1834 uint64_t p;
7560fffc
LP
1835
1836 assert(f);
1837
0284adc6 1838 journal_file_print_header(f);
7560fffc 1839
0284adc6
LP
1840 p = le64toh(f->header->header_size);
1841 while (p != 0) {
1842 r = journal_file_move_to_object(f, -1, p, &o);
1843 if (r < 0)
1844 goto fail;
7560fffc 1845
0284adc6 1846 switch (o->object.type) {
d98cc1f2 1847
0284adc6
LP
1848 case OBJECT_UNUSED:
1849 printf("Type: OBJECT_UNUSED\n");
1850 break;
d98cc1f2 1851
0284adc6
LP
1852 case OBJECT_DATA:
1853 printf("Type: OBJECT_DATA\n");
1854 break;
7560fffc 1855
0284adc6 1856 case OBJECT_ENTRY:
f7fab8a5 1857 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
0284adc6
LP
1858 (unsigned long long) le64toh(o->entry.seqnum),
1859 (unsigned long long) le64toh(o->entry.monotonic),
1860 (unsigned long long) le64toh(o->entry.realtime));
1861 break;
7560fffc 1862
0284adc6
LP
1863 case OBJECT_FIELD_HASH_TABLE:
1864 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1865 break;
7560fffc 1866
0284adc6
LP
1867 case OBJECT_DATA_HASH_TABLE:
1868 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1869 break;
7560fffc 1870
0284adc6
LP
1871 case OBJECT_ENTRY_ARRAY:
1872 printf("Type: OBJECT_ENTRY_ARRAY\n");
1873 break;
7560fffc 1874
0284adc6 1875 case OBJECT_TAG:
f7fab8a5
LP
1876 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1877 (unsigned long long) le64toh(o->tag.seqnum),
1878 (unsigned long long) le64toh(o->tag.epoch));
0284adc6
LP
1879 break;
1880 }
7560fffc 1881
0284adc6
LP
1882 if (o->object.flags & OBJECT_COMPRESSED)
1883 printf("Flags: COMPRESSED\n");
7560fffc 1884
0284adc6
LP
1885 if (p == le64toh(f->header->tail_object_offset))
1886 p = 0;
1887 else
1888 p = p + ALIGN64(le64toh(o->object.size));
1889 }
7560fffc 1890
0284adc6
LP
1891 return;
1892fail:
1893 log_error("File corrupt");
7560fffc
LP
1894}
1895
0284adc6
LP
1896void journal_file_print_header(JournalFile *f) {
1897 char a[33], b[33], c[33];
1898 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
7560fffc
LP
1899
1900 assert(f);
7560fffc 1901
0284adc6
LP
1902 printf("File Path: %s\n"
1903 "File ID: %s\n"
1904 "Machine ID: %s\n"
1905 "Boot ID: %s\n"
1906 "Sequential Number ID: %s\n"
1907 "State: %s\n"
1908 "Compatible Flags:%s%s\n"
1909 "Incompatible Flags:%s%s\n"
1910 "Header size: %llu\n"
1911 "Arena size: %llu\n"
1912 "Data Hash Table Size: %llu\n"
1913 "Field Hash Table Size: %llu\n"
0284adc6
LP
1914 "Rotate Suggested: %s\n"
1915 "Head Sequential Number: %llu\n"
1916 "Tail Sequential Number: %llu\n"
1917 "Head Realtime Timestamp: %s\n"
3223f44f
LP
1918 "Tail Realtime Timestamp: %s\n"
1919 "Objects: %llu\n"
1920 "Entry Objects: %llu\n",
0284adc6
LP
1921 f->path,
1922 sd_id128_to_string(f->header->file_id, a),
1923 sd_id128_to_string(f->header->machine_id, b),
1924 sd_id128_to_string(f->header->boot_id, c),
1925 sd_id128_to_string(f->header->seqnum_id, c),
3223f44f
LP
1926 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1927 f->header->state == STATE_ONLINE ? "ONLINE" :
1928 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
8088cbd3
LP
1929 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1930 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1931 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1932 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
0284adc6
LP
1933 (unsigned long long) le64toh(f->header->header_size),
1934 (unsigned long long) le64toh(f->header->arena_size),
1935 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1936 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
0284adc6
LP
1937 yes_no(journal_file_rotate_suggested(f)),
1938 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1939 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1940 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
3223f44f
LP
1941 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1942 (unsigned long long) le64toh(f->header->n_objects),
1943 (unsigned long long) le64toh(f->header->n_entries));
7560fffc 1944
0284adc6
LP
1945 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1946 printf("Data Objects: %llu\n"
1947 "Data Hash Table Fill: %.1f%%\n",
1948 (unsigned long long) le64toh(f->header->n_data),
1949 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 1950
0284adc6
LP
1951 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1952 printf("Field Objects: %llu\n"
1953 "Field Hash Table Fill: %.1f%%\n",
1954 (unsigned long long) le64toh(f->header->n_fields),
1955 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
1956
1957 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1958 printf("Tag Objects: %llu\n",
1959 (unsigned long long) le64toh(f->header->n_tags));
1960 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1961 printf("Entry Array Objects: %llu\n",
1962 (unsigned long long) le64toh(f->header->n_entry_arrays));
7560fffc
LP
1963}
1964
0284adc6
LP
/* Opens (and, if the file is empty and opened writable, initializes) the
 * journal file fname.
 *
 * flags must have O_RDONLY or O_RDWR as access mode, and fname must end
 * in ".journal" or ".journal~", otherwise -EINVAL is returned. If
 * mmap_cache is NULL a new cache is allocated for this file, otherwise a
 * reference to the passed one is taken. For writable files the metrics
 * are taken from 'metrics' (after passing them through
 * journal_default_metrics()) or, if that is NULL, from 'template'.
 *
 * Returns 0 on success and stores the new object in *ret (if ret is
 * non-NULL). On failure a negative errno-style code is returned and
 * everything set up so far is released via journal_file_close(). */
int journal_file_open(
                const char *fname,
                int flags,
                mode_t mode,
                bool compress,
                bool seal,
                JournalMetrics *metrics,
                MMapCache *mmap_cache,
                JournalFile *template,
                JournalFile **ret) {

        JournalFile *f;
        int r;
        bool newly_created = false;

        assert(fname);

        if ((flags & O_ACCMODE) != O_RDONLY &&
            (flags & O_ACCMODE) != O_RDWR)
                return -EINVAL;

        if (!endswith(fname, ".journal") &&
            !endswith(fname, ".journal~"))
                return -EINVAL;

        f = new0(JournalFile, 1);
        if (!f)
                return -ENOMEM;

        /* Mark the fd as invalid until open() below succeeded; from here
         * on all error paths go through the fail label */
        f->fd = -1;
        f->mode = mode;

        f->flags = flags;
        f->prot = prot_from_flags(flags);
        f->writable = (flags & O_ACCMODE) != O_RDONLY;
        f->compress = compress;
        f->seal = seal;

        if (mmap_cache)
                f->mmap = mmap_cache_ref(mmap_cache);
        else {
                f->mmap = mmap_cache_new();
                if (!f->mmap) {
                        r = -ENOMEM;
                        goto fail;
                }
        }

        f->path = strdup(fname);
        if (!f->path) {
                r = -ENOMEM;
                goto fail;
        }

        f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
        if (f->fd < 0) {
                r = -errno;
                goto fail;
        }

        if (fstat(f->fd, &f->last_stat) < 0) {
                r = -errno;
                goto fail;
        }

        /* An empty file that we may write to is treated as a new journal
         * file and gets a fresh header */
        if (f->last_stat.st_size == 0 && f->writable) {
                newly_created = true;

                /* Try to load the FSPRG state, and if we can't, then
                 * just don't do sealing */
                r = journal_file_fss_load(f);
                if (r < 0)
                        f->seal = false;

                r = journal_file_init_header(f, template);
                if (r < 0)
                        goto fail;

                /* Refresh the stat data, for the size check below */
                if (fstat(f->fd, &f->last_stat) < 0) {
                        r = -errno;
                        goto fail;
                }
        }

        if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
                r = -EIO;
                goto fail;
        }

        f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
        if (f->header == MAP_FAILED) {
                /* Reset to NULL before jumping to the cleanup path, so
                 * that it never sees MAP_FAILED */
                f->header = NULL;
                r = -errno;
                goto fail;
        }

        /* Only headers we did not just write ourselves are verified */
        if (!newly_created) {
                r = journal_file_verify_header(f);
                if (r < 0)
                        goto fail;
        }

        /* For pre-existing files a failure to load the FSS state is
         * fatal, unlike for newly created ones above */
        if (!newly_created && f->writable) {
                r = journal_file_fss_load(f);
                if (r < 0)
                        goto fail;
        }

        if (f->writable) {
                if (metrics) {
                        journal_default_metrics(metrics, f->fd);
                        f->metrics = *metrics;
                } else if (template)
                        f->metrics = template->metrics;

                r = journal_file_refresh_header(f);
                if (r < 0)
                        goto fail;
        }

        r = journal_file_hmac_setup(f);
        if (r < 0)
                goto fail;

        /* New files get their hash tables and the first tag set up
         * before the hash tables are mapped below */
        if (newly_created) {
                r = journal_file_setup_field_hash_table(f);
                if (r < 0)
                        goto fail;

                r = journal_file_setup_data_hash_table(f);
                if (r < 0)
                        goto fail;

                r = journal_file_append_first_tag(f);
                if (r < 0)
                        goto fail;
        }

        r = journal_file_map_field_hash_table(f);
        if (r < 0)
                goto fail;

        r = journal_file_map_data_hash_table(f);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = f;

        return 0;

fail:
        journal_file_close(f);

        return r;
}
0ac38b70 2121
baed47c3 2122int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
0ac38b70
LP
2123 char *p;
2124 size_t l;
2125 JournalFile *old_file, *new_file = NULL;
2126 int r;
2127
2128 assert(f);
2129 assert(*f);
2130
2131 old_file = *f;
2132
2133 if (!old_file->writable)
2134 return -EINVAL;
2135
2136 if (!endswith(old_file->path, ".journal"))
2137 return -EINVAL;
2138
2139 l = strlen(old_file->path);
2140
9447a7f1 2141 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2142 if (!p)
2143 return -ENOMEM;
2144
2145 memcpy(p, old_file->path, l - 8);
2146 p[l-8] = '@';
2147 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2148 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2149 "-%016llx-%016llx.journal",
beec0085 2150 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
0ac38b70
LP
2151 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2152
2153 r = rename(old_file->path, p);
2154 free(p);
2155
2156 if (r < 0)
2157 return -errno;
2158
ccdbaf91 2159 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2160
baed47c3 2161 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2162 journal_file_close(old_file);
2163
2164 *f = new_file;
2165 return r;
2166}
2167
9447a7f1
LP
2168int journal_file_open_reliably(
2169 const char *fname,
2170 int flags,
2171 mode_t mode,
7560fffc 2172 bool compress,
baed47c3 2173 bool seal,
4a92baf3 2174 JournalMetrics *metrics,
27370278 2175 MMapCache *mmap_cache,
9447a7f1
LP
2176 JournalFile *template,
2177 JournalFile **ret) {
2178
2179 int r;
2180 size_t l;
2181 char *p;
2182
baed47c3 2183 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2184 metrics, mmap_cache, template, ret);
0071d9f1
LP
2185 if (r != -EBADMSG && /* corrupted */
2186 r != -ENODATA && /* truncated */
2187 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2188 r != -EPROTONOSUPPORT && /* incompatible feature */
2189 r != -EBUSY && /* unclean shutdown */
2190 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2191 return r;
2192
2193 if ((flags & O_ACCMODE) == O_RDONLY)
2194 return r;
2195
2196 if (!(flags & O_CREAT))
2197 return r;
2198
7560fffc
LP
2199 if (!endswith(fname, ".journal"))
2200 return r;
2201
5c70eab4
LP
2202 /* The file is corrupted. Rotate it away and try it again (but only once) */
2203
9447a7f1
LP
2204 l = strlen(fname);
2205 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2206 (int) (l-8), fname,
2207 (unsigned long long) now(CLOCK_REALTIME),
2208 random_ull()) < 0)
2209 return -ENOMEM;
2210
2211 r = rename(fname, p);
2212 free(p);
2213 if (r < 0)
2214 return -errno;
2215
a1a1898f 2216 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2217
baed47c3 2218 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2219 metrics, mmap_cache, template, ret);
9447a7f1
LP
2220}
2221
cf244689
LP
2222
2223int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2224 uint64_t i, n;
2225 uint64_t q, xor_hash = 0;
2226 int r;
2227 EntryItem *items;
2228 dual_timestamp ts;
2229
2230 assert(from);
2231 assert(to);
2232 assert(o);
2233 assert(p);
2234
2235 if (!to->writable)
2236 return -EPERM;
2237
2238 ts.monotonic = le64toh(o->entry.monotonic);
2239 ts.realtime = le64toh(o->entry.realtime);
2240
2241 if (to->tail_entry_monotonic_valid &&
2242 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2243 return -EINVAL;
2244
cf244689
LP
2245 n = journal_file_entry_n_items(o);
2246 items = alloca(sizeof(EntryItem) * n);
2247
2248 for (i = 0; i < n; i++) {
4fd052ae
FC
2249 uint64_t l, h;
2250 le64_t le_hash;
cf244689
LP
2251 size_t t;
2252 void *data;
2253 Object *u;
2254
2255 q = le64toh(o->entry.items[i].object_offset);
2256 le_hash = o->entry.items[i].hash;
2257
2258 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2259 if (r < 0)
2260 return r;
2261
2262 if (le_hash != o->data.hash)
2263 return -EBADMSG;
2264
2265 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2266 t = (size_t) l;
2267
2268 /* We hit the limit on 32bit machines */
2269 if ((uint64_t) t != l)
2270 return -E2BIG;
2271
2272 if (o->object.flags & OBJECT_COMPRESSED) {
2273#ifdef HAVE_XZ
2274 uint64_t rsize;
2275
2276 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2277 return -EBADMSG;
2278
2279 data = from->compress_buffer;
2280 l = rsize;
2281#else
2282 return -EPROTONOSUPPORT;
2283#endif
2284 } else
2285 data = o->data.payload;
2286
2287 r = journal_file_append_data(to, data, l, &u, &h);
2288 if (r < 0)
2289 return r;
2290
2291 xor_hash ^= le64toh(u->data.hash);
2292 items[i].object_offset = htole64(h);
2293 items[i].hash = u->data.hash;
2294
2295 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2296 if (r < 0)
2297 return r;
2298 }
2299
2300 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2301}
babfc091
LP
2302
2303void journal_default_metrics(JournalMetrics *m, int fd) {
2304 uint64_t fs_size = 0;
2305 struct statvfs ss;
a7bc2c2a 2306 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2307
2308 assert(m);
2309 assert(fd >= 0);
2310
2311 if (fstatvfs(fd, &ss) >= 0)
2312 fs_size = ss.f_frsize * ss.f_blocks;
2313
2314 if (m->max_use == (uint64_t) -1) {
2315
2316 if (fs_size > 0) {
2317 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2318
2319 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2320 m->max_use = DEFAULT_MAX_USE_UPPER;
2321
2322 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2323 m->max_use = DEFAULT_MAX_USE_LOWER;
2324 } else
2325 m->max_use = DEFAULT_MAX_USE_LOWER;
2326 } else {
2327 m->max_use = PAGE_ALIGN(m->max_use);
2328
2329 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2330 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2331 }
2332
2333 if (m->max_size == (uint64_t) -1) {
2334 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2335
2336 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2337 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2338 } else
2339 m->max_size = PAGE_ALIGN(m->max_size);
2340
2341 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2342 m->max_size = JOURNAL_FILE_SIZE_MIN;
2343
2344 if (m->max_size*2 > m->max_use)
2345 m->max_use = m->max_size*2;
2346
2347 if (m->min_size == (uint64_t) -1)
2348 m->min_size = JOURNAL_FILE_SIZE_MIN;
2349 else {
2350 m->min_size = PAGE_ALIGN(m->min_size);
2351
2352 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2353 m->min_size = JOURNAL_FILE_SIZE_MIN;
2354
2355 if (m->min_size > m->max_size)
2356 m->max_size = m->min_size;
2357 }
2358
2359 if (m->keep_free == (uint64_t) -1) {
2360
2361 if (fs_size > 0) {
2362 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2363
2364 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2365 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2366
2367 } else
2368 m->keep_free = DEFAULT_KEEP_FREE;
2369 }
2370
e7bf07b3
LP
2371 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2372 format_bytes(a, sizeof(a), m->max_use),
2373 format_bytes(b, sizeof(b), m->max_size),
2374 format_bytes(c, sizeof(c), m->min_size),
2375 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2376}
08984293
LP
2377
2378int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2379 assert(f);
2380 assert(from || to);
2381
2382 if (from) {
162566a4
LP
2383 if (f->header->head_entry_realtime == 0)
2384 return -ENOENT;
08984293 2385
162566a4 2386 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2387 }
2388
2389 if (to) {
162566a4
LP
2390 if (f->header->tail_entry_realtime == 0)
2391 return -ENOENT;
08984293 2392
162566a4 2393 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2394 }
2395
2396 return 1;
2397}
2398
2399int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2400 char t[9+32+1] = "_BOOT_ID=";
2401 Object *o;
2402 uint64_t p;
2403 int r;
2404
2405 assert(f);
2406 assert(from || to);
2407
2408 sd_id128_to_string(boot_id, t + 9);
2409
2410 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2411 if (r <= 0)
2412 return r;
2413
2414 if (le64toh(o->data.n_entries) <= 0)
2415 return 0;
2416
2417 if (from) {
2418 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2419 if (r < 0)
2420 return r;
2421
2422 *from = le64toh(o->entry.monotonic);
2423 }
2424
2425 if (to) {
2426 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2427 if (r < 0)
2428 return r;
2429
2430 r = generic_array_get_plus_one(f,
2431 le64toh(o->data.entry_offset),
2432 le64toh(o->data.entry_array_offset),
2433 le64toh(o->data.n_entries)-1,
2434 &o, NULL);
2435 if (r <= 0)
2436 return r;
2437
2438 *to = le64toh(o->entry.monotonic);
2439 }
2440
2441 return 1;
2442}
dca6219e
LP
2443
2444bool journal_file_rotate_suggested(JournalFile *f) {
2445 assert(f);
2446
2447 /* If we gained new header fields we gained new features,
2448 * hence suggest a rotation */
361f9cbc
LP
2449 if (le64toh(f->header->header_size) < sizeof(Header)) {
2450 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2451 return true;
361f9cbc 2452 }
dca6219e
LP
2453
2454 /* Let's check if the hash tables grew over a certain fill
2455 * level (75%, borrowing this value from Java's hash table
2456 * implementation), and if so suggest a rotation. To calculate
2457 * the fill level we need the n_data field, which only exists
2458 * in newer versions. */
2459
2460 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
2461 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2462 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2463 f->path,
2464 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2465 (unsigned long long) le64toh(f->header->n_data),
2466 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2467 (unsigned long long) (f->last_stat.st_size),
2468 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 2469 return true;
361f9cbc 2470 }
dca6219e
LP
2471
2472 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
2473 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2474 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2475 f->path,
2476 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2477 (unsigned long long) le64toh(f->header->n_fields),
2478 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 2479 return true;
361f9cbc 2480 }
dca6219e
LP
2481
2482 return false;
2483}