]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journal: be fine with opening rotated/corrupted journal files
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
0284adc6 32#include "journal-authenticate.h"
cec736d2 33#include "lookup3.h"
807e17f0 34#include "compress.h"
7560fffc 35#include "fsprg.h"
cec736d2 36
4a92baf3
LP
/* Default sizes, in bytes, of the data and field hash tables */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL * sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL * sizeof(HashItem))

/* Payloads smaller than this many bytes are never compressed */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* Smallest size a journal file may have */
#define JOURNAL_FILE_SIZE_MIN (64ULL * 1024ULL)                       /* 64 KiB */

/* Lower and upper bounds applied when max_use is deduced from the
 * file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL * 1024ULL * 1024ULL)              /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)    /* 4 GiB */

/* Upper bound applied when max_size is deduced from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL * 1024ULL * 1024ULL)           /* 128 MiB */

/* Upper bound applied when keep_free is deduced from the file system
 * size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)  /* 4 GiB */

/* keep_free value used when the file system size cannot be
 * determined */
#define DEFAULT_KEEP_FREE (1024ULL * 1024ULL)                         /* 1 MiB */

/* n_data was the first field added to the header after the initial
 * file format design, hence a valid header reaches at least up to it */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 63
cec736d2 64void journal_file_close(JournalFile *f) {
de190aef 65 assert(f);
cec736d2 66
b0af6f41 67 /* Write the final tag */
baed47c3 68 if (f->seal)
b0af6f41
LP
69 journal_file_append_tag(f);
70
7560fffc 71 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
72 if (f->mmap && f->fd >= 0)
73 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc
LP
74
75 if (f->writable && f->fd >= 0)
76 fdatasync(f->fd);
77
d384c7a8 78 if (f->header) {
cd96b3b8
LP
79 /* Mark the file offline. Don't override the archived state if it already is set */
80 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 81 f->header->state = STATE_OFFLINE;
cec736d2 82
d384c7a8
MS
83 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84 }
cec736d2 85
0ac38b70
LP
86 if (f->fd >= 0)
87 close_nointr_nofail(f->fd);
88
cec736d2 89 free(f->path);
807e17f0 90
16e9f408
LP
91 if (f->mmap)
92 mmap_cache_unref(f->mmap);
93
807e17f0
LP
94#ifdef HAVE_XZ
95 free(f->compress_buffer);
96#endif
97
7560fffc 98#ifdef HAVE_GCRYPT
baed47c3
LP
99 if (f->fss_file)
100 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
101 else if (f->fsprg_state)
102 free(f->fsprg_state);
103
104 free(f->fsprg_seed);
7560fffc
LP
105
106 if (f->hmac)
107 gcry_md_close(f->hmac);
108#endif
109
cec736d2
LP
110 free(f);
111}
112
0ac38b70 113static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
114 Header h;
115 ssize_t k;
116 int r;
117
118 assert(f);
119
120 zero(h);
7560fffc 121 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 122 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 123
7560fffc
LP
124 h.incompatible_flags =
125 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127 h.compatible_flags =
baed47c3 128 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 129
cec736d2
LP
130 r = sd_id128_randomize(&h.file_id);
131 if (r < 0)
132 return r;
133
0ac38b70
LP
134 if (template) {
135 h.seqnum_id = template->header->seqnum_id;
beec0085 136 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
137 } else
138 h.seqnum_id = h.file_id;
cec736d2
LP
139
140 k = pwrite(f->fd, &h, sizeof(h), 0);
141 if (k < 0)
142 return -errno;
143
144 if (k != sizeof(h))
145 return -EIO;
146
147 return 0;
148}
149
150static int journal_file_refresh_header(JournalFile *f) {
151 int r;
de190aef 152 sd_id128_t boot_id;
cec736d2
LP
153
154 assert(f);
155
156 r = sd_id128_get_machine(&f->header->machine_id);
157 if (r < 0)
158 return r;
159
de190aef 160 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
161 if (r < 0)
162 return r;
163
de190aef
LP
164 if (sd_id128_equal(boot_id, f->header->boot_id))
165 f->tail_entry_monotonic_valid = true;
166
167 f->header->boot_id = boot_id;
168
169 f->header->state = STATE_ONLINE;
b788cc23 170
7560fffc
LP
171 /* Sync the online state to disk */
172 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173 fdatasync(f->fd);
b788cc23 174
cec736d2
LP
175 return 0;
176}
177
178static int journal_file_verify_header(JournalFile *f) {
179 assert(f);
180
7560fffc 181 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
182 return -EBADMSG;
183
7560fffc
LP
184 /* In both read and write mode we refuse to open files with
185 * incompatible flags we don't know */
807e17f0 186#ifdef HAVE_XZ
7560fffc 187 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
188 return -EPROTONOSUPPORT;
189#else
cec736d2
LP
190 if (f->header->incompatible_flags != 0)
191 return -EPROTONOSUPPORT;
807e17f0 192#endif
cec736d2 193
7560fffc
LP
194 /* When open for writing we refuse to open files with
195 * compatible flags, too */
196 if (f->writable) {
197#ifdef HAVE_GCRYPT
baed47c3 198 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
199 return -EPROTONOSUPPORT;
200#else
201 if (f->header->compatible_flags != 0)
202 return -EPROTONOSUPPORT;
203#endif
204 }
205
dca6219e
LP
206 /* The first addition was n_data, so check that we are at least this large */
207 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
208 return -EBADMSG;
209
baed47c3
LP
210 if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED) &&
211 !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
212 return -EBADMSG;
213
23b0b2b2 214 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
cec736d2
LP
215 return -ENODATA;
216
217 if (f->writable) {
ccdbaf91 218 uint8_t state;
cec736d2
LP
219 sd_id128_t machine_id;
220 int r;
221
222 r = sd_id128_get_machine(&machine_id);
223 if (r < 0)
224 return r;
225
226 if (!sd_id128_equal(machine_id, f->header->machine_id))
227 return -EHOSTDOWN;
228
de190aef 229 state = f->header->state;
cec736d2 230
71fa6f00
LP
231 if (state == STATE_ONLINE) {
232 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
233 return -EBUSY;
234 } else if (state == STATE_ARCHIVED)
cec736d2 235 return -ESHUTDOWN;
71fa6f00
LP
236 else if (state != STATE_OFFLINE) {
237 log_debug("Journal file %s has unknown state %u.", f->path, state);
238 return -EBUSY;
239 }
cec736d2
LP
240 }
241
7560fffc 242 f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
baed47c3 243 f->seal = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED);
7560fffc 244
cec736d2
LP
245 return 0;
246}
247
248static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 249 uint64_t old_size, new_size;
fec2aa2f 250 int r;
cec736d2
LP
251
252 assert(f);
253
cec736d2 254 /* We assume that this file is not sparse, and we know that
38ac38b2 255 * for sure, since we always call posix_fallocate()
cec736d2
LP
256 * ourselves */
257
258 old_size =
23b0b2b2 259 le64toh(f->header->header_size) +
cec736d2
LP
260 le64toh(f->header->arena_size);
261
bc85bfee 262 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
263 if (new_size < le64toh(f->header->header_size))
264 new_size = le64toh(f->header->header_size);
bc85bfee
LP
265
266 if (new_size <= old_size)
cec736d2
LP
267 return 0;
268
bc85bfee
LP
269 if (f->metrics.max_size > 0 &&
270 new_size > f->metrics.max_size)
271 return -E2BIG;
cec736d2 272
bc85bfee
LP
273 if (new_size > f->metrics.min_size &&
274 f->metrics.keep_free > 0) {
cec736d2
LP
275 struct statvfs svfs;
276
277 if (fstatvfs(f->fd, &svfs) >= 0) {
278 uint64_t available;
279
280 available = svfs.f_bfree * svfs.f_bsize;
281
bc85bfee
LP
282 if (available >= f->metrics.keep_free)
283 available -= f->metrics.keep_free;
cec736d2
LP
284 else
285 available = 0;
286
287 if (new_size - old_size > available)
288 return -E2BIG;
289 }
290 }
291
bc85bfee
LP
292 /* Note that the glibc fallocate() fallback is very
293 inefficient, hence we try to minimize the allocation area
294 as we can. */
fec2aa2f
GV
295 r = posix_fallocate(f->fd, old_size, new_size - old_size);
296 if (r != 0)
297 return -r;
cec736d2 298
f65425cb
LP
299 mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
300
cec736d2
LP
301 if (fstat(f->fd, &f->last_stat) < 0)
302 return -errno;
303
23b0b2b2 304 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
305
306 return 0;
307}
308
16e9f408 309static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
cec736d2 310 assert(f);
cec736d2
LP
311 assert(ret);
312
2a59ea54 313 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
314 if (offset + size > (uint64_t) f->last_stat.st_size) {
315 /* Hmm, out of range? Let's refresh the fstat() data
316 * first, before we trust that check. */
317
318 if (fstat(f->fd, &f->last_stat) < 0 ||
319 offset + size > (uint64_t) f->last_stat.st_size)
320 return -EADDRNOTAVAIL;
321 }
322
16e9f408 323 return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
cec736d2
LP
324}
325
16e9f408
LP
326static uint64_t minimum_header_size(Object *o) {
327
328 static uint64_t table[] = {
329 [OBJECT_DATA] = sizeof(DataObject),
330 [OBJECT_FIELD] = sizeof(FieldObject),
331 [OBJECT_ENTRY] = sizeof(EntryObject),
332 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
333 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
334 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
335 [OBJECT_TAG] = sizeof(TagObject),
336 };
337
338 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
339 return sizeof(ObjectHeader);
340
341 return table[o->object.type];
342}
343
de190aef 344int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
345 int r;
346 void *t;
347 Object *o;
348 uint64_t s;
16e9f408 349 unsigned context;
cec736d2
LP
350
351 assert(f);
352 assert(ret);
353
16e9f408
LP
354 /* One context for each type, plus one catch-all for the rest */
355 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
356
357 r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
358 if (r < 0)
359 return r;
360
361 o = (Object*) t;
362 s = le64toh(o->object.size);
363
364 if (s < sizeof(ObjectHeader))
365 return -EBADMSG;
366
16e9f408
LP
367 if (o->object.type <= OBJECT_UNUSED)
368 return -EBADMSG;
369
370 if (s < minimum_header_size(o))
371 return -EBADMSG;
372
de190aef 373 if (type >= 0 && o->object.type != type)
cec736d2
LP
374 return -EBADMSG;
375
376 if (s > sizeof(ObjectHeader)) {
de190aef 377 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
378 if (r < 0)
379 return r;
380
381 o = (Object*) t;
382 }
383
cec736d2
LP
384 *ret = o;
385 return 0;
386}
387
d98cc1f2 388static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
389 uint64_t r;
390
391 assert(f);
392
beec0085 393 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
394
395 if (seqnum) {
de190aef 396 /* If an external seqnum counter was passed, we update
c2373f84
LP
397 * both the local and the external one, and set it to
398 * the maximum of both */
399
400 if (*seqnum + 1 > r)
401 r = *seqnum + 1;
402
403 *seqnum = r;
404 }
405
beec0085 406 f->header->tail_entry_seqnum = htole64(r);
cec736d2 407
beec0085
LP
408 if (f->header->head_entry_seqnum == 0)
409 f->header->head_entry_seqnum = htole64(r);
de190aef 410
cec736d2
LP
411 return r;
412}
413
0284adc6 414int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
415 int r;
416 uint64_t p;
417 Object *tail, *o;
418 void *t;
419
420 assert(f);
16e9f408 421 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
422 assert(size >= sizeof(ObjectHeader));
423 assert(offset);
424 assert(ret);
425
426 p = le64toh(f->header->tail_object_offset);
cec736d2 427 if (p == 0)
23b0b2b2 428 p = le64toh(f->header->header_size);
cec736d2 429 else {
de190aef 430 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
431 if (r < 0)
432 return r;
433
434 p += ALIGN64(le64toh(tail->object.size));
435 }
436
437 r = journal_file_allocate(f, p, size);
438 if (r < 0)
439 return r;
440
de190aef 441 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
442 if (r < 0)
443 return r;
444
445 o = (Object*) t;
446
447 zero(o->object);
de190aef 448 o->object.type = type;
cec736d2
LP
449 o->object.size = htole64(size);
450
451 f->header->tail_object_offset = htole64(p);
cec736d2
LP
452 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
453
454 *ret = o;
455 *offset = p;
456
457 return 0;
458}
459
de190aef 460static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
461 uint64_t s, p;
462 Object *o;
463 int r;
464
465 assert(f);
466
dfabe643 467 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
468 journal file and we want to make sure we never get beyond
469 75% fill level. Calculate the hash table size for the
470 maximum file size based on these metrics. */
471
dfabe643 472 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
473 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
474 s = DEFAULT_DATA_HASH_TABLE_SIZE;
475
dfabe643 476 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 477
de190aef
LP
478 r = journal_file_append_object(f,
479 OBJECT_DATA_HASH_TABLE,
480 offsetof(Object, hash_table.items) + s,
481 &o, &p);
cec736d2
LP
482 if (r < 0)
483 return r;
484
de190aef 485 memset(o->hash_table.items, 0, s);
cec736d2 486
de190aef
LP
487 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
488 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
489
490 return 0;
491}
492
de190aef 493static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
494 uint64_t s, p;
495 Object *o;
496 int r;
497
498 assert(f);
499
de190aef
LP
500 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
501 r = journal_file_append_object(f,
502 OBJECT_FIELD_HASH_TABLE,
503 offsetof(Object, hash_table.items) + s,
504 &o, &p);
cec736d2
LP
505 if (r < 0)
506 return r;
507
de190aef 508 memset(o->hash_table.items, 0, s);
cec736d2 509
de190aef
LP
510 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
511 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
512
513 return 0;
514}
515
de190aef 516static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
517 uint64_t s, p;
518 void *t;
519 int r;
520
521 assert(f);
522
de190aef
LP
523 p = le64toh(f->header->data_hash_table_offset);
524 s = le64toh(f->header->data_hash_table_size);
cec736d2 525
de190aef 526 r = journal_file_move_to(f,
16e9f408 527 OBJECT_DATA_HASH_TABLE,
de190aef
LP
528 p, s,
529 &t);
cec736d2
LP
530 if (r < 0)
531 return r;
532
de190aef 533 f->data_hash_table = t;
cec736d2
LP
534 return 0;
535}
536
de190aef 537static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
538 uint64_t s, p;
539 void *t;
540 int r;
541
542 assert(f);
543
de190aef
LP
544 p = le64toh(f->header->field_hash_table_offset);
545 s = le64toh(f->header->field_hash_table_size);
cec736d2 546
de190aef 547 r = journal_file_move_to(f,
16e9f408 548 OBJECT_FIELD_HASH_TABLE,
de190aef
LP
549 p, s,
550 &t);
cec736d2
LP
551 if (r < 0)
552 return r;
553
de190aef 554 f->field_hash_table = t;
cec736d2
LP
555 return 0;
556}
557
de190aef
LP
558static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
559 uint64_t p, h;
cec736d2
LP
560 int r;
561
562 assert(f);
563 assert(o);
564 assert(offset > 0);
de190aef 565 assert(o->object.type == OBJECT_DATA);
cec736d2 566
48496df6
LP
567 /* This might alter the window we are looking at */
568
de190aef
LP
569 o->data.next_hash_offset = o->data.next_field_offset = 0;
570 o->data.entry_offset = o->data.entry_array_offset = 0;
571 o->data.n_entries = 0;
cec736d2 572
de190aef 573 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 574 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
575 if (p == 0) {
576 /* Only entry in the hash table is easy */
de190aef 577 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 578 } else {
48496df6
LP
579 /* Move back to the previous data object, to patch in
580 * pointer */
cec736d2 581
de190aef 582 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
583 if (r < 0)
584 return r;
585
de190aef 586 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
587 }
588
de190aef 589 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 590
dca6219e
LP
591 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
592 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
593
cec736d2
LP
594 return 0;
595}
596
de190aef
LP
597int journal_file_find_data_object_with_hash(
598 JournalFile *f,
599 const void *data, uint64_t size, uint64_t hash,
600 Object **ret, uint64_t *offset) {
48496df6 601
de190aef 602 uint64_t p, osize, h;
cec736d2
LP
603 int r;
604
605 assert(f);
606 assert(data || size == 0);
607
608 osize = offsetof(Object, data.payload) + size;
609
bc85bfee
LP
610 if (f->header->data_hash_table_size == 0)
611 return -EBADMSG;
612
de190aef
LP
613 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
614 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 615
de190aef
LP
616 while (p > 0) {
617 Object *o;
cec736d2 618
de190aef 619 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
620 if (r < 0)
621 return r;
622
807e17f0 623 if (le64toh(o->data.hash) != hash)
85a131e8 624 goto next;
807e17f0
LP
625
626 if (o->object.flags & OBJECT_COMPRESSED) {
627#ifdef HAVE_XZ
b785c858 628 uint64_t l, rsize;
cec736d2 629
807e17f0
LP
630 l = le64toh(o->object.size);
631 if (l <= offsetof(Object, data.payload))
cec736d2
LP
632 return -EBADMSG;
633
807e17f0
LP
634 l -= offsetof(Object, data.payload);
635
636 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
637 return -EBADMSG;
638
b785c858 639 if (rsize == size &&
807e17f0
LP
640 memcmp(f->compress_buffer, data, size) == 0) {
641
642 if (ret)
643 *ret = o;
644
645 if (offset)
646 *offset = p;
647
648 return 1;
649 }
650#else
651 return -EPROTONOSUPPORT;
652#endif
653
654 } else if (le64toh(o->object.size) == osize &&
655 memcmp(o->data.payload, data, size) == 0) {
656
cec736d2
LP
657 if (ret)
658 *ret = o;
659
660 if (offset)
661 *offset = p;
662
de190aef 663 return 1;
cec736d2
LP
664 }
665
85a131e8 666 next:
cec736d2
LP
667 p = le64toh(o->data.next_hash_offset);
668 }
669
de190aef
LP
670 return 0;
671}
672
673int journal_file_find_data_object(
674 JournalFile *f,
675 const void *data, uint64_t size,
676 Object **ret, uint64_t *offset) {
677
678 uint64_t hash;
679
680 assert(f);
681 assert(data || size == 0);
682
683 hash = hash64(data, size);
684
685 return journal_file_find_data_object_with_hash(f,
686 data, size, hash,
687 ret, offset);
688}
689
48496df6
LP
690static int journal_file_append_data(
691 JournalFile *f,
692 const void *data, uint64_t size,
693 Object **ret, uint64_t *offset) {
694
de190aef
LP
695 uint64_t hash, p;
696 uint64_t osize;
697 Object *o;
698 int r;
807e17f0 699 bool compressed = false;
de190aef
LP
700
701 assert(f);
702 assert(data || size == 0);
703
704 hash = hash64(data, size);
705
706 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
707 if (r < 0)
708 return r;
709 else if (r > 0) {
710
711 if (ret)
712 *ret = o;
713
714 if (offset)
715 *offset = p;
716
717 return 0;
718 }
719
720 osize = offsetof(Object, data.payload) + size;
721 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
722 if (r < 0)
723 return r;
724
cec736d2 725 o->data.hash = htole64(hash);
807e17f0
LP
726
727#ifdef HAVE_XZ
728 if (f->compress &&
729 size >= COMPRESSION_SIZE_THRESHOLD) {
730 uint64_t rsize;
731
732 compressed = compress_blob(data, size, o->data.payload, &rsize);
733
734 if (compressed) {
735 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
736 o->object.flags |= OBJECT_COMPRESSED;
737
807e17f0
LP
738 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
739 }
740 }
741#endif
742
64825d3c 743 if (!compressed && size > 0)
807e17f0 744 memcpy(o->data.payload, data, size);
cec736d2 745
de190aef 746 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
747 if (r < 0)
748 return r;
749
b0af6f41
LP
750 r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
751 if (r < 0)
752 return r;
753
48496df6
LP
754 /* The linking might have altered the window, so let's
755 * refresh our pointer */
756 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
757 if (r < 0)
758 return r;
759
cec736d2
LP
760 if (ret)
761 *ret = o;
762
763 if (offset)
de190aef 764 *offset = p;
cec736d2
LP
765
766 return 0;
767}
768
769uint64_t journal_file_entry_n_items(Object *o) {
770 assert(o);
7be3aa17 771 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
772
773 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
774}
775
0284adc6 776uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 777 assert(o);
7be3aa17 778 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
779
780 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
781}
782
783static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
784 le64_t *first,
785 le64_t *idx,
de190aef 786 uint64_t p) {
cec736d2 787 int r;
de190aef
LP
788 uint64_t n = 0, ap = 0, q, i, a, hidx;
789 Object *o;
790
cec736d2 791 assert(f);
de190aef
LP
792 assert(first);
793 assert(idx);
794 assert(p > 0);
cec736d2 795
de190aef
LP
796 a = le64toh(*first);
797 i = hidx = le64toh(*idx);
798 while (a > 0) {
799
800 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
801 if (r < 0)
802 return r;
cec736d2 803
de190aef
LP
804 n = journal_file_entry_array_n_items(o);
805 if (i < n) {
806 o->entry_array.items[i] = htole64(p);
807 *idx = htole64(hidx + 1);
808 return 0;
809 }
cec736d2 810
de190aef
LP
811 i -= n;
812 ap = a;
813 a = le64toh(o->entry_array.next_entry_array_offset);
814 }
815
816 if (hidx > n)
817 n = (hidx+1) * 2;
818 else
819 n = n * 2;
820
821 if (n < 4)
822 n = 4;
823
824 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
825 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
826 &o, &q);
cec736d2
LP
827 if (r < 0)
828 return r;
829
b0af6f41
LP
830 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
831 if (r < 0)
832 return r;
833
de190aef 834 o->entry_array.items[i] = htole64(p);
cec736d2 835
de190aef 836 if (ap == 0)
7be3aa17 837 *first = htole64(q);
cec736d2 838 else {
de190aef 839 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
840 if (r < 0)
841 return r;
842
de190aef
LP
843 o->entry_array.next_entry_array_offset = htole64(q);
844 }
cec736d2 845
2dee23eb
LP
846 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
847 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
848
de190aef
LP
849 *idx = htole64(hidx + 1);
850
851 return 0;
852}
cec736d2 853
de190aef 854static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
855 le64_t *extra,
856 le64_t *first,
857 le64_t *idx,
de190aef
LP
858 uint64_t p) {
859
860 int r;
861
862 assert(f);
863 assert(extra);
864 assert(first);
865 assert(idx);
866 assert(p > 0);
867
868 if (*idx == 0)
869 *extra = htole64(p);
870 else {
4fd052ae 871 le64_t i;
de190aef 872
7be3aa17 873 i = htole64(le64toh(*idx) - 1);
de190aef
LP
874 r = link_entry_into_array(f, first, &i, p);
875 if (r < 0)
876 return r;
cec736d2
LP
877 }
878
de190aef
LP
879 *idx = htole64(le64toh(*idx) + 1);
880 return 0;
881}
882
883static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
884 uint64_t p;
885 int r;
886 assert(f);
887 assert(o);
888 assert(offset > 0);
889
890 p = le64toh(o->entry.items[i].object_offset);
891 if (p == 0)
892 return -EINVAL;
893
894 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
895 if (r < 0)
896 return r;
897
de190aef
LP
898 return link_entry_into_array_plus_one(f,
899 &o->data.entry_offset,
900 &o->data.entry_array_offset,
901 &o->data.n_entries,
902 offset);
cec736d2
LP
903}
904
905static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 906 uint64_t n, i;
cec736d2
LP
907 int r;
908
909 assert(f);
910 assert(o);
911 assert(offset > 0);
de190aef 912 assert(o->object.type == OBJECT_ENTRY);
cec736d2 913
b788cc23
LP
914 __sync_synchronize();
915
cec736d2 916 /* Link up the entry itself */
de190aef
LP
917 r = link_entry_into_array(f,
918 &f->header->entry_array_offset,
919 &f->header->n_entries,
920 offset);
921 if (r < 0)
922 return r;
cec736d2 923
aaf53376 924 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 925
de190aef 926 if (f->header->head_entry_realtime == 0)
0ac38b70 927 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 928
0ac38b70 929 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
930 f->header->tail_entry_monotonic = o->entry.monotonic;
931
932 f->tail_entry_monotonic_valid = true;
cec736d2
LP
933
934 /* Link up the items */
935 n = journal_file_entry_n_items(o);
936 for (i = 0; i < n; i++) {
937 r = journal_file_link_entry_item(f, o, offset, i);
938 if (r < 0)
939 return r;
940 }
941
cec736d2
LP
942 return 0;
943}
944
945static int journal_file_append_entry_internal(
946 JournalFile *f,
947 const dual_timestamp *ts,
948 uint64_t xor_hash,
949 const EntryItem items[], unsigned n_items,
de190aef 950 uint64_t *seqnum,
cec736d2
LP
951 Object **ret, uint64_t *offset) {
952 uint64_t np;
953 uint64_t osize;
954 Object *o;
955 int r;
956
957 assert(f);
958 assert(items || n_items == 0);
de190aef 959 assert(ts);
cec736d2
LP
960
961 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
962
de190aef 963 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
964 if (r < 0)
965 return r;
966
d98cc1f2 967 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 968 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
969 o->entry.realtime = htole64(ts->realtime);
970 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
971 o->entry.xor_hash = htole64(xor_hash);
972 o->entry.boot_id = f->header->boot_id;
973
b0af6f41
LP
974 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
975 if (r < 0)
976 return r;
977
cec736d2
LP
978 r = journal_file_link_entry(f, o, np);
979 if (r < 0)
980 return r;
981
982 if (ret)
983 *ret = o;
984
985 if (offset)
986 *offset = np;
987
988 return 0;
989}
990
cf244689 991void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
992 assert(f);
993
994 /* inotify() does not receive IN_MODIFY events from file
995 * accesses done via mmap(). After each access we hence
996 * trigger IN_MODIFY by truncating the journal file to its
997 * current size which triggers IN_MODIFY. */
998
bc85bfee
LP
999 __sync_synchronize();
1000
50f20cfd
LP
1001 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1002 log_error("Failed to to truncate file to its own size: %m");
1003}
1004
de190aef 1005int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1006 unsigned i;
1007 EntryItem *items;
1008 int r;
1009 uint64_t xor_hash = 0;
de190aef 1010 struct dual_timestamp _ts;
cec736d2
LP
1011
1012 assert(f);
1013 assert(iovec || n_iovec == 0);
1014
de190aef
LP
1015 if (!f->writable)
1016 return -EPERM;
1017
1018 if (!ts) {
1019 dual_timestamp_get(&_ts);
1020 ts = &_ts;
1021 }
1022
1023 if (f->tail_entry_monotonic_valid &&
1024 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1025 return -EINVAL;
1026
7560fffc
LP
1027 r = journal_file_maybe_append_tag(f, ts->realtime);
1028 if (r < 0)
1029 return r;
1030
64825d3c
LP
1031 /* alloca() can't take 0, hence let's allocate at least one */
1032 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1033
1034 for (i = 0; i < n_iovec; i++) {
1035 uint64_t p;
1036 Object *o;
1037
1038 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1039 if (r < 0)
cf244689 1040 return r;
cec736d2
LP
1041
1042 xor_hash ^= le64toh(o->data.hash);
1043 items[i].object_offset = htole64(p);
de7b95cd 1044 items[i].hash = o->data.hash;
cec736d2
LP
1045 }
1046
de190aef 1047 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1048
50f20cfd
LP
1049 journal_file_post_change(f);
1050
cec736d2
LP
1051 return r;
1052}
1053
de190aef
LP
1054static int generic_array_get(JournalFile *f,
1055 uint64_t first,
1056 uint64_t i,
1057 Object **ret, uint64_t *offset) {
1058
cec736d2 1059 Object *o;
6c8a39b8 1060 uint64_t p = 0, a;
cec736d2
LP
1061 int r;
1062
1063 assert(f);
1064
de190aef
LP
1065 a = first;
1066 while (a > 0) {
1067 uint64_t n;
cec736d2 1068
de190aef
LP
1069 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1070 if (r < 0)
1071 return r;
cec736d2 1072
de190aef
LP
1073 n = journal_file_entry_array_n_items(o);
1074 if (i < n) {
1075 p = le64toh(o->entry_array.items[i]);
1076 break;
cec736d2
LP
1077 }
1078
de190aef
LP
1079 i -= n;
1080 a = le64toh(o->entry_array.next_entry_array_offset);
1081 }
1082
1083 if (a <= 0 || p <= 0)
1084 return 0;
1085
1086 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1087 if (r < 0)
1088 return r;
1089
1090 if (ret)
1091 *ret = o;
1092
1093 if (offset)
1094 *offset = p;
1095
1096 return 1;
1097}
1098
1099static int generic_array_get_plus_one(JournalFile *f,
1100 uint64_t extra,
1101 uint64_t first,
1102 uint64_t i,
1103 Object **ret, uint64_t *offset) {
1104
1105 Object *o;
1106
1107 assert(f);
1108
1109 if (i == 0) {
1110 int r;
1111
1112 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1113 if (r < 0)
1114 return r;
1115
de190aef
LP
1116 if (ret)
1117 *ret = o;
cec736d2 1118
de190aef
LP
1119 if (offset)
1120 *offset = extra;
cec736d2 1121
de190aef 1122 return 1;
cec736d2
LP
1123 }
1124
de190aef
LP
1125 return generic_array_get(f, first, i-1, ret, offset);
1126}
cec736d2 1127
de190aef
LP
/* Results of the test_object() callback that generic_array_bisect()
 * takes */
enum {
        TEST_FOUND,     /* treated as TEST_RIGHT when bisecting with DIRECTION_DOWN, as TEST_LEFT otherwise */
        TEST_LEFT,      /* bisection continues above the probed index */
        TEST_RIGHT,     /* bisection continues at or below the probed index */
};
cec736d2 1133
de190aef
LP
1134static int generic_array_bisect(JournalFile *f,
1135 uint64_t first,
1136 uint64_t n,
1137 uint64_t needle,
1138 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1139 direction_t direction,
1140 Object **ret,
1141 uint64_t *offset,
1142 uint64_t *idx) {
1143
1144 uint64_t a, p, t = 0, i = 0, last_p = 0;
1145 bool subtract_one = false;
1146 Object *o, *array = NULL;
1147 int r;
cec736d2 1148
de190aef
LP
1149 assert(f);
1150 assert(test_object);
cec736d2 1151
de190aef
LP
1152 a = first;
1153 while (a > 0) {
1154 uint64_t left, right, k, lp;
1155
1156 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1157 if (r < 0)
1158 return r;
1159
de190aef
LP
1160 k = journal_file_entry_array_n_items(array);
1161 right = MIN(k, n);
1162 if (right <= 0)
1163 return 0;
cec736d2 1164
de190aef
LP
1165 i = right - 1;
1166 lp = p = le64toh(array->entry_array.items[i]);
1167 if (p <= 0)
1168 return -EBADMSG;
cec736d2 1169
de190aef
LP
1170 r = test_object(f, p, needle);
1171 if (r < 0)
1172 return r;
cec736d2 1173
de190aef
LP
1174 if (r == TEST_FOUND)
1175 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1176
1177 if (r == TEST_RIGHT) {
1178 left = 0;
1179 right -= 1;
1180 for (;;) {
1181 if (left == right) {
1182 if (direction == DIRECTION_UP)
1183 subtract_one = true;
1184
1185 i = left;
1186 goto found;
1187 }
1188
1189 assert(left < right);
1190
1191 i = (left + right) / 2;
1192 p = le64toh(array->entry_array.items[i]);
1193 if (p <= 0)
1194 return -EBADMSG;
1195
1196 r = test_object(f, p, needle);
1197 if (r < 0)
1198 return r;
cec736d2 1199
de190aef
LP
1200 if (r == TEST_FOUND)
1201 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1202
1203 if (r == TEST_RIGHT)
1204 right = i;
1205 else
1206 left = i + 1;
1207 }
1208 }
1209
cbdca852
LP
1210 if (k > n) {
1211 if (direction == DIRECTION_UP) {
1212 i = n;
1213 subtract_one = true;
1214 goto found;
1215 }
1216
cec736d2 1217 return 0;
cbdca852 1218 }
cec736d2 1219
de190aef
LP
1220 last_p = lp;
1221
1222 n -= k;
1223 t += k;
1224 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1225 }
1226
1227 return 0;
de190aef
LP
1228
1229found:
1230 if (subtract_one && t == 0 && i == 0)
1231 return 0;
1232
1233 if (subtract_one && i == 0)
1234 p = last_p;
1235 else if (subtract_one)
1236 p = le64toh(array->entry_array.items[i-1]);
1237 else
1238 p = le64toh(array->entry_array.items[i]);
1239
1240 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1241 if (r < 0)
1242 return r;
1243
1244 if (ret)
1245 *ret = o;
1246
1247 if (offset)
1248 *offset = p;
1249
1250 if (idx)
cbdca852 1251 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1252
1253 return 1;
cec736d2
LP
1254}
1255
de190aef
LP
1256static int generic_array_bisect_plus_one(JournalFile *f,
1257 uint64_t extra,
1258 uint64_t first,
1259 uint64_t n,
1260 uint64_t needle,
1261 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1262 direction_t direction,
1263 Object **ret,
1264 uint64_t *offset,
1265 uint64_t *idx) {
1266
cec736d2 1267 int r;
cbdca852
LP
1268 bool step_back = false;
1269 Object *o;
cec736d2
LP
1270
1271 assert(f);
de190aef 1272 assert(test_object);
cec736d2 1273
de190aef
LP
1274 if (n <= 0)
1275 return 0;
cec736d2 1276
de190aef
LP
1277 /* This bisects the array in object 'first', but first checks
1278 * an extra */
de190aef
LP
1279 r = test_object(f, extra, needle);
1280 if (r < 0)
1281 return r;
a536e261
LP
1282
1283 if (r == TEST_FOUND)
1284 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1285
cbdca852
LP
1286 /* if we are looking with DIRECTION_UP then we need to first
1287 see if in the actual array there is a matching entry, and
1288 return the last one of that. But if there isn't any we need
1289 to return this one. Hence remember this, and return it
1290 below. */
1291 if (r == TEST_LEFT)
1292 step_back = direction == DIRECTION_UP;
de190aef 1293
cbdca852
LP
1294 if (r == TEST_RIGHT) {
1295 if (direction == DIRECTION_DOWN)
1296 goto found;
1297 else
1298 return 0;
a536e261 1299 }
cec736d2 1300
de190aef
LP
1301 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1302
cbdca852
LP
1303 if (r == 0 && step_back)
1304 goto found;
1305
ecf68b1d 1306 if (r > 0 && idx)
de190aef
LP
1307 (*idx) ++;
1308
1309 return r;
cbdca852
LP
1310
1311found:
1312 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1313 if (r < 0)
1314 return r;
1315
1316 if (ret)
1317 *ret = o;
1318
1319 if (offset)
1320 *offset = extra;
1321
1322 if (idx)
1323 *idx = 0;
1324
1325 return 1;
1326}
1327
1328static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1329 assert(f);
1330 assert(p > 0);
1331
1332 if (p == needle)
1333 return TEST_FOUND;
1334 else if (p < needle)
1335 return TEST_LEFT;
1336 else
1337 return TEST_RIGHT;
1338}
1339
1340int journal_file_move_to_entry_by_offset(
1341 JournalFile *f,
1342 uint64_t p,
1343 direction_t direction,
1344 Object **ret,
1345 uint64_t *offset) {
1346
1347 return generic_array_bisect(f,
1348 le64toh(f->header->entry_array_offset),
1349 le64toh(f->header->n_entries),
1350 p,
1351 test_object_offset,
1352 direction,
1353 ret, offset, NULL);
de190aef
LP
1354}
1355
cbdca852 1356
de190aef
LP
1357static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1358 Object *o;
1359 int r;
1360
1361 assert(f);
1362 assert(p > 0);
1363
1364 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1365 if (r < 0)
1366 return r;
1367
de190aef
LP
1368 if (le64toh(o->entry.seqnum) == needle)
1369 return TEST_FOUND;
1370 else if (le64toh(o->entry.seqnum) < needle)
1371 return TEST_LEFT;
1372 else
1373 return TEST_RIGHT;
1374}
cec736d2 1375
de190aef
LP
1376int journal_file_move_to_entry_by_seqnum(
1377 JournalFile *f,
1378 uint64_t seqnum,
1379 direction_t direction,
1380 Object **ret,
1381 uint64_t *offset) {
1382
1383 return generic_array_bisect(f,
1384 le64toh(f->header->entry_array_offset),
1385 le64toh(f->header->n_entries),
1386 seqnum,
1387 test_object_seqnum,
1388 direction,
1389 ret, offset, NULL);
1390}
cec736d2 1391
de190aef
LP
1392static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1393 Object *o;
1394 int r;
1395
1396 assert(f);
1397 assert(p > 0);
1398
1399 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1400 if (r < 0)
1401 return r;
1402
1403 if (le64toh(o->entry.realtime) == needle)
1404 return TEST_FOUND;
1405 else if (le64toh(o->entry.realtime) < needle)
1406 return TEST_LEFT;
1407 else
1408 return TEST_RIGHT;
cec736d2
LP
1409}
1410
de190aef
LP
1411int journal_file_move_to_entry_by_realtime(
1412 JournalFile *f,
1413 uint64_t realtime,
1414 direction_t direction,
1415 Object **ret,
1416 uint64_t *offset) {
1417
1418 return generic_array_bisect(f,
1419 le64toh(f->header->entry_array_offset),
1420 le64toh(f->header->n_entries),
1421 realtime,
1422 test_object_realtime,
1423 direction,
1424 ret, offset, NULL);
1425}
1426
1427static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1428 Object *o;
1429 int r;
1430
1431 assert(f);
1432 assert(p > 0);
1433
1434 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1435 if (r < 0)
1436 return r;
1437
1438 if (le64toh(o->entry.monotonic) == needle)
1439 return TEST_FOUND;
1440 else if (le64toh(o->entry.monotonic) < needle)
1441 return TEST_LEFT;
1442 else
1443 return TEST_RIGHT;
1444}
1445
1446int journal_file_move_to_entry_by_monotonic(
1447 JournalFile *f,
1448 sd_id128_t boot_id,
1449 uint64_t monotonic,
1450 direction_t direction,
1451 Object **ret,
1452 uint64_t *offset) {
1453
10b6f904 1454 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1455 Object *o;
1456 int r;
1457
cbdca852 1458 assert(f);
de190aef 1459
cbdca852 1460 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1461 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1462 if (r < 0)
1463 return r;
cbdca852 1464 if (r == 0)
de190aef
LP
1465 return -ENOENT;
1466
1467 return generic_array_bisect_plus_one(f,
1468 le64toh(o->data.entry_offset),
1469 le64toh(o->data.entry_array_offset),
1470 le64toh(o->data.n_entries),
1471 monotonic,
1472 test_object_monotonic,
1473 direction,
1474 ret, offset, NULL);
1475}
1476
de190aef
LP
1477int journal_file_next_entry(
1478 JournalFile *f,
1479 Object *o, uint64_t p,
1480 direction_t direction,
1481 Object **ret, uint64_t *offset) {
1482
1483 uint64_t i, n;
cec736d2
LP
1484 int r;
1485
1486 assert(f);
de190aef
LP
1487 assert(p > 0 || !o);
1488
1489 n = le64toh(f->header->n_entries);
1490 if (n <= 0)
1491 return 0;
cec736d2
LP
1492
1493 if (!o)
de190aef 1494 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1495 else {
de190aef 1496 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1497 return -EINVAL;
1498
de190aef
LP
1499 r = generic_array_bisect(f,
1500 le64toh(f->header->entry_array_offset),
1501 le64toh(f->header->n_entries),
1502 p,
1503 test_object_offset,
1504 DIRECTION_DOWN,
1505 NULL, NULL,
1506 &i);
1507 if (r <= 0)
1508 return r;
1509
1510 if (direction == DIRECTION_DOWN) {
1511 if (i >= n - 1)
1512 return 0;
1513
1514 i++;
1515 } else {
1516 if (i <= 0)
1517 return 0;
1518
1519 i--;
1520 }
cec736d2
LP
1521 }
1522
de190aef
LP
1523 /* And jump to it */
1524 return generic_array_get(f,
1525 le64toh(f->header->entry_array_offset),
1526 i,
1527 ret, offset);
1528}
cec736d2 1529
de190aef
LP
1530int journal_file_skip_entry(
1531 JournalFile *f,
1532 Object *o, uint64_t p,
1533 int64_t skip,
1534 Object **ret, uint64_t *offset) {
1535
1536 uint64_t i, n;
1537 int r;
1538
1539 assert(f);
1540 assert(o);
1541 assert(p > 0);
1542
1543 if (o->object.type != OBJECT_ENTRY)
1544 return -EINVAL;
1545
1546 r = generic_array_bisect(f,
1547 le64toh(f->header->entry_array_offset),
1548 le64toh(f->header->n_entries),
1549 p,
1550 test_object_offset,
1551 DIRECTION_DOWN,
1552 NULL, NULL,
1553 &i);
1554 if (r <= 0)
cec736d2
LP
1555 return r;
1556
de190aef
LP
1557 /* Calculate new index */
1558 if (skip < 0) {
1559 if ((uint64_t) -skip >= i)
1560 i = 0;
1561 else
1562 i = i - (uint64_t) -skip;
1563 } else
1564 i += (uint64_t) skip;
cec736d2 1565
de190aef
LP
1566 n = le64toh(f->header->n_entries);
1567 if (n <= 0)
1568 return -EBADMSG;
cec736d2 1569
de190aef
LP
1570 if (i >= n)
1571 i = n-1;
1572
1573 return generic_array_get(f,
1574 le64toh(f->header->entry_array_offset),
1575 i,
1576 ret, offset);
cec736d2
LP
1577}
1578
de190aef
LP
1579int journal_file_next_entry_for_data(
1580 JournalFile *f,
1581 Object *o, uint64_t p,
1582 uint64_t data_offset,
1583 direction_t direction,
1584 Object **ret, uint64_t *offset) {
1585
1586 uint64_t n, i;
cec736d2 1587 int r;
de190aef 1588 Object *d;
cec736d2
LP
1589
1590 assert(f);
de190aef 1591 assert(p > 0 || !o);
cec736d2 1592
de190aef 1593 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1594 if (r < 0)
de190aef 1595 return r;
cec736d2 1596
de190aef
LP
1597 n = le64toh(d->data.n_entries);
1598 if (n <= 0)
1599 return n;
cec736d2 1600
de190aef
LP
1601 if (!o)
1602 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1603 else {
1604 if (o->object.type != OBJECT_ENTRY)
1605 return -EINVAL;
cec736d2 1606
de190aef
LP
1607 r = generic_array_bisect_plus_one(f,
1608 le64toh(d->data.entry_offset),
1609 le64toh(d->data.entry_array_offset),
1610 le64toh(d->data.n_entries),
1611 p,
1612 test_object_offset,
1613 DIRECTION_DOWN,
1614 NULL, NULL,
1615 &i);
1616
1617 if (r <= 0)
cec736d2
LP
1618 return r;
1619
de190aef
LP
1620 if (direction == DIRECTION_DOWN) {
1621 if (i >= n - 1)
1622 return 0;
cec736d2 1623
de190aef
LP
1624 i++;
1625 } else {
1626 if (i <= 0)
1627 return 0;
cec736d2 1628
de190aef
LP
1629 i--;
1630 }
cec736d2 1631
de190aef 1632 }
cec736d2 1633
de190aef
LP
1634 return generic_array_get_plus_one(f,
1635 le64toh(d->data.entry_offset),
1636 le64toh(d->data.entry_array_offset),
1637 i,
1638 ret, offset);
1639}
cec736d2 1640
cbdca852
LP
1641int journal_file_move_to_entry_by_offset_for_data(
1642 JournalFile *f,
1643 uint64_t data_offset,
1644 uint64_t p,
1645 direction_t direction,
1646 Object **ret, uint64_t *offset) {
1647
1648 int r;
1649 Object *d;
1650
1651 assert(f);
1652
1653 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1654 if (r < 0)
1655 return r;
1656
1657 return generic_array_bisect_plus_one(f,
1658 le64toh(d->data.entry_offset),
1659 le64toh(d->data.entry_array_offset),
1660 le64toh(d->data.n_entries),
1661 p,
1662 test_object_offset,
1663 direction,
1664 ret, offset, NULL);
1665}
1666
1667int journal_file_move_to_entry_by_monotonic_for_data(
1668 JournalFile *f,
1669 uint64_t data_offset,
1670 sd_id128_t boot_id,
1671 uint64_t monotonic,
1672 direction_t direction,
1673 Object **ret, uint64_t *offset) {
1674
1675 char t[9+32+1] = "_BOOT_ID=";
1676 Object *o, *d;
1677 int r;
1678 uint64_t b, z;
1679
1680 assert(f);
1681
1682 /* First, seek by time */
1683 sd_id128_to_string(boot_id, t + 9);
1684 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1685 if (r < 0)
1686 return r;
1687 if (r == 0)
1688 return -ENOENT;
1689
1690 r = generic_array_bisect_plus_one(f,
1691 le64toh(o->data.entry_offset),
1692 le64toh(o->data.entry_array_offset),
1693 le64toh(o->data.n_entries),
1694 monotonic,
1695 test_object_monotonic,
1696 direction,
1697 NULL, &z, NULL);
1698 if (r <= 0)
1699 return r;
1700
1701 /* And now, continue seeking until we find an entry that
1702 * exists in both bisection arrays */
1703
1704 for (;;) {
1705 Object *qo;
1706 uint64_t p, q;
1707
1708 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1709 if (r < 0)
1710 return r;
1711
1712 r = generic_array_bisect_plus_one(f,
1713 le64toh(d->data.entry_offset),
1714 le64toh(d->data.entry_array_offset),
1715 le64toh(d->data.n_entries),
1716 z,
1717 test_object_offset,
1718 direction,
1719 NULL, &p, NULL);
1720 if (r <= 0)
1721 return r;
1722
1723 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1724 if (r < 0)
1725 return r;
1726
1727 r = generic_array_bisect_plus_one(f,
1728 le64toh(o->data.entry_offset),
1729 le64toh(o->data.entry_array_offset),
1730 le64toh(o->data.n_entries),
1731 p,
1732 test_object_offset,
1733 direction,
1734 &qo, &q, NULL);
1735
1736 if (r <= 0)
1737 return r;
1738
1739 if (p == q) {
1740 if (ret)
1741 *ret = qo;
1742 if (offset)
1743 *offset = q;
1744
1745 return 1;
1746 }
1747
1748 z = q;
1749 }
1750
1751 return 0;
1752}
1753
de190aef
LP
1754int journal_file_move_to_entry_by_seqnum_for_data(
1755 JournalFile *f,
1756 uint64_t data_offset,
1757 uint64_t seqnum,
1758 direction_t direction,
1759 Object **ret, uint64_t *offset) {
cec736d2 1760
de190aef
LP
1761 Object *d;
1762 int r;
cec736d2 1763
91a31dde
LP
1764 assert(f);
1765
de190aef 1766 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1767 if (r < 0)
de190aef 1768 return r;
cec736d2 1769
de190aef
LP
1770 return generic_array_bisect_plus_one(f,
1771 le64toh(d->data.entry_offset),
1772 le64toh(d->data.entry_array_offset),
1773 le64toh(d->data.n_entries),
1774 seqnum,
1775 test_object_seqnum,
1776 direction,
1777 ret, offset, NULL);
1778}
cec736d2 1779
de190aef
LP
1780int journal_file_move_to_entry_by_realtime_for_data(
1781 JournalFile *f,
1782 uint64_t data_offset,
1783 uint64_t realtime,
1784 direction_t direction,
1785 Object **ret, uint64_t *offset) {
1786
1787 Object *d;
1788 int r;
1789
91a31dde
LP
1790 assert(f);
1791
de190aef 1792 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1793 if (r < 0)
de190aef
LP
1794 return r;
1795
1796 return generic_array_bisect_plus_one(f,
1797 le64toh(d->data.entry_offset),
1798 le64toh(d->data.entry_array_offset),
1799 le64toh(d->data.n_entries),
1800 realtime,
1801 test_object_realtime,
1802 direction,
1803 ret, offset, NULL);
cec736d2
LP
1804}
1805
0284adc6 1806void journal_file_dump(JournalFile *f) {
7560fffc 1807 Object *o;
7560fffc 1808 int r;
0284adc6 1809 uint64_t p;
7560fffc
LP
1810
1811 assert(f);
1812
0284adc6 1813 journal_file_print_header(f);
7560fffc 1814
0284adc6
LP
1815 p = le64toh(f->header->header_size);
1816 while (p != 0) {
1817 r = journal_file_move_to_object(f, -1, p, &o);
1818 if (r < 0)
1819 goto fail;
7560fffc 1820
0284adc6 1821 switch (o->object.type) {
d98cc1f2 1822
0284adc6
LP
1823 case OBJECT_UNUSED:
1824 printf("Type: OBJECT_UNUSED\n");
1825 break;
d98cc1f2 1826
0284adc6
LP
1827 case OBJECT_DATA:
1828 printf("Type: OBJECT_DATA\n");
1829 break;
7560fffc 1830
0284adc6
LP
1831 case OBJECT_ENTRY:
1832 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1833 (unsigned long long) le64toh(o->entry.seqnum),
1834 (unsigned long long) le64toh(o->entry.monotonic),
1835 (unsigned long long) le64toh(o->entry.realtime));
1836 break;
7560fffc 1837
0284adc6
LP
1838 case OBJECT_FIELD_HASH_TABLE:
1839 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1840 break;
7560fffc 1841
0284adc6
LP
1842 case OBJECT_DATA_HASH_TABLE:
1843 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1844 break;
7560fffc 1845
0284adc6
LP
1846 case OBJECT_ENTRY_ARRAY:
1847 printf("Type: OBJECT_ENTRY_ARRAY\n");
1848 break;
7560fffc 1849
0284adc6
LP
1850 case OBJECT_TAG:
1851 printf("Type: OBJECT_TAG %llu\n",
1852 (unsigned long long) le64toh(o->tag.seqnum));
1853 break;
1854 }
7560fffc 1855
0284adc6
LP
1856 if (o->object.flags & OBJECT_COMPRESSED)
1857 printf("Flags: COMPRESSED\n");
7560fffc 1858
0284adc6
LP
1859 if (p == le64toh(f->header->tail_object_offset))
1860 p = 0;
1861 else
1862 p = p + ALIGN64(le64toh(o->object.size));
1863 }
7560fffc 1864
0284adc6
LP
1865 return;
1866fail:
1867 log_error("File corrupt");
7560fffc
LP
1868}
1869
0284adc6
LP
1870void journal_file_print_header(JournalFile *f) {
1871 char a[33], b[33], c[33];
1872 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
7560fffc
LP
1873
1874 assert(f);
7560fffc 1875
0284adc6
LP
1876 printf("File Path: %s\n"
1877 "File ID: %s\n"
1878 "Machine ID: %s\n"
1879 "Boot ID: %s\n"
1880 "Sequential Number ID: %s\n"
1881 "State: %s\n"
1882 "Compatible Flags:%s%s\n"
1883 "Incompatible Flags:%s%s\n"
1884 "Header size: %llu\n"
1885 "Arena size: %llu\n"
1886 "Data Hash Table Size: %llu\n"
1887 "Field Hash Table Size: %llu\n"
0284adc6
LP
1888 "Rotate Suggested: %s\n"
1889 "Head Sequential Number: %llu\n"
1890 "Tail Sequential Number: %llu\n"
1891 "Head Realtime Timestamp: %s\n"
3223f44f
LP
1892 "Tail Realtime Timestamp: %s\n"
1893 "Objects: %llu\n"
1894 "Entry Objects: %llu\n",
0284adc6
LP
1895 f->path,
1896 sd_id128_to_string(f->header->file_id, a),
1897 sd_id128_to_string(f->header->machine_id, b),
1898 sd_id128_to_string(f->header->boot_id, c),
1899 sd_id128_to_string(f->header->seqnum_id, c),
3223f44f
LP
1900 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1901 f->header->state == STATE_ONLINE ? "ONLINE" :
1902 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
baed47c3
LP
1903 (f->header->compatible_flags & HEADER_COMPATIBLE_SEALED) ? " SEALED" : "",
1904 (f->header->compatible_flags & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
0284adc6
LP
1905 (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1906 (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1907 (unsigned long long) le64toh(f->header->header_size),
1908 (unsigned long long) le64toh(f->header->arena_size),
1909 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1910 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
0284adc6
LP
1911 yes_no(journal_file_rotate_suggested(f)),
1912 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1913 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1914 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
3223f44f
LP
1915 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1916 (unsigned long long) le64toh(f->header->n_objects),
1917 (unsigned long long) le64toh(f->header->n_entries));
7560fffc 1918
0284adc6
LP
1919 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1920 printf("Data Objects: %llu\n"
1921 "Data Hash Table Fill: %.1f%%\n",
1922 (unsigned long long) le64toh(f->header->n_data),
1923 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 1924
0284adc6
LP
1925 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1926 printf("Field Objects: %llu\n"
1927 "Field Hash Table Fill: %.1f%%\n",
1928 (unsigned long long) le64toh(f->header->n_fields),
1929 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
1930
1931 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1932 printf("Tag Objects: %llu\n",
1933 (unsigned long long) le64toh(f->header->n_tags));
1934 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1935 printf("Entry Array Objects: %llu\n",
1936 (unsigned long long) le64toh(f->header->n_entry_arrays));
7560fffc
LP
1937}
1938
0284adc6
LP
1939int journal_file_open(
1940 const char *fname,
1941 int flags,
1942 mode_t mode,
1943 bool compress,
baed47c3 1944 bool seal,
0284adc6
LP
1945 JournalMetrics *metrics,
1946 MMapCache *mmap_cache,
1947 JournalFile *template,
1948 JournalFile **ret) {
7560fffc 1949
0284adc6
LP
1950 JournalFile *f;
1951 int r;
1952 bool newly_created = false;
7560fffc 1953
0284adc6 1954 assert(fname);
7560fffc 1955
0284adc6
LP
1956 if ((flags & O_ACCMODE) != O_RDONLY &&
1957 (flags & O_ACCMODE) != O_RDWR)
1958 return -EINVAL;
7560fffc 1959
a0108012
LP
1960 if (!endswith(fname, ".journal") &&
1961 !endswith(fname, ".journal~"))
0284adc6 1962 return -EINVAL;
7560fffc 1963
0284adc6
LP
1964 f = new0(JournalFile, 1);
1965 if (!f)
1966 return -ENOMEM;
7560fffc 1967
0284adc6
LP
1968 f->fd = -1;
1969 f->mode = mode;
7560fffc 1970
0284adc6
LP
1971 f->flags = flags;
1972 f->prot = prot_from_flags(flags);
1973 f->writable = (flags & O_ACCMODE) != O_RDONLY;
1974 f->compress = compress;
baed47c3 1975 f->seal = seal;
7560fffc 1976
0284adc6
LP
1977 if (mmap_cache)
1978 f->mmap = mmap_cache_ref(mmap_cache);
1979 else {
1980 /* One context for each type, plus the zeroth catchall
1981 * context. One fd for the file plus one for each type
1982 * (which we need during verification */
1983 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
1984 if (!f->mmap) {
1985 r = -ENOMEM;
1986 goto fail;
1987 }
1988 }
7560fffc 1989
0284adc6
LP
1990 f->path = strdup(fname);
1991 if (!f->path) {
1992 r = -ENOMEM;
1993 goto fail;
1994 }
7560fffc 1995
0284adc6
LP
1996 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1997 if (f->fd < 0) {
1998 r = -errno;
1999 goto fail;
7560fffc 2000 }
7560fffc 2001
0284adc6
LP
2002 if (fstat(f->fd, &f->last_stat) < 0) {
2003 r = -errno;
2004 goto fail;
2005 }
7560fffc 2006
0284adc6
LP
2007 if (f->last_stat.st_size == 0 && f->writable) {
2008 newly_created = true;
7560fffc 2009
0284adc6 2010 /* Try to load the FSPRG state, and if we can't, then
baed47c3
LP
2011 * just don't do sealing */
2012 r = journal_file_fss_load(f);
0284adc6 2013 if (r < 0)
baed47c3 2014 f->seal = false;
7560fffc 2015
0284adc6
LP
2016 r = journal_file_init_header(f, template);
2017 if (r < 0)
2018 goto fail;
7560fffc 2019
0284adc6
LP
2020 if (fstat(f->fd, &f->last_stat) < 0) {
2021 r = -errno;
2022 goto fail;
2023 }
2024 }
7560fffc 2025
0284adc6
LP
2026 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2027 r = -EIO;
2028 goto fail;
2029 }
7560fffc 2030
0284adc6
LP
2031 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2032 if (f->header == MAP_FAILED) {
2033 f->header = NULL;
2034 r = -errno;
2035 goto fail;
2036 }
7560fffc 2037
0284adc6
LP
2038 if (!newly_created) {
2039 r = journal_file_verify_header(f);
2040 if (r < 0)
2041 goto fail;
2042 }
7560fffc 2043
0284adc6 2044 if (!newly_created && f->writable) {
baed47c3 2045 r = journal_file_fss_load(f);
0284adc6
LP
2046 if (r < 0)
2047 goto fail;
2048 }
cec736d2
LP
2049
2050 if (f->writable) {
4a92baf3
LP
2051 if (metrics) {
2052 journal_default_metrics(metrics, f->fd);
2053 f->metrics = *metrics;
2054 } else if (template)
2055 f->metrics = template->metrics;
2056
cec736d2
LP
2057 r = journal_file_refresh_header(f);
2058 if (r < 0)
2059 goto fail;
2060 }
2061
baed47c3 2062 r = journal_file_hmac_setup(f);
14d10188
LP
2063 if (r < 0)
2064 goto fail;
2065
cec736d2 2066 if (newly_created) {
de190aef 2067 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2068 if (r < 0)
2069 goto fail;
2070
de190aef 2071 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2072 if (r < 0)
2073 goto fail;
7560fffc
LP
2074
2075 r = journal_file_append_first_tag(f);
2076 if (r < 0)
2077 goto fail;
cec736d2
LP
2078 }
2079
de190aef 2080 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2081 if (r < 0)
2082 goto fail;
2083
de190aef 2084 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2085 if (r < 0)
2086 goto fail;
2087
2088 if (ret)
2089 *ret = f;
2090
2091 return 0;
2092
2093fail:
2094 journal_file_close(f);
2095
2096 return r;
2097}
0ac38b70 2098
baed47c3 2099int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
0ac38b70
LP
2100 char *p;
2101 size_t l;
2102 JournalFile *old_file, *new_file = NULL;
2103 int r;
2104
2105 assert(f);
2106 assert(*f);
2107
2108 old_file = *f;
2109
2110 if (!old_file->writable)
2111 return -EINVAL;
2112
2113 if (!endswith(old_file->path, ".journal"))
2114 return -EINVAL;
2115
2116 l = strlen(old_file->path);
2117
9447a7f1 2118 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2119 if (!p)
2120 return -ENOMEM;
2121
2122 memcpy(p, old_file->path, l - 8);
2123 p[l-8] = '@';
2124 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2125 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2126 "-%016llx-%016llx.journal",
beec0085 2127 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
0ac38b70
LP
2128 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2129
2130 r = rename(old_file->path, p);
2131 free(p);
2132
2133 if (r < 0)
2134 return -errno;
2135
ccdbaf91 2136 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2137
baed47c3 2138 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2139 journal_file_close(old_file);
2140
2141 *f = new_file;
2142 return r;
2143}
2144
9447a7f1
LP
2145int journal_file_open_reliably(
2146 const char *fname,
2147 int flags,
2148 mode_t mode,
7560fffc 2149 bool compress,
baed47c3 2150 bool seal,
4a92baf3 2151 JournalMetrics *metrics,
27370278 2152 MMapCache *mmap_cache,
9447a7f1
LP
2153 JournalFile *template,
2154 JournalFile **ret) {
2155
2156 int r;
2157 size_t l;
2158 char *p;
2159
baed47c3 2160 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2161 metrics, mmap_cache, template, ret);
0071d9f1
LP
2162 if (r != -EBADMSG && /* corrupted */
2163 r != -ENODATA && /* truncated */
2164 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2165 r != -EPROTONOSUPPORT && /* incompatible feature */
2166 r != -EBUSY && /* unclean shutdown */
2167 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2168 return r;
2169
2170 if ((flags & O_ACCMODE) == O_RDONLY)
2171 return r;
2172
2173 if (!(flags & O_CREAT))
2174 return r;
2175
7560fffc
LP
2176 if (!endswith(fname, ".journal"))
2177 return r;
2178
5c70eab4
LP
2179 /* The file is corrupted. Rotate it away and try it again (but only once) */
2180
9447a7f1
LP
2181 l = strlen(fname);
2182 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2183 (int) (l-8), fname,
2184 (unsigned long long) now(CLOCK_REALTIME),
2185 random_ull()) < 0)
2186 return -ENOMEM;
2187
2188 r = rename(fname, p);
2189 free(p);
2190 if (r < 0)
2191 return -errno;
2192
a1a1898f 2193 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2194
baed47c3 2195 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2196 metrics, mmap_cache, template, ret);
9447a7f1
LP
2197}
2198
cf244689
LP
2199
2200int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2201 uint64_t i, n;
2202 uint64_t q, xor_hash = 0;
2203 int r;
2204 EntryItem *items;
2205 dual_timestamp ts;
2206
2207 assert(from);
2208 assert(to);
2209 assert(o);
2210 assert(p);
2211
2212 if (!to->writable)
2213 return -EPERM;
2214
2215 ts.monotonic = le64toh(o->entry.monotonic);
2216 ts.realtime = le64toh(o->entry.realtime);
2217
2218 if (to->tail_entry_monotonic_valid &&
2219 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2220 return -EINVAL;
2221
cf244689
LP
2222 n = journal_file_entry_n_items(o);
2223 items = alloca(sizeof(EntryItem) * n);
2224
2225 for (i = 0; i < n; i++) {
4fd052ae
FC
2226 uint64_t l, h;
2227 le64_t le_hash;
cf244689
LP
2228 size_t t;
2229 void *data;
2230 Object *u;
2231
2232 q = le64toh(o->entry.items[i].object_offset);
2233 le_hash = o->entry.items[i].hash;
2234
2235 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2236 if (r < 0)
2237 return r;
2238
2239 if (le_hash != o->data.hash)
2240 return -EBADMSG;
2241
2242 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2243 t = (size_t) l;
2244
2245 /* We hit the limit on 32bit machines */
2246 if ((uint64_t) t != l)
2247 return -E2BIG;
2248
2249 if (o->object.flags & OBJECT_COMPRESSED) {
2250#ifdef HAVE_XZ
2251 uint64_t rsize;
2252
2253 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2254 return -EBADMSG;
2255
2256 data = from->compress_buffer;
2257 l = rsize;
2258#else
2259 return -EPROTONOSUPPORT;
2260#endif
2261 } else
2262 data = o->data.payload;
2263
2264 r = journal_file_append_data(to, data, l, &u, &h);
2265 if (r < 0)
2266 return r;
2267
2268 xor_hash ^= le64toh(u->data.hash);
2269 items[i].object_offset = htole64(h);
2270 items[i].hash = u->data.hash;
2271
2272 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2273 if (r < 0)
2274 return r;
2275 }
2276
2277 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2278}
babfc091
LP
2279
2280void journal_default_metrics(JournalMetrics *m, int fd) {
2281 uint64_t fs_size = 0;
2282 struct statvfs ss;
a7bc2c2a 2283 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2284
2285 assert(m);
2286 assert(fd >= 0);
2287
2288 if (fstatvfs(fd, &ss) >= 0)
2289 fs_size = ss.f_frsize * ss.f_blocks;
2290
2291 if (m->max_use == (uint64_t) -1) {
2292
2293 if (fs_size > 0) {
2294 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2295
2296 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2297 m->max_use = DEFAULT_MAX_USE_UPPER;
2298
2299 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2300 m->max_use = DEFAULT_MAX_USE_LOWER;
2301 } else
2302 m->max_use = DEFAULT_MAX_USE_LOWER;
2303 } else {
2304 m->max_use = PAGE_ALIGN(m->max_use);
2305
2306 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2307 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2308 }
2309
2310 if (m->max_size == (uint64_t) -1) {
2311 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2312
2313 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2314 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2315 } else
2316 m->max_size = PAGE_ALIGN(m->max_size);
2317
2318 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2319 m->max_size = JOURNAL_FILE_SIZE_MIN;
2320
2321 if (m->max_size*2 > m->max_use)
2322 m->max_use = m->max_size*2;
2323
2324 if (m->min_size == (uint64_t) -1)
2325 m->min_size = JOURNAL_FILE_SIZE_MIN;
2326 else {
2327 m->min_size = PAGE_ALIGN(m->min_size);
2328
2329 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2330 m->min_size = JOURNAL_FILE_SIZE_MIN;
2331
2332 if (m->min_size > m->max_size)
2333 m->max_size = m->min_size;
2334 }
2335
2336 if (m->keep_free == (uint64_t) -1) {
2337
2338 if (fs_size > 0) {
2339 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2340
2341 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2342 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2343
2344 } else
2345 m->keep_free = DEFAULT_KEEP_FREE;
2346 }
2347
e7bf07b3
LP
2348 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2349 format_bytes(a, sizeof(a), m->max_use),
2350 format_bytes(b, sizeof(b), m->max_size),
2351 format_bytes(c, sizeof(c), m->min_size),
2352 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2353}
08984293
LP
2354
2355int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2356 assert(f);
2357 assert(from || to);
2358
2359 if (from) {
162566a4
LP
2360 if (f->header->head_entry_realtime == 0)
2361 return -ENOENT;
08984293 2362
162566a4 2363 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2364 }
2365
2366 if (to) {
162566a4
LP
2367 if (f->header->tail_entry_realtime == 0)
2368 return -ENOENT;
08984293 2369
162566a4 2370 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2371 }
2372
2373 return 1;
2374}
2375
2376int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2377 char t[9+32+1] = "_BOOT_ID=";
2378 Object *o;
2379 uint64_t p;
2380 int r;
2381
2382 assert(f);
2383 assert(from || to);
2384
2385 sd_id128_to_string(boot_id, t + 9);
2386
2387 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2388 if (r <= 0)
2389 return r;
2390
2391 if (le64toh(o->data.n_entries) <= 0)
2392 return 0;
2393
2394 if (from) {
2395 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2396 if (r < 0)
2397 return r;
2398
2399 *from = le64toh(o->entry.monotonic);
2400 }
2401
2402 if (to) {
2403 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2404 if (r < 0)
2405 return r;
2406
2407 r = generic_array_get_plus_one(f,
2408 le64toh(o->data.entry_offset),
2409 le64toh(o->data.entry_array_offset),
2410 le64toh(o->data.n_entries)-1,
2411 &o, NULL);
2412 if (r <= 0)
2413 return r;
2414
2415 *to = le64toh(o->entry.monotonic);
2416 }
2417
2418 return 1;
2419}
dca6219e
LP
2420
2421bool journal_file_rotate_suggested(JournalFile *f) {
2422 assert(f);
2423
2424 /* If we gained new header fields we gained new features,
2425 * hence suggest a rotation */
361f9cbc
LP
2426 if (le64toh(f->header->header_size) < sizeof(Header)) {
2427 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2428 return true;
361f9cbc 2429 }
dca6219e
LP
2430
2431 /* Let's check if the hash tables grew over a certain fill
2432 * level (75%, borrowing this value from Java's hash table
2433 * implementation), and if so suggest a rotation. To calculate
2434 * the fill level we need the n_data field, which only exists
2435 * in newer versions. */
2436
2437 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
2438 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2439 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2440 f->path,
2441 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2442 (unsigned long long) le64toh(f->header->n_data),
2443 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2444 (unsigned long long) (f->last_stat.st_size),
2445 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 2446 return true;
361f9cbc 2447 }
dca6219e
LP
2448
2449 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
2450 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2451 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2452 f->path,
2453 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2454 (unsigned long long) le64toh(f->header->n_fields),
2455 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 2456 return true;
361f9cbc 2457 }
dca6219e
LP
2458
2459 return false;
2460}