]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journald: add additional simple static tests to verifier
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
0284adc6 32#include "journal-authenticate.h"
cec736d2 33#include "lookup3.h"
807e17f0 34#include "compress.h"
7560fffc 35#include "fsprg.h"
cec736d2 36
4a92baf3
LP
/* Default sizes, in bytes, of the data and field hash tables */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads smaller than this many bytes are stored uncompressed */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* Smallest size a journal file may have */
#define JOURNAL_FILE_SIZE_MIN (64ULL << 10)                /* 64 KiB */

/* Lower and upper bounds applied when the max_use value is deduced
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL << 20)                 /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL << 30)                 /* 4 GiB */

/* Upper bound applied when max_size is deduced from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL << 20)              /* 128 MiB */

/* Upper bound applied when the keep_free value is deduced from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL << 30)               /* 4 GiB */

/* keep_free value used when the file system size cannot be
 * determined */
#define DEFAULT_KEEP_FREE (1ULL << 20)                     /* 1 MiB */

/* n_data was the first field added after the initial file format
 * design, hence every valid header extends at least up to it */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 63
cec736d2 64void journal_file_close(JournalFile *f) {
de190aef 65 assert(f);
cec736d2 66
b0af6f41 67 /* Write the final tag */
baed47c3 68 if (f->seal)
b0af6f41
LP
69 journal_file_append_tag(f);
70
7560fffc 71 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
72 if (f->mmap && f->fd >= 0)
73 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc
LP
74
75 if (f->writable && f->fd >= 0)
76 fdatasync(f->fd);
77
d384c7a8 78 if (f->header) {
cd96b3b8
LP
79 /* Mark the file offline. Don't override the archived state if it already is set */
80 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 81 f->header->state = STATE_OFFLINE;
cec736d2 82
d384c7a8
MS
83 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84 }
cec736d2 85
0ac38b70
LP
86 if (f->fd >= 0)
87 close_nointr_nofail(f->fd);
88
cec736d2 89 free(f->path);
807e17f0 90
16e9f408
LP
91 if (f->mmap)
92 mmap_cache_unref(f->mmap);
93
807e17f0
LP
94#ifdef HAVE_XZ
95 free(f->compress_buffer);
96#endif
97
7560fffc 98#ifdef HAVE_GCRYPT
baed47c3
LP
99 if (f->fss_file)
100 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
101 else if (f->fsprg_state)
102 free(f->fsprg_state);
103
104 free(f->fsprg_seed);
7560fffc
LP
105
106 if (f->hmac)
107 gcry_md_close(f->hmac);
108#endif
109
cec736d2
LP
110 free(f);
111}
112
0ac38b70 113static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
114 Header h;
115 ssize_t k;
116 int r;
117
118 assert(f);
119
120 zero(h);
7560fffc 121 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 122 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 123
7560fffc
LP
124 h.incompatible_flags =
125 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127 h.compatible_flags =
baed47c3 128 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 129
cec736d2
LP
130 r = sd_id128_randomize(&h.file_id);
131 if (r < 0)
132 return r;
133
0ac38b70
LP
134 if (template) {
135 h.seqnum_id = template->header->seqnum_id;
beec0085 136 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
137 } else
138 h.seqnum_id = h.file_id;
cec736d2
LP
139
140 k = pwrite(f->fd, &h, sizeof(h), 0);
141 if (k < 0)
142 return -errno;
143
144 if (k != sizeof(h))
145 return -EIO;
146
147 return 0;
148}
149
150static int journal_file_refresh_header(JournalFile *f) {
151 int r;
de190aef 152 sd_id128_t boot_id;
cec736d2
LP
153
154 assert(f);
155
156 r = sd_id128_get_machine(&f->header->machine_id);
157 if (r < 0)
158 return r;
159
de190aef 160 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
161 if (r < 0)
162 return r;
163
de190aef
LP
164 if (sd_id128_equal(boot_id, f->header->boot_id))
165 f->tail_entry_monotonic_valid = true;
166
167 f->header->boot_id = boot_id;
168
169 f->header->state = STATE_ONLINE;
b788cc23 170
7560fffc
LP
171 /* Sync the online state to disk */
172 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173 fdatasync(f->fd);
b788cc23 174
cec736d2
LP
175 return 0;
176}
177
178static int journal_file_verify_header(JournalFile *f) {
179 assert(f);
180
7560fffc 181 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
182 return -EBADMSG;
183
7560fffc
LP
184 /* In both read and write mode we refuse to open files with
185 * incompatible flags we don't know */
807e17f0 186#ifdef HAVE_XZ
7560fffc 187 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
188 return -EPROTONOSUPPORT;
189#else
cec736d2
LP
190 if (f->header->incompatible_flags != 0)
191 return -EPROTONOSUPPORT;
807e17f0 192#endif
cec736d2 193
7560fffc
LP
194 /* When open for writing we refuse to open files with
195 * compatible flags, too */
196 if (f->writable) {
197#ifdef HAVE_GCRYPT
baed47c3 198 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
199 return -EPROTONOSUPPORT;
200#else
201 if (f->header->compatible_flags != 0)
202 return -EPROTONOSUPPORT;
203#endif
204 }
205
db11ac1a
LP
206 if (f->header->state >= _STATE_MAX)
207 return -EBADMSG;
208
dca6219e
LP
209 /* The first addition was n_data, so check that we are at least this large */
210 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
211 return -EBADMSG;
212
baed47c3
LP
213 if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED) &&
214 !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
215 return -EBADMSG;
216
db11ac1a
LP
217 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
218 return -ENODATA;
219
220 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
221 return -ENODATA;
222
223 if (!VALID64(f->header->data_hash_table_offset) ||
224 !VALID64(f->header->field_hash_table_offset) ||
225 !VALID64(f->header->tail_object_offset) ||
226 !VALID64(f->header->entry_array_offset))
cec736d2
LP
227 return -ENODATA;
228
229 if (f->writable) {
ccdbaf91 230 uint8_t state;
cec736d2
LP
231 sd_id128_t machine_id;
232 int r;
233
234 r = sd_id128_get_machine(&machine_id);
235 if (r < 0)
236 return r;
237
238 if (!sd_id128_equal(machine_id, f->header->machine_id))
239 return -EHOSTDOWN;
240
de190aef 241 state = f->header->state;
cec736d2 242
71fa6f00
LP
243 if (state == STATE_ONLINE) {
244 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
245 return -EBUSY;
246 } else if (state == STATE_ARCHIVED)
cec736d2 247 return -ESHUTDOWN;
71fa6f00
LP
248 else if (state != STATE_OFFLINE) {
249 log_debug("Journal file %s has unknown state %u.", f->path, state);
250 return -EBUSY;
251 }
cec736d2
LP
252 }
253
7560fffc 254 f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
baed47c3 255 f->seal = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED);
7560fffc 256
cec736d2
LP
257 return 0;
258}
259
260static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 261 uint64_t old_size, new_size;
fec2aa2f 262 int r;
cec736d2
LP
263
264 assert(f);
265
cec736d2 266 /* We assume that this file is not sparse, and we know that
38ac38b2 267 * for sure, since we always call posix_fallocate()
cec736d2
LP
268 * ourselves */
269
270 old_size =
23b0b2b2 271 le64toh(f->header->header_size) +
cec736d2
LP
272 le64toh(f->header->arena_size);
273
bc85bfee 274 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
275 if (new_size < le64toh(f->header->header_size))
276 new_size = le64toh(f->header->header_size);
bc85bfee
LP
277
278 if (new_size <= old_size)
cec736d2
LP
279 return 0;
280
bc85bfee
LP
281 if (f->metrics.max_size > 0 &&
282 new_size > f->metrics.max_size)
283 return -E2BIG;
cec736d2 284
bc85bfee
LP
285 if (new_size > f->metrics.min_size &&
286 f->metrics.keep_free > 0) {
cec736d2
LP
287 struct statvfs svfs;
288
289 if (fstatvfs(f->fd, &svfs) >= 0) {
290 uint64_t available;
291
292 available = svfs.f_bfree * svfs.f_bsize;
293
bc85bfee
LP
294 if (available >= f->metrics.keep_free)
295 available -= f->metrics.keep_free;
cec736d2
LP
296 else
297 available = 0;
298
299 if (new_size - old_size > available)
300 return -E2BIG;
301 }
302 }
303
bc85bfee
LP
304 /* Note that the glibc fallocate() fallback is very
305 inefficient, hence we try to minimize the allocation area
306 as we can. */
fec2aa2f
GV
307 r = posix_fallocate(f->fd, old_size, new_size - old_size);
308 if (r != 0)
309 return -r;
cec736d2 310
f65425cb
LP
311 mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
312
cec736d2
LP
313 if (fstat(f->fd, &f->last_stat) < 0)
314 return -errno;
315
23b0b2b2 316 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
317
318 return 0;
319}
320
16e9f408 321static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
cec736d2 322 assert(f);
cec736d2
LP
323 assert(ret);
324
2a59ea54 325 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
326 if (offset + size > (uint64_t) f->last_stat.st_size) {
327 /* Hmm, out of range? Let's refresh the fstat() data
328 * first, before we trust that check. */
329
330 if (fstat(f->fd, &f->last_stat) < 0 ||
331 offset + size > (uint64_t) f->last_stat.st_size)
332 return -EADDRNOTAVAIL;
333 }
334
16e9f408 335 return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
cec736d2
LP
336}
337
16e9f408
LP
338static uint64_t minimum_header_size(Object *o) {
339
340 static uint64_t table[] = {
341 [OBJECT_DATA] = sizeof(DataObject),
342 [OBJECT_FIELD] = sizeof(FieldObject),
343 [OBJECT_ENTRY] = sizeof(EntryObject),
344 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
345 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
346 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
347 [OBJECT_TAG] = sizeof(TagObject),
348 };
349
350 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
351 return sizeof(ObjectHeader);
352
353 return table[o->object.type];
354}
355
de190aef 356int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
357 int r;
358 void *t;
359 Object *o;
360 uint64_t s;
16e9f408 361 unsigned context;
cec736d2
LP
362
363 assert(f);
364 assert(ret);
365
db11ac1a
LP
366 /* Objects may only be located at multiple of 64 bit */
367 if (!VALID64(offset))
368 return -EFAULT;
369
16e9f408
LP
370 /* One context for each type, plus one catch-all for the rest */
371 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
372
373 r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
374 if (r < 0)
375 return r;
376
377 o = (Object*) t;
378 s = le64toh(o->object.size);
379
380 if (s < sizeof(ObjectHeader))
381 return -EBADMSG;
382
16e9f408
LP
383 if (o->object.type <= OBJECT_UNUSED)
384 return -EBADMSG;
385
386 if (s < minimum_header_size(o))
387 return -EBADMSG;
388
de190aef 389 if (type >= 0 && o->object.type != type)
cec736d2
LP
390 return -EBADMSG;
391
392 if (s > sizeof(ObjectHeader)) {
de190aef 393 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
394 if (r < 0)
395 return r;
396
397 o = (Object*) t;
398 }
399
cec736d2
LP
400 *ret = o;
401 return 0;
402}
403
d98cc1f2 404static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
405 uint64_t r;
406
407 assert(f);
408
beec0085 409 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
410
411 if (seqnum) {
de190aef 412 /* If an external seqnum counter was passed, we update
c2373f84
LP
413 * both the local and the external one, and set it to
414 * the maximum of both */
415
416 if (*seqnum + 1 > r)
417 r = *seqnum + 1;
418
419 *seqnum = r;
420 }
421
beec0085 422 f->header->tail_entry_seqnum = htole64(r);
cec736d2 423
beec0085
LP
424 if (f->header->head_entry_seqnum == 0)
425 f->header->head_entry_seqnum = htole64(r);
de190aef 426
cec736d2
LP
427 return r;
428}
429
0284adc6 430int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
431 int r;
432 uint64_t p;
433 Object *tail, *o;
434 void *t;
435
436 assert(f);
16e9f408 437 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
438 assert(size >= sizeof(ObjectHeader));
439 assert(offset);
440 assert(ret);
441
442 p = le64toh(f->header->tail_object_offset);
cec736d2 443 if (p == 0)
23b0b2b2 444 p = le64toh(f->header->header_size);
cec736d2 445 else {
de190aef 446 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
447 if (r < 0)
448 return r;
449
450 p += ALIGN64(le64toh(tail->object.size));
451 }
452
453 r = journal_file_allocate(f, p, size);
454 if (r < 0)
455 return r;
456
de190aef 457 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
458 if (r < 0)
459 return r;
460
461 o = (Object*) t;
462
463 zero(o->object);
de190aef 464 o->object.type = type;
cec736d2
LP
465 o->object.size = htole64(size);
466
467 f->header->tail_object_offset = htole64(p);
cec736d2
LP
468 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
469
470 *ret = o;
471 *offset = p;
472
473 return 0;
474}
475
de190aef 476static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
477 uint64_t s, p;
478 Object *o;
479 int r;
480
481 assert(f);
482
dfabe643 483 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
484 journal file and we want to make sure we never get beyond
485 75% fill level. Calculate the hash table size for the
486 maximum file size based on these metrics. */
487
dfabe643 488 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
489 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
490 s = DEFAULT_DATA_HASH_TABLE_SIZE;
491
dfabe643 492 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 493
de190aef
LP
494 r = journal_file_append_object(f,
495 OBJECT_DATA_HASH_TABLE,
496 offsetof(Object, hash_table.items) + s,
497 &o, &p);
cec736d2
LP
498 if (r < 0)
499 return r;
500
de190aef 501 memset(o->hash_table.items, 0, s);
cec736d2 502
de190aef
LP
503 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
504 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
505
506 return 0;
507}
508
de190aef 509static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
510 uint64_t s, p;
511 Object *o;
512 int r;
513
514 assert(f);
515
de190aef
LP
516 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
517 r = journal_file_append_object(f,
518 OBJECT_FIELD_HASH_TABLE,
519 offsetof(Object, hash_table.items) + s,
520 &o, &p);
cec736d2
LP
521 if (r < 0)
522 return r;
523
de190aef 524 memset(o->hash_table.items, 0, s);
cec736d2 525
de190aef
LP
526 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
527 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
528
529 return 0;
530}
531
de190aef 532static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
533 uint64_t s, p;
534 void *t;
535 int r;
536
537 assert(f);
538
de190aef
LP
539 p = le64toh(f->header->data_hash_table_offset);
540 s = le64toh(f->header->data_hash_table_size);
cec736d2 541
de190aef 542 r = journal_file_move_to(f,
16e9f408 543 OBJECT_DATA_HASH_TABLE,
de190aef
LP
544 p, s,
545 &t);
cec736d2
LP
546 if (r < 0)
547 return r;
548
de190aef 549 f->data_hash_table = t;
cec736d2
LP
550 return 0;
551}
552
de190aef 553static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
554 uint64_t s, p;
555 void *t;
556 int r;
557
558 assert(f);
559
de190aef
LP
560 p = le64toh(f->header->field_hash_table_offset);
561 s = le64toh(f->header->field_hash_table_size);
cec736d2 562
de190aef 563 r = journal_file_move_to(f,
16e9f408 564 OBJECT_FIELD_HASH_TABLE,
de190aef
LP
565 p, s,
566 &t);
cec736d2
LP
567 if (r < 0)
568 return r;
569
de190aef 570 f->field_hash_table = t;
cec736d2
LP
571 return 0;
572}
573
de190aef
LP
574static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
575 uint64_t p, h;
cec736d2
LP
576 int r;
577
578 assert(f);
579 assert(o);
580 assert(offset > 0);
de190aef 581 assert(o->object.type == OBJECT_DATA);
cec736d2 582
48496df6
LP
583 /* This might alter the window we are looking at */
584
de190aef
LP
585 o->data.next_hash_offset = o->data.next_field_offset = 0;
586 o->data.entry_offset = o->data.entry_array_offset = 0;
587 o->data.n_entries = 0;
cec736d2 588
de190aef 589 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 590 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
591 if (p == 0) {
592 /* Only entry in the hash table is easy */
de190aef 593 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 594 } else {
48496df6
LP
595 /* Move back to the previous data object, to patch in
596 * pointer */
cec736d2 597
de190aef 598 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
599 if (r < 0)
600 return r;
601
de190aef 602 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
603 }
604
de190aef 605 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 606
dca6219e
LP
607 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
608 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
609
cec736d2
LP
610 return 0;
611}
612
de190aef
LP
613int journal_file_find_data_object_with_hash(
614 JournalFile *f,
615 const void *data, uint64_t size, uint64_t hash,
616 Object **ret, uint64_t *offset) {
48496df6 617
de190aef 618 uint64_t p, osize, h;
cec736d2
LP
619 int r;
620
621 assert(f);
622 assert(data || size == 0);
623
624 osize = offsetof(Object, data.payload) + size;
625
bc85bfee
LP
626 if (f->header->data_hash_table_size == 0)
627 return -EBADMSG;
628
de190aef
LP
629 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
630 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 631
de190aef
LP
632 while (p > 0) {
633 Object *o;
cec736d2 634
de190aef 635 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
636 if (r < 0)
637 return r;
638
807e17f0 639 if (le64toh(o->data.hash) != hash)
85a131e8 640 goto next;
807e17f0
LP
641
642 if (o->object.flags & OBJECT_COMPRESSED) {
643#ifdef HAVE_XZ
b785c858 644 uint64_t l, rsize;
cec736d2 645
807e17f0
LP
646 l = le64toh(o->object.size);
647 if (l <= offsetof(Object, data.payload))
cec736d2
LP
648 return -EBADMSG;
649
807e17f0
LP
650 l -= offsetof(Object, data.payload);
651
652 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
653 return -EBADMSG;
654
b785c858 655 if (rsize == size &&
807e17f0
LP
656 memcmp(f->compress_buffer, data, size) == 0) {
657
658 if (ret)
659 *ret = o;
660
661 if (offset)
662 *offset = p;
663
664 return 1;
665 }
666#else
667 return -EPROTONOSUPPORT;
668#endif
669
670 } else if (le64toh(o->object.size) == osize &&
671 memcmp(o->data.payload, data, size) == 0) {
672
cec736d2
LP
673 if (ret)
674 *ret = o;
675
676 if (offset)
677 *offset = p;
678
de190aef 679 return 1;
cec736d2
LP
680 }
681
85a131e8 682 next:
cec736d2
LP
683 p = le64toh(o->data.next_hash_offset);
684 }
685
de190aef
LP
686 return 0;
687}
688
689int journal_file_find_data_object(
690 JournalFile *f,
691 const void *data, uint64_t size,
692 Object **ret, uint64_t *offset) {
693
694 uint64_t hash;
695
696 assert(f);
697 assert(data || size == 0);
698
699 hash = hash64(data, size);
700
701 return journal_file_find_data_object_with_hash(f,
702 data, size, hash,
703 ret, offset);
704}
705
48496df6
LP
706static int journal_file_append_data(
707 JournalFile *f,
708 const void *data, uint64_t size,
709 Object **ret, uint64_t *offset) {
710
de190aef
LP
711 uint64_t hash, p;
712 uint64_t osize;
713 Object *o;
714 int r;
807e17f0 715 bool compressed = false;
de190aef
LP
716
717 assert(f);
718 assert(data || size == 0);
719
720 hash = hash64(data, size);
721
722 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
723 if (r < 0)
724 return r;
725 else if (r > 0) {
726
727 if (ret)
728 *ret = o;
729
730 if (offset)
731 *offset = p;
732
733 return 0;
734 }
735
736 osize = offsetof(Object, data.payload) + size;
737 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
738 if (r < 0)
739 return r;
740
cec736d2 741 o->data.hash = htole64(hash);
807e17f0
LP
742
743#ifdef HAVE_XZ
744 if (f->compress &&
745 size >= COMPRESSION_SIZE_THRESHOLD) {
746 uint64_t rsize;
747
748 compressed = compress_blob(data, size, o->data.payload, &rsize);
749
750 if (compressed) {
751 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
752 o->object.flags |= OBJECT_COMPRESSED;
753
807e17f0
LP
754 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
755 }
756 }
757#endif
758
64825d3c 759 if (!compressed && size > 0)
807e17f0 760 memcpy(o->data.payload, data, size);
cec736d2 761
de190aef 762 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
763 if (r < 0)
764 return r;
765
b0af6f41
LP
766 r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
767 if (r < 0)
768 return r;
769
48496df6
LP
770 /* The linking might have altered the window, so let's
771 * refresh our pointer */
772 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
773 if (r < 0)
774 return r;
775
cec736d2
LP
776 if (ret)
777 *ret = o;
778
779 if (offset)
de190aef 780 *offset = p;
cec736d2
LP
781
782 return 0;
783}
784
785uint64_t journal_file_entry_n_items(Object *o) {
786 assert(o);
7be3aa17 787 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
788
789 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
790}
791
0284adc6 792uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 793 assert(o);
7be3aa17 794 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
795
796 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
797}
798
799static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
800 le64_t *first,
801 le64_t *idx,
de190aef 802 uint64_t p) {
cec736d2 803 int r;
de190aef
LP
804 uint64_t n = 0, ap = 0, q, i, a, hidx;
805 Object *o;
806
cec736d2 807 assert(f);
de190aef
LP
808 assert(first);
809 assert(idx);
810 assert(p > 0);
cec736d2 811
de190aef
LP
812 a = le64toh(*first);
813 i = hidx = le64toh(*idx);
814 while (a > 0) {
815
816 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
817 if (r < 0)
818 return r;
cec736d2 819
de190aef
LP
820 n = journal_file_entry_array_n_items(o);
821 if (i < n) {
822 o->entry_array.items[i] = htole64(p);
823 *idx = htole64(hidx + 1);
824 return 0;
825 }
cec736d2 826
de190aef
LP
827 i -= n;
828 ap = a;
829 a = le64toh(o->entry_array.next_entry_array_offset);
830 }
831
832 if (hidx > n)
833 n = (hidx+1) * 2;
834 else
835 n = n * 2;
836
837 if (n < 4)
838 n = 4;
839
840 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
841 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
842 &o, &q);
cec736d2
LP
843 if (r < 0)
844 return r;
845
b0af6f41
LP
846 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
847 if (r < 0)
848 return r;
849
de190aef 850 o->entry_array.items[i] = htole64(p);
cec736d2 851
de190aef 852 if (ap == 0)
7be3aa17 853 *first = htole64(q);
cec736d2 854 else {
de190aef 855 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
856 if (r < 0)
857 return r;
858
de190aef
LP
859 o->entry_array.next_entry_array_offset = htole64(q);
860 }
cec736d2 861
2dee23eb
LP
862 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
863 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
864
de190aef
LP
865 *idx = htole64(hidx + 1);
866
867 return 0;
868}
cec736d2 869
de190aef 870static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
871 le64_t *extra,
872 le64_t *first,
873 le64_t *idx,
de190aef
LP
874 uint64_t p) {
875
876 int r;
877
878 assert(f);
879 assert(extra);
880 assert(first);
881 assert(idx);
882 assert(p > 0);
883
884 if (*idx == 0)
885 *extra = htole64(p);
886 else {
4fd052ae 887 le64_t i;
de190aef 888
7be3aa17 889 i = htole64(le64toh(*idx) - 1);
de190aef
LP
890 r = link_entry_into_array(f, first, &i, p);
891 if (r < 0)
892 return r;
cec736d2
LP
893 }
894
de190aef
LP
895 *idx = htole64(le64toh(*idx) + 1);
896 return 0;
897}
898
899static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
900 uint64_t p;
901 int r;
902 assert(f);
903 assert(o);
904 assert(offset > 0);
905
906 p = le64toh(o->entry.items[i].object_offset);
907 if (p == 0)
908 return -EINVAL;
909
910 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
911 if (r < 0)
912 return r;
913
de190aef
LP
914 return link_entry_into_array_plus_one(f,
915 &o->data.entry_offset,
916 &o->data.entry_array_offset,
917 &o->data.n_entries,
918 offset);
cec736d2
LP
919}
920
921static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 922 uint64_t n, i;
cec736d2
LP
923 int r;
924
925 assert(f);
926 assert(o);
927 assert(offset > 0);
de190aef 928 assert(o->object.type == OBJECT_ENTRY);
cec736d2 929
b788cc23
LP
930 __sync_synchronize();
931
cec736d2 932 /* Link up the entry itself */
de190aef
LP
933 r = link_entry_into_array(f,
934 &f->header->entry_array_offset,
935 &f->header->n_entries,
936 offset);
937 if (r < 0)
938 return r;
cec736d2 939
aaf53376 940 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 941
de190aef 942 if (f->header->head_entry_realtime == 0)
0ac38b70 943 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 944
0ac38b70 945 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
946 f->header->tail_entry_monotonic = o->entry.monotonic;
947
948 f->tail_entry_monotonic_valid = true;
cec736d2
LP
949
950 /* Link up the items */
951 n = journal_file_entry_n_items(o);
952 for (i = 0; i < n; i++) {
953 r = journal_file_link_entry_item(f, o, offset, i);
954 if (r < 0)
955 return r;
956 }
957
cec736d2
LP
958 return 0;
959}
960
961static int journal_file_append_entry_internal(
962 JournalFile *f,
963 const dual_timestamp *ts,
964 uint64_t xor_hash,
965 const EntryItem items[], unsigned n_items,
de190aef 966 uint64_t *seqnum,
cec736d2
LP
967 Object **ret, uint64_t *offset) {
968 uint64_t np;
969 uint64_t osize;
970 Object *o;
971 int r;
972
973 assert(f);
974 assert(items || n_items == 0);
de190aef 975 assert(ts);
cec736d2
LP
976
977 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
978
de190aef 979 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
980 if (r < 0)
981 return r;
982
d98cc1f2 983 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 984 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
985 o->entry.realtime = htole64(ts->realtime);
986 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
987 o->entry.xor_hash = htole64(xor_hash);
988 o->entry.boot_id = f->header->boot_id;
989
b0af6f41
LP
990 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
991 if (r < 0)
992 return r;
993
cec736d2
LP
994 r = journal_file_link_entry(f, o, np);
995 if (r < 0)
996 return r;
997
998 if (ret)
999 *ret = o;
1000
1001 if (offset)
1002 *offset = np;
1003
1004 return 0;
1005}
1006
cf244689 1007void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1008 assert(f);
1009
1010 /* inotify() does not receive IN_MODIFY events from file
1011 * accesses done via mmap(). After each access we hence
1012 * trigger IN_MODIFY by truncating the journal file to its
1013 * current size which triggers IN_MODIFY. */
1014
bc85bfee
LP
1015 __sync_synchronize();
1016
50f20cfd
LP
1017 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1018 log_error("Failed to to truncate file to its own size: %m");
1019}
1020
de190aef 1021int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1022 unsigned i;
1023 EntryItem *items;
1024 int r;
1025 uint64_t xor_hash = 0;
de190aef 1026 struct dual_timestamp _ts;
cec736d2
LP
1027
1028 assert(f);
1029 assert(iovec || n_iovec == 0);
1030
de190aef
LP
1031 if (!f->writable)
1032 return -EPERM;
1033
1034 if (!ts) {
1035 dual_timestamp_get(&_ts);
1036 ts = &_ts;
1037 }
1038
1039 if (f->tail_entry_monotonic_valid &&
1040 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1041 return -EINVAL;
1042
7560fffc
LP
1043 r = journal_file_maybe_append_tag(f, ts->realtime);
1044 if (r < 0)
1045 return r;
1046
64825d3c
LP
1047 /* alloca() can't take 0, hence let's allocate at least one */
1048 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1049
1050 for (i = 0; i < n_iovec; i++) {
1051 uint64_t p;
1052 Object *o;
1053
1054 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1055 if (r < 0)
cf244689 1056 return r;
cec736d2
LP
1057
1058 xor_hash ^= le64toh(o->data.hash);
1059 items[i].object_offset = htole64(p);
de7b95cd 1060 items[i].hash = o->data.hash;
cec736d2
LP
1061 }
1062
de190aef 1063 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1064
50f20cfd
LP
1065 journal_file_post_change(f);
1066
cec736d2
LP
1067 return r;
1068}
1069
de190aef
LP
1070static int generic_array_get(JournalFile *f,
1071 uint64_t first,
1072 uint64_t i,
1073 Object **ret, uint64_t *offset) {
1074
cec736d2 1075 Object *o;
6c8a39b8 1076 uint64_t p = 0, a;
cec736d2
LP
1077 int r;
1078
1079 assert(f);
1080
de190aef
LP
1081 a = first;
1082 while (a > 0) {
1083 uint64_t n;
cec736d2 1084
de190aef
LP
1085 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1086 if (r < 0)
1087 return r;
cec736d2 1088
de190aef
LP
1089 n = journal_file_entry_array_n_items(o);
1090 if (i < n) {
1091 p = le64toh(o->entry_array.items[i]);
1092 break;
cec736d2
LP
1093 }
1094
de190aef
LP
1095 i -= n;
1096 a = le64toh(o->entry_array.next_entry_array_offset);
1097 }
1098
1099 if (a <= 0 || p <= 0)
1100 return 0;
1101
1102 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1103 if (r < 0)
1104 return r;
1105
1106 if (ret)
1107 *ret = o;
1108
1109 if (offset)
1110 *offset = p;
1111
1112 return 1;
1113}
1114
1115static int generic_array_get_plus_one(JournalFile *f,
1116 uint64_t extra,
1117 uint64_t first,
1118 uint64_t i,
1119 Object **ret, uint64_t *offset) {
1120
1121 Object *o;
1122
1123 assert(f);
1124
1125 if (i == 0) {
1126 int r;
1127
1128 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1129 if (r < 0)
1130 return r;
1131
de190aef
LP
1132 if (ret)
1133 *ret = o;
cec736d2 1134
de190aef
LP
1135 if (offset)
1136 *offset = extra;
cec736d2 1137
de190aef 1138 return 1;
cec736d2
LP
1139 }
1140
de190aef
LP
1141 return generic_array_get(f, first, i-1, ret, offset);
1142}
cec736d2 1143
de190aef
LP
/* Results of the test_object() callbacks used by the bisection code */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object compares lower than the needle */
        TEST_RIGHT = 2  /* the tested object compares higher than the needle */
};
cec736d2 1149
de190aef
LP
1150static int generic_array_bisect(JournalFile *f,
1151 uint64_t first,
1152 uint64_t n,
1153 uint64_t needle,
1154 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1155 direction_t direction,
1156 Object **ret,
1157 uint64_t *offset,
1158 uint64_t *idx) {
1159
1160 uint64_t a, p, t = 0, i = 0, last_p = 0;
1161 bool subtract_one = false;
1162 Object *o, *array = NULL;
1163 int r;
cec736d2 1164
de190aef
LP
1165 assert(f);
1166 assert(test_object);
cec736d2 1167
de190aef
LP
1168 a = first;
1169 while (a > 0) {
1170 uint64_t left, right, k, lp;
1171
1172 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1173 if (r < 0)
1174 return r;
1175
de190aef
LP
1176 k = journal_file_entry_array_n_items(array);
1177 right = MIN(k, n);
1178 if (right <= 0)
1179 return 0;
cec736d2 1180
de190aef
LP
1181 i = right - 1;
1182 lp = p = le64toh(array->entry_array.items[i]);
1183 if (p <= 0)
1184 return -EBADMSG;
cec736d2 1185
de190aef
LP
1186 r = test_object(f, p, needle);
1187 if (r < 0)
1188 return r;
cec736d2 1189
de190aef
LP
1190 if (r == TEST_FOUND)
1191 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1192
1193 if (r == TEST_RIGHT) {
1194 left = 0;
1195 right -= 1;
1196 for (;;) {
1197 if (left == right) {
1198 if (direction == DIRECTION_UP)
1199 subtract_one = true;
1200
1201 i = left;
1202 goto found;
1203 }
1204
1205 assert(left < right);
1206
1207 i = (left + right) / 2;
1208 p = le64toh(array->entry_array.items[i]);
1209 if (p <= 0)
1210 return -EBADMSG;
1211
1212 r = test_object(f, p, needle);
1213 if (r < 0)
1214 return r;
cec736d2 1215
de190aef
LP
1216 if (r == TEST_FOUND)
1217 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1218
1219 if (r == TEST_RIGHT)
1220 right = i;
1221 else
1222 left = i + 1;
1223 }
1224 }
1225
cbdca852
LP
1226 if (k > n) {
1227 if (direction == DIRECTION_UP) {
1228 i = n;
1229 subtract_one = true;
1230 goto found;
1231 }
1232
cec736d2 1233 return 0;
cbdca852 1234 }
cec736d2 1235
de190aef
LP
1236 last_p = lp;
1237
1238 n -= k;
1239 t += k;
1240 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1241 }
1242
1243 return 0;
de190aef
LP
1244
1245found:
1246 if (subtract_one && t == 0 && i == 0)
1247 return 0;
1248
1249 if (subtract_one && i == 0)
1250 p = last_p;
1251 else if (subtract_one)
1252 p = le64toh(array->entry_array.items[i-1]);
1253 else
1254 p = le64toh(array->entry_array.items[i]);
1255
1256 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1257 if (r < 0)
1258 return r;
1259
1260 if (ret)
1261 *ret = o;
1262
1263 if (offset)
1264 *offset = p;
1265
1266 if (idx)
cbdca852 1267 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1268
1269 return 1;
cec736d2
LP
1270}
1271
de190aef
LP
1272static int generic_array_bisect_plus_one(JournalFile *f,
1273 uint64_t extra,
1274 uint64_t first,
1275 uint64_t n,
1276 uint64_t needle,
1277 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1278 direction_t direction,
1279 Object **ret,
1280 uint64_t *offset,
1281 uint64_t *idx) {
1282
cec736d2 1283 int r;
cbdca852
LP
1284 bool step_back = false;
1285 Object *o;
cec736d2
LP
1286
1287 assert(f);
de190aef 1288 assert(test_object);
cec736d2 1289
de190aef
LP
1290 if (n <= 0)
1291 return 0;
cec736d2 1292
de190aef
LP
1293 /* This bisects the array in object 'first', but first checks
1294 * an extra */
de190aef
LP
1295 r = test_object(f, extra, needle);
1296 if (r < 0)
1297 return r;
a536e261
LP
1298
1299 if (r == TEST_FOUND)
1300 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1301
cbdca852
LP
1302 /* if we are looking with DIRECTION_UP then we need to first
1303 see if in the actual array there is a matching entry, and
1304 return the last one of that. But if there isn't any we need
1305 to return this one. Hence remember this, and return it
1306 below. */
1307 if (r == TEST_LEFT)
1308 step_back = direction == DIRECTION_UP;
de190aef 1309
cbdca852
LP
1310 if (r == TEST_RIGHT) {
1311 if (direction == DIRECTION_DOWN)
1312 goto found;
1313 else
1314 return 0;
a536e261 1315 }
cec736d2 1316
de190aef
LP
1317 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1318
cbdca852
LP
1319 if (r == 0 && step_back)
1320 goto found;
1321
ecf68b1d 1322 if (r > 0 && idx)
de190aef
LP
1323 (*idx) ++;
1324
1325 return r;
cbdca852
LP
1326
1327found:
1328 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1329 if (r < 0)
1330 return r;
1331
1332 if (ret)
1333 *ret = o;
1334
1335 if (offset)
1336 *offset = extra;
1337
1338 if (idx)
1339 *idx = 0;
1340
1341 return 1;
1342}
1343
1344static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1345 assert(f);
1346 assert(p > 0);
1347
1348 if (p == needle)
1349 return TEST_FOUND;
1350 else if (p < needle)
1351 return TEST_LEFT;
1352 else
1353 return TEST_RIGHT;
1354}
1355
1356int journal_file_move_to_entry_by_offset(
1357 JournalFile *f,
1358 uint64_t p,
1359 direction_t direction,
1360 Object **ret,
1361 uint64_t *offset) {
1362
1363 return generic_array_bisect(f,
1364 le64toh(f->header->entry_array_offset),
1365 le64toh(f->header->n_entries),
1366 p,
1367 test_object_offset,
1368 direction,
1369 ret, offset, NULL);
de190aef
LP
1370}
1371
cbdca852 1372
de190aef
LP
1373static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1374 Object *o;
1375 int r;
1376
1377 assert(f);
1378 assert(p > 0);
1379
1380 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1381 if (r < 0)
1382 return r;
1383
de190aef
LP
1384 if (le64toh(o->entry.seqnum) == needle)
1385 return TEST_FOUND;
1386 else if (le64toh(o->entry.seqnum) < needle)
1387 return TEST_LEFT;
1388 else
1389 return TEST_RIGHT;
1390}
cec736d2 1391
de190aef
LP
1392int journal_file_move_to_entry_by_seqnum(
1393 JournalFile *f,
1394 uint64_t seqnum,
1395 direction_t direction,
1396 Object **ret,
1397 uint64_t *offset) {
1398
1399 return generic_array_bisect(f,
1400 le64toh(f->header->entry_array_offset),
1401 le64toh(f->header->n_entries),
1402 seqnum,
1403 test_object_seqnum,
1404 direction,
1405 ret, offset, NULL);
1406}
cec736d2 1407
de190aef
LP
1408static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1409 Object *o;
1410 int r;
1411
1412 assert(f);
1413 assert(p > 0);
1414
1415 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1416 if (r < 0)
1417 return r;
1418
1419 if (le64toh(o->entry.realtime) == needle)
1420 return TEST_FOUND;
1421 else if (le64toh(o->entry.realtime) < needle)
1422 return TEST_LEFT;
1423 else
1424 return TEST_RIGHT;
cec736d2
LP
1425}
1426
de190aef
LP
1427int journal_file_move_to_entry_by_realtime(
1428 JournalFile *f,
1429 uint64_t realtime,
1430 direction_t direction,
1431 Object **ret,
1432 uint64_t *offset) {
1433
1434 return generic_array_bisect(f,
1435 le64toh(f->header->entry_array_offset),
1436 le64toh(f->header->n_entries),
1437 realtime,
1438 test_object_realtime,
1439 direction,
1440 ret, offset, NULL);
1441}
1442
1443static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1444 Object *o;
1445 int r;
1446
1447 assert(f);
1448 assert(p > 0);
1449
1450 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1451 if (r < 0)
1452 return r;
1453
1454 if (le64toh(o->entry.monotonic) == needle)
1455 return TEST_FOUND;
1456 else if (le64toh(o->entry.monotonic) < needle)
1457 return TEST_LEFT;
1458 else
1459 return TEST_RIGHT;
1460}
1461
1462int journal_file_move_to_entry_by_monotonic(
1463 JournalFile *f,
1464 sd_id128_t boot_id,
1465 uint64_t monotonic,
1466 direction_t direction,
1467 Object **ret,
1468 uint64_t *offset) {
1469
10b6f904 1470 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1471 Object *o;
1472 int r;
1473
cbdca852 1474 assert(f);
de190aef 1475
cbdca852 1476 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1477 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1478 if (r < 0)
1479 return r;
cbdca852 1480 if (r == 0)
de190aef
LP
1481 return -ENOENT;
1482
1483 return generic_array_bisect_plus_one(f,
1484 le64toh(o->data.entry_offset),
1485 le64toh(o->data.entry_array_offset),
1486 le64toh(o->data.n_entries),
1487 monotonic,
1488 test_object_monotonic,
1489 direction,
1490 ret, offset, NULL);
1491}
1492
de190aef
LP
1493int journal_file_next_entry(
1494 JournalFile *f,
1495 Object *o, uint64_t p,
1496 direction_t direction,
1497 Object **ret, uint64_t *offset) {
1498
1499 uint64_t i, n;
cec736d2
LP
1500 int r;
1501
1502 assert(f);
de190aef
LP
1503 assert(p > 0 || !o);
1504
1505 n = le64toh(f->header->n_entries);
1506 if (n <= 0)
1507 return 0;
cec736d2
LP
1508
1509 if (!o)
de190aef 1510 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1511 else {
de190aef 1512 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1513 return -EINVAL;
1514
de190aef
LP
1515 r = generic_array_bisect(f,
1516 le64toh(f->header->entry_array_offset),
1517 le64toh(f->header->n_entries),
1518 p,
1519 test_object_offset,
1520 DIRECTION_DOWN,
1521 NULL, NULL,
1522 &i);
1523 if (r <= 0)
1524 return r;
1525
1526 if (direction == DIRECTION_DOWN) {
1527 if (i >= n - 1)
1528 return 0;
1529
1530 i++;
1531 } else {
1532 if (i <= 0)
1533 return 0;
1534
1535 i--;
1536 }
cec736d2
LP
1537 }
1538
de190aef
LP
1539 /* And jump to it */
1540 return generic_array_get(f,
1541 le64toh(f->header->entry_array_offset),
1542 i,
1543 ret, offset);
1544}
cec736d2 1545
de190aef
LP
1546int journal_file_skip_entry(
1547 JournalFile *f,
1548 Object *o, uint64_t p,
1549 int64_t skip,
1550 Object **ret, uint64_t *offset) {
1551
1552 uint64_t i, n;
1553 int r;
1554
1555 assert(f);
1556 assert(o);
1557 assert(p > 0);
1558
1559 if (o->object.type != OBJECT_ENTRY)
1560 return -EINVAL;
1561
1562 r = generic_array_bisect(f,
1563 le64toh(f->header->entry_array_offset),
1564 le64toh(f->header->n_entries),
1565 p,
1566 test_object_offset,
1567 DIRECTION_DOWN,
1568 NULL, NULL,
1569 &i);
1570 if (r <= 0)
cec736d2
LP
1571 return r;
1572
de190aef
LP
1573 /* Calculate new index */
1574 if (skip < 0) {
1575 if ((uint64_t) -skip >= i)
1576 i = 0;
1577 else
1578 i = i - (uint64_t) -skip;
1579 } else
1580 i += (uint64_t) skip;
cec736d2 1581
de190aef
LP
1582 n = le64toh(f->header->n_entries);
1583 if (n <= 0)
1584 return -EBADMSG;
cec736d2 1585
de190aef
LP
1586 if (i >= n)
1587 i = n-1;
1588
1589 return generic_array_get(f,
1590 le64toh(f->header->entry_array_offset),
1591 i,
1592 ret, offset);
cec736d2
LP
1593}
1594
de190aef
LP
1595int journal_file_next_entry_for_data(
1596 JournalFile *f,
1597 Object *o, uint64_t p,
1598 uint64_t data_offset,
1599 direction_t direction,
1600 Object **ret, uint64_t *offset) {
1601
1602 uint64_t n, i;
cec736d2 1603 int r;
de190aef 1604 Object *d;
cec736d2
LP
1605
1606 assert(f);
de190aef 1607 assert(p > 0 || !o);
cec736d2 1608
de190aef 1609 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1610 if (r < 0)
de190aef 1611 return r;
cec736d2 1612
de190aef
LP
1613 n = le64toh(d->data.n_entries);
1614 if (n <= 0)
1615 return n;
cec736d2 1616
de190aef
LP
1617 if (!o)
1618 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1619 else {
1620 if (o->object.type != OBJECT_ENTRY)
1621 return -EINVAL;
cec736d2 1622
de190aef
LP
1623 r = generic_array_bisect_plus_one(f,
1624 le64toh(d->data.entry_offset),
1625 le64toh(d->data.entry_array_offset),
1626 le64toh(d->data.n_entries),
1627 p,
1628 test_object_offset,
1629 DIRECTION_DOWN,
1630 NULL, NULL,
1631 &i);
1632
1633 if (r <= 0)
cec736d2
LP
1634 return r;
1635
de190aef
LP
1636 if (direction == DIRECTION_DOWN) {
1637 if (i >= n - 1)
1638 return 0;
cec736d2 1639
de190aef
LP
1640 i++;
1641 } else {
1642 if (i <= 0)
1643 return 0;
cec736d2 1644
de190aef
LP
1645 i--;
1646 }
cec736d2 1647
de190aef 1648 }
cec736d2 1649
de190aef
LP
1650 return generic_array_get_plus_one(f,
1651 le64toh(d->data.entry_offset),
1652 le64toh(d->data.entry_array_offset),
1653 i,
1654 ret, offset);
1655}
cec736d2 1656
cbdca852
LP
1657int journal_file_move_to_entry_by_offset_for_data(
1658 JournalFile *f,
1659 uint64_t data_offset,
1660 uint64_t p,
1661 direction_t direction,
1662 Object **ret, uint64_t *offset) {
1663
1664 int r;
1665 Object *d;
1666
1667 assert(f);
1668
1669 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1670 if (r < 0)
1671 return r;
1672
1673 return generic_array_bisect_plus_one(f,
1674 le64toh(d->data.entry_offset),
1675 le64toh(d->data.entry_array_offset),
1676 le64toh(d->data.n_entries),
1677 p,
1678 test_object_offset,
1679 direction,
1680 ret, offset, NULL);
1681}
1682
1683int journal_file_move_to_entry_by_monotonic_for_data(
1684 JournalFile *f,
1685 uint64_t data_offset,
1686 sd_id128_t boot_id,
1687 uint64_t monotonic,
1688 direction_t direction,
1689 Object **ret, uint64_t *offset) {
1690
1691 char t[9+32+1] = "_BOOT_ID=";
1692 Object *o, *d;
1693 int r;
1694 uint64_t b, z;
1695
1696 assert(f);
1697
1698 /* First, seek by time */
1699 sd_id128_to_string(boot_id, t + 9);
1700 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1701 if (r < 0)
1702 return r;
1703 if (r == 0)
1704 return -ENOENT;
1705
1706 r = generic_array_bisect_plus_one(f,
1707 le64toh(o->data.entry_offset),
1708 le64toh(o->data.entry_array_offset),
1709 le64toh(o->data.n_entries),
1710 monotonic,
1711 test_object_monotonic,
1712 direction,
1713 NULL, &z, NULL);
1714 if (r <= 0)
1715 return r;
1716
1717 /* And now, continue seeking until we find an entry that
1718 * exists in both bisection arrays */
1719
1720 for (;;) {
1721 Object *qo;
1722 uint64_t p, q;
1723
1724 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1725 if (r < 0)
1726 return r;
1727
1728 r = generic_array_bisect_plus_one(f,
1729 le64toh(d->data.entry_offset),
1730 le64toh(d->data.entry_array_offset),
1731 le64toh(d->data.n_entries),
1732 z,
1733 test_object_offset,
1734 direction,
1735 NULL, &p, NULL);
1736 if (r <= 0)
1737 return r;
1738
1739 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1740 if (r < 0)
1741 return r;
1742
1743 r = generic_array_bisect_plus_one(f,
1744 le64toh(o->data.entry_offset),
1745 le64toh(o->data.entry_array_offset),
1746 le64toh(o->data.n_entries),
1747 p,
1748 test_object_offset,
1749 direction,
1750 &qo, &q, NULL);
1751
1752 if (r <= 0)
1753 return r;
1754
1755 if (p == q) {
1756 if (ret)
1757 *ret = qo;
1758 if (offset)
1759 *offset = q;
1760
1761 return 1;
1762 }
1763
1764 z = q;
1765 }
1766
1767 return 0;
1768}
1769
de190aef
LP
1770int journal_file_move_to_entry_by_seqnum_for_data(
1771 JournalFile *f,
1772 uint64_t data_offset,
1773 uint64_t seqnum,
1774 direction_t direction,
1775 Object **ret, uint64_t *offset) {
cec736d2 1776
de190aef
LP
1777 Object *d;
1778 int r;
cec736d2 1779
91a31dde
LP
1780 assert(f);
1781
de190aef 1782 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1783 if (r < 0)
de190aef 1784 return r;
cec736d2 1785
de190aef
LP
1786 return generic_array_bisect_plus_one(f,
1787 le64toh(d->data.entry_offset),
1788 le64toh(d->data.entry_array_offset),
1789 le64toh(d->data.n_entries),
1790 seqnum,
1791 test_object_seqnum,
1792 direction,
1793 ret, offset, NULL);
1794}
cec736d2 1795
de190aef
LP
1796int journal_file_move_to_entry_by_realtime_for_data(
1797 JournalFile *f,
1798 uint64_t data_offset,
1799 uint64_t realtime,
1800 direction_t direction,
1801 Object **ret, uint64_t *offset) {
1802
1803 Object *d;
1804 int r;
1805
91a31dde
LP
1806 assert(f);
1807
de190aef 1808 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1809 if (r < 0)
de190aef
LP
1810 return r;
1811
1812 return generic_array_bisect_plus_one(f,
1813 le64toh(d->data.entry_offset),
1814 le64toh(d->data.entry_array_offset),
1815 le64toh(d->data.n_entries),
1816 realtime,
1817 test_object_realtime,
1818 direction,
1819 ret, offset, NULL);
cec736d2
LP
1820}
1821
0284adc6 1822void journal_file_dump(JournalFile *f) {
7560fffc 1823 Object *o;
7560fffc 1824 int r;
0284adc6 1825 uint64_t p;
7560fffc
LP
1826
1827 assert(f);
1828
0284adc6 1829 journal_file_print_header(f);
7560fffc 1830
0284adc6
LP
1831 p = le64toh(f->header->header_size);
1832 while (p != 0) {
1833 r = journal_file_move_to_object(f, -1, p, &o);
1834 if (r < 0)
1835 goto fail;
7560fffc 1836
0284adc6 1837 switch (o->object.type) {
d98cc1f2 1838
0284adc6
LP
1839 case OBJECT_UNUSED:
1840 printf("Type: OBJECT_UNUSED\n");
1841 break;
d98cc1f2 1842
0284adc6
LP
1843 case OBJECT_DATA:
1844 printf("Type: OBJECT_DATA\n");
1845 break;
7560fffc 1846
0284adc6
LP
1847 case OBJECT_ENTRY:
1848 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1849 (unsigned long long) le64toh(o->entry.seqnum),
1850 (unsigned long long) le64toh(o->entry.monotonic),
1851 (unsigned long long) le64toh(o->entry.realtime));
1852 break;
7560fffc 1853
0284adc6
LP
1854 case OBJECT_FIELD_HASH_TABLE:
1855 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1856 break;
7560fffc 1857
0284adc6
LP
1858 case OBJECT_DATA_HASH_TABLE:
1859 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1860 break;
7560fffc 1861
0284adc6
LP
1862 case OBJECT_ENTRY_ARRAY:
1863 printf("Type: OBJECT_ENTRY_ARRAY\n");
1864 break;
7560fffc 1865
0284adc6
LP
1866 case OBJECT_TAG:
1867 printf("Type: OBJECT_TAG %llu\n",
1868 (unsigned long long) le64toh(o->tag.seqnum));
1869 break;
1870 }
7560fffc 1871
0284adc6
LP
1872 if (o->object.flags & OBJECT_COMPRESSED)
1873 printf("Flags: COMPRESSED\n");
7560fffc 1874
0284adc6
LP
1875 if (p == le64toh(f->header->tail_object_offset))
1876 p = 0;
1877 else
1878 p = p + ALIGN64(le64toh(o->object.size));
1879 }
7560fffc 1880
0284adc6
LP
1881 return;
1882fail:
1883 log_error("File corrupt");
7560fffc
LP
1884}
1885
0284adc6
LP
1886void journal_file_print_header(JournalFile *f) {
1887 char a[33], b[33], c[33];
1888 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
7560fffc
LP
1889
1890 assert(f);
7560fffc 1891
0284adc6
LP
1892 printf("File Path: %s\n"
1893 "File ID: %s\n"
1894 "Machine ID: %s\n"
1895 "Boot ID: %s\n"
1896 "Sequential Number ID: %s\n"
1897 "State: %s\n"
1898 "Compatible Flags:%s%s\n"
1899 "Incompatible Flags:%s%s\n"
1900 "Header size: %llu\n"
1901 "Arena size: %llu\n"
1902 "Data Hash Table Size: %llu\n"
1903 "Field Hash Table Size: %llu\n"
0284adc6
LP
1904 "Rotate Suggested: %s\n"
1905 "Head Sequential Number: %llu\n"
1906 "Tail Sequential Number: %llu\n"
1907 "Head Realtime Timestamp: %s\n"
3223f44f
LP
1908 "Tail Realtime Timestamp: %s\n"
1909 "Objects: %llu\n"
1910 "Entry Objects: %llu\n",
0284adc6
LP
1911 f->path,
1912 sd_id128_to_string(f->header->file_id, a),
1913 sd_id128_to_string(f->header->machine_id, b),
1914 sd_id128_to_string(f->header->boot_id, c),
1915 sd_id128_to_string(f->header->seqnum_id, c),
3223f44f
LP
1916 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1917 f->header->state == STATE_ONLINE ? "ONLINE" :
1918 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
baed47c3
LP
1919 (f->header->compatible_flags & HEADER_COMPATIBLE_SEALED) ? " SEALED" : "",
1920 (f->header->compatible_flags & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
0284adc6
LP
1921 (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1922 (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1923 (unsigned long long) le64toh(f->header->header_size),
1924 (unsigned long long) le64toh(f->header->arena_size),
1925 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1926 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
0284adc6
LP
1927 yes_no(journal_file_rotate_suggested(f)),
1928 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1929 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1930 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
3223f44f
LP
1931 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1932 (unsigned long long) le64toh(f->header->n_objects),
1933 (unsigned long long) le64toh(f->header->n_entries));
7560fffc 1934
0284adc6
LP
1935 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1936 printf("Data Objects: %llu\n"
1937 "Data Hash Table Fill: %.1f%%\n",
1938 (unsigned long long) le64toh(f->header->n_data),
1939 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 1940
0284adc6
LP
1941 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1942 printf("Field Objects: %llu\n"
1943 "Field Hash Table Fill: %.1f%%\n",
1944 (unsigned long long) le64toh(f->header->n_fields),
1945 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
1946
1947 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1948 printf("Tag Objects: %llu\n",
1949 (unsigned long long) le64toh(f->header->n_tags));
1950 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1951 printf("Entry Array Objects: %llu\n",
1952 (unsigned long long) le64toh(f->header->n_entry_arrays));
7560fffc
LP
1953}
1954
0284adc6
LP
1955int journal_file_open(
1956 const char *fname,
1957 int flags,
1958 mode_t mode,
1959 bool compress,
baed47c3 1960 bool seal,
0284adc6
LP
1961 JournalMetrics *metrics,
1962 MMapCache *mmap_cache,
1963 JournalFile *template,
1964 JournalFile **ret) {
7560fffc 1965
0284adc6
LP
1966 JournalFile *f;
1967 int r;
1968 bool newly_created = false;
7560fffc 1969
0284adc6 1970 assert(fname);
7560fffc 1971
0284adc6
LP
1972 if ((flags & O_ACCMODE) != O_RDONLY &&
1973 (flags & O_ACCMODE) != O_RDWR)
1974 return -EINVAL;
7560fffc 1975
a0108012
LP
1976 if (!endswith(fname, ".journal") &&
1977 !endswith(fname, ".journal~"))
0284adc6 1978 return -EINVAL;
7560fffc 1979
0284adc6
LP
1980 f = new0(JournalFile, 1);
1981 if (!f)
1982 return -ENOMEM;
7560fffc 1983
0284adc6
LP
1984 f->fd = -1;
1985 f->mode = mode;
7560fffc 1986
0284adc6
LP
1987 f->flags = flags;
1988 f->prot = prot_from_flags(flags);
1989 f->writable = (flags & O_ACCMODE) != O_RDONLY;
1990 f->compress = compress;
baed47c3 1991 f->seal = seal;
7560fffc 1992
0284adc6
LP
1993 if (mmap_cache)
1994 f->mmap = mmap_cache_ref(mmap_cache);
1995 else {
1996 /* One context for each type, plus the zeroth catchall
1997 * context. One fd for the file plus one for each type
1998 * (which we need during verification */
1999 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
2000 if (!f->mmap) {
2001 r = -ENOMEM;
2002 goto fail;
2003 }
2004 }
7560fffc 2005
0284adc6
LP
2006 f->path = strdup(fname);
2007 if (!f->path) {
2008 r = -ENOMEM;
2009 goto fail;
2010 }
7560fffc 2011
0284adc6
LP
2012 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2013 if (f->fd < 0) {
2014 r = -errno;
2015 goto fail;
7560fffc 2016 }
7560fffc 2017
0284adc6
LP
2018 if (fstat(f->fd, &f->last_stat) < 0) {
2019 r = -errno;
2020 goto fail;
2021 }
7560fffc 2022
0284adc6
LP
2023 if (f->last_stat.st_size == 0 && f->writable) {
2024 newly_created = true;
7560fffc 2025
0284adc6 2026 /* Try to load the FSPRG state, and if we can't, then
baed47c3
LP
2027 * just don't do sealing */
2028 r = journal_file_fss_load(f);
0284adc6 2029 if (r < 0)
baed47c3 2030 f->seal = false;
7560fffc 2031
0284adc6
LP
2032 r = journal_file_init_header(f, template);
2033 if (r < 0)
2034 goto fail;
7560fffc 2035
0284adc6
LP
2036 if (fstat(f->fd, &f->last_stat) < 0) {
2037 r = -errno;
2038 goto fail;
2039 }
2040 }
7560fffc 2041
0284adc6
LP
2042 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2043 r = -EIO;
2044 goto fail;
2045 }
7560fffc 2046
0284adc6
LP
2047 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2048 if (f->header == MAP_FAILED) {
2049 f->header = NULL;
2050 r = -errno;
2051 goto fail;
2052 }
7560fffc 2053
0284adc6
LP
2054 if (!newly_created) {
2055 r = journal_file_verify_header(f);
2056 if (r < 0)
2057 goto fail;
2058 }
7560fffc 2059
0284adc6 2060 if (!newly_created && f->writable) {
baed47c3 2061 r = journal_file_fss_load(f);
0284adc6
LP
2062 if (r < 0)
2063 goto fail;
2064 }
cec736d2
LP
2065
2066 if (f->writable) {
4a92baf3
LP
2067 if (metrics) {
2068 journal_default_metrics(metrics, f->fd);
2069 f->metrics = *metrics;
2070 } else if (template)
2071 f->metrics = template->metrics;
2072
cec736d2
LP
2073 r = journal_file_refresh_header(f);
2074 if (r < 0)
2075 goto fail;
2076 }
2077
baed47c3 2078 r = journal_file_hmac_setup(f);
14d10188
LP
2079 if (r < 0)
2080 goto fail;
2081
cec736d2 2082 if (newly_created) {
de190aef 2083 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2084 if (r < 0)
2085 goto fail;
2086
de190aef 2087 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2088 if (r < 0)
2089 goto fail;
7560fffc
LP
2090
2091 r = journal_file_append_first_tag(f);
2092 if (r < 0)
2093 goto fail;
cec736d2
LP
2094 }
2095
de190aef 2096 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2097 if (r < 0)
2098 goto fail;
2099
de190aef 2100 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2101 if (r < 0)
2102 goto fail;
2103
2104 if (ret)
2105 *ret = f;
2106
2107 return 0;
2108
2109fail:
2110 journal_file_close(f);
2111
2112 return r;
2113}
0ac38b70 2114
baed47c3 2115int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
0ac38b70
LP
2116 char *p;
2117 size_t l;
2118 JournalFile *old_file, *new_file = NULL;
2119 int r;
2120
2121 assert(f);
2122 assert(*f);
2123
2124 old_file = *f;
2125
2126 if (!old_file->writable)
2127 return -EINVAL;
2128
2129 if (!endswith(old_file->path, ".journal"))
2130 return -EINVAL;
2131
2132 l = strlen(old_file->path);
2133
9447a7f1 2134 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2135 if (!p)
2136 return -ENOMEM;
2137
2138 memcpy(p, old_file->path, l - 8);
2139 p[l-8] = '@';
2140 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2141 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2142 "-%016llx-%016llx.journal",
beec0085 2143 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
0ac38b70
LP
2144 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2145
2146 r = rename(old_file->path, p);
2147 free(p);
2148
2149 if (r < 0)
2150 return -errno;
2151
ccdbaf91 2152 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2153
baed47c3 2154 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2155 journal_file_close(old_file);
2156
2157 *f = new_file;
2158 return r;
2159}
2160
9447a7f1
LP
2161int journal_file_open_reliably(
2162 const char *fname,
2163 int flags,
2164 mode_t mode,
7560fffc 2165 bool compress,
baed47c3 2166 bool seal,
4a92baf3 2167 JournalMetrics *metrics,
27370278 2168 MMapCache *mmap_cache,
9447a7f1
LP
2169 JournalFile *template,
2170 JournalFile **ret) {
2171
2172 int r;
2173 size_t l;
2174 char *p;
2175
baed47c3 2176 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2177 metrics, mmap_cache, template, ret);
0071d9f1
LP
2178 if (r != -EBADMSG && /* corrupted */
2179 r != -ENODATA && /* truncated */
2180 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2181 r != -EPROTONOSUPPORT && /* incompatible feature */
2182 r != -EBUSY && /* unclean shutdown */
2183 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2184 return r;
2185
2186 if ((flags & O_ACCMODE) == O_RDONLY)
2187 return r;
2188
2189 if (!(flags & O_CREAT))
2190 return r;
2191
7560fffc
LP
2192 if (!endswith(fname, ".journal"))
2193 return r;
2194
5c70eab4
LP
2195 /* The file is corrupted. Rotate it away and try it again (but only once) */
2196
9447a7f1
LP
2197 l = strlen(fname);
2198 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2199 (int) (l-8), fname,
2200 (unsigned long long) now(CLOCK_REALTIME),
2201 random_ull()) < 0)
2202 return -ENOMEM;
2203
2204 r = rename(fname, p);
2205 free(p);
2206 if (r < 0)
2207 return -errno;
2208
a1a1898f 2209 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2210
baed47c3 2211 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2212 metrics, mmap_cache, template, ret);
9447a7f1
LP
2213}
2214
cf244689
LP
2215
2216int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2217 uint64_t i, n;
2218 uint64_t q, xor_hash = 0;
2219 int r;
2220 EntryItem *items;
2221 dual_timestamp ts;
2222
2223 assert(from);
2224 assert(to);
2225 assert(o);
2226 assert(p);
2227
2228 if (!to->writable)
2229 return -EPERM;
2230
2231 ts.monotonic = le64toh(o->entry.monotonic);
2232 ts.realtime = le64toh(o->entry.realtime);
2233
2234 if (to->tail_entry_monotonic_valid &&
2235 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2236 return -EINVAL;
2237
cf244689
LP
2238 n = journal_file_entry_n_items(o);
2239 items = alloca(sizeof(EntryItem) * n);
2240
2241 for (i = 0; i < n; i++) {
4fd052ae
FC
2242 uint64_t l, h;
2243 le64_t le_hash;
cf244689
LP
2244 size_t t;
2245 void *data;
2246 Object *u;
2247
2248 q = le64toh(o->entry.items[i].object_offset);
2249 le_hash = o->entry.items[i].hash;
2250
2251 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2252 if (r < 0)
2253 return r;
2254
2255 if (le_hash != o->data.hash)
2256 return -EBADMSG;
2257
2258 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2259 t = (size_t) l;
2260
2261 /* We hit the limit on 32bit machines */
2262 if ((uint64_t) t != l)
2263 return -E2BIG;
2264
2265 if (o->object.flags & OBJECT_COMPRESSED) {
2266#ifdef HAVE_XZ
2267 uint64_t rsize;
2268
2269 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2270 return -EBADMSG;
2271
2272 data = from->compress_buffer;
2273 l = rsize;
2274#else
2275 return -EPROTONOSUPPORT;
2276#endif
2277 } else
2278 data = o->data.payload;
2279
2280 r = journal_file_append_data(to, data, l, &u, &h);
2281 if (r < 0)
2282 return r;
2283
2284 xor_hash ^= le64toh(u->data.hash);
2285 items[i].object_offset = htole64(h);
2286 items[i].hash = u->data.hash;
2287
2288 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2289 if (r < 0)
2290 return r;
2291 }
2292
2293 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2294}
babfc091
LP
2295
2296void journal_default_metrics(JournalMetrics *m, int fd) {
2297 uint64_t fs_size = 0;
2298 struct statvfs ss;
a7bc2c2a 2299 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2300
2301 assert(m);
2302 assert(fd >= 0);
2303
2304 if (fstatvfs(fd, &ss) >= 0)
2305 fs_size = ss.f_frsize * ss.f_blocks;
2306
2307 if (m->max_use == (uint64_t) -1) {
2308
2309 if (fs_size > 0) {
2310 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2311
2312 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2313 m->max_use = DEFAULT_MAX_USE_UPPER;
2314
2315 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2316 m->max_use = DEFAULT_MAX_USE_LOWER;
2317 } else
2318 m->max_use = DEFAULT_MAX_USE_LOWER;
2319 } else {
2320 m->max_use = PAGE_ALIGN(m->max_use);
2321
2322 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2323 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2324 }
2325
2326 if (m->max_size == (uint64_t) -1) {
2327 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2328
2329 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2330 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2331 } else
2332 m->max_size = PAGE_ALIGN(m->max_size);
2333
2334 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2335 m->max_size = JOURNAL_FILE_SIZE_MIN;
2336
2337 if (m->max_size*2 > m->max_use)
2338 m->max_use = m->max_size*2;
2339
2340 if (m->min_size == (uint64_t) -1)
2341 m->min_size = JOURNAL_FILE_SIZE_MIN;
2342 else {
2343 m->min_size = PAGE_ALIGN(m->min_size);
2344
2345 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2346 m->min_size = JOURNAL_FILE_SIZE_MIN;
2347
2348 if (m->min_size > m->max_size)
2349 m->max_size = m->min_size;
2350 }
2351
2352 if (m->keep_free == (uint64_t) -1) {
2353
2354 if (fs_size > 0) {
2355 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2356
2357 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2358 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2359
2360 } else
2361 m->keep_free = DEFAULT_KEEP_FREE;
2362 }
2363
e7bf07b3
LP
2364 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2365 format_bytes(a, sizeof(a), m->max_use),
2366 format_bytes(b, sizeof(b), m->max_size),
2367 format_bytes(c, sizeof(c), m->min_size),
2368 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2369}
08984293
LP
2370
2371int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2372 assert(f);
2373 assert(from || to);
2374
2375 if (from) {
162566a4
LP
2376 if (f->header->head_entry_realtime == 0)
2377 return -ENOENT;
08984293 2378
162566a4 2379 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2380 }
2381
2382 if (to) {
162566a4
LP
2383 if (f->header->tail_entry_realtime == 0)
2384 return -ENOENT;
08984293 2385
162566a4 2386 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2387 }
2388
2389 return 1;
2390}
2391
2392int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2393 char t[9+32+1] = "_BOOT_ID=";
2394 Object *o;
2395 uint64_t p;
2396 int r;
2397
2398 assert(f);
2399 assert(from || to);
2400
2401 sd_id128_to_string(boot_id, t + 9);
2402
2403 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2404 if (r <= 0)
2405 return r;
2406
2407 if (le64toh(o->data.n_entries) <= 0)
2408 return 0;
2409
2410 if (from) {
2411 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2412 if (r < 0)
2413 return r;
2414
2415 *from = le64toh(o->entry.monotonic);
2416 }
2417
2418 if (to) {
2419 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2420 if (r < 0)
2421 return r;
2422
2423 r = generic_array_get_plus_one(f,
2424 le64toh(o->data.entry_offset),
2425 le64toh(o->data.entry_array_offset),
2426 le64toh(o->data.n_entries)-1,
2427 &o, NULL);
2428 if (r <= 0)
2429 return r;
2430
2431 *to = le64toh(o->entry.monotonic);
2432 }
2433
2434 return 1;
2435}
dca6219e
LP
2436
2437bool journal_file_rotate_suggested(JournalFile *f) {
2438 assert(f);
2439
2440 /* If we gained new header fields we gained new features,
2441 * hence suggest a rotation */
361f9cbc
LP
2442 if (le64toh(f->header->header_size) < sizeof(Header)) {
2443 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2444 return true;
361f9cbc 2445 }
dca6219e
LP
2446
2447 /* Let's check if the hash tables grew over a certain fill
2448 * level (75%, borrowing this value from Java's hash table
2449 * implementation), and if so suggest a rotation. To calculate
2450 * the fill level we need the n_data field, which only exists
2451 * in newer versions. */
2452
2453 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
2454 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2455 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2456 f->path,
2457 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2458 (unsigned long long) le64toh(f->header->n_data),
2459 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2460 (unsigned long long) (f->last_stat.st_size),
2461 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 2462 return true;
361f9cbc 2463 }
dca6219e
LP
2464
2465 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
2466 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2467 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2468 f->path,
2469 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2470 (unsigned long long) le64toh(f->header->n_fields),
2471 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 2472 return true;
361f9cbc 2473 }
dca6219e
LP
2474
2475 return false;
2476}