]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
unit: don't allow units to be gc'ed that still are referenced via UnitRef
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
0284adc6 32#include "journal-authenticate.h"
cec736d2 33#include "lookup3.h"
807e17f0 34#include "compress.h"
7560fffc 35#include "fsprg.h"
cec736d2 36
4a92baf3
LP
/* Default sizes (in bytes) of the two hash tables. The data table
 * default is used as a lower bound when a size is derived from
 * max_size; the field table always uses its default. */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL * sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL * sizeof(HashItem))

/* Data payloads smaller than this are never passed to the compressor */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL * 1024ULL)                         /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL * 1024ULL * 1024ULL)                /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)      /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL * 1024ULL * 1024ULL)             /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)    /* 4 GiB */

/* This is the keep_free value when we can't determine the system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL * 1024ULL)                           /* 1 MiB */

/* n_data was the first entry we added after the initial file format
 * design, hence a valid header must reach at least up to that field */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 63
cec736d2 64void journal_file_close(JournalFile *f) {
de190aef 65 assert(f);
cec736d2 66
feb12d3e 67#ifdef HAVE_GCRYPT
b0af6f41 68 /* Write the final tag */
c586dbf1 69 if (f->seal && f->writable)
b0af6f41 70 journal_file_append_tag(f);
feb12d3e 71#endif
b0af6f41 72
7560fffc 73 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
74 if (f->mmap && f->fd >= 0)
75 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc
LP
76
77 if (f->writable && f->fd >= 0)
78 fdatasync(f->fd);
79
d384c7a8 80 if (f->header) {
cd96b3b8
LP
81 /* Mark the file offline. Don't override the archived state if it already is set */
82 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 83 f->header->state = STATE_OFFLINE;
cec736d2 84
d384c7a8
MS
85 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
86 }
cec736d2 87
0ac38b70
LP
88 if (f->fd >= 0)
89 close_nointr_nofail(f->fd);
90
cec736d2 91 free(f->path);
807e17f0 92
16e9f408
LP
93 if (f->mmap)
94 mmap_cache_unref(f->mmap);
95
807e17f0
LP
96#ifdef HAVE_XZ
97 free(f->compress_buffer);
98#endif
99
7560fffc 100#ifdef HAVE_GCRYPT
baed47c3
LP
101 if (f->fss_file)
102 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
103 else if (f->fsprg_state)
104 free(f->fsprg_state);
105
106 free(f->fsprg_seed);
7560fffc
LP
107
108 if (f->hmac)
109 gcry_md_close(f->hmac);
110#endif
111
cec736d2
LP
112 free(f);
113}
114
0ac38b70 115static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
116 Header h;
117 ssize_t k;
118 int r;
119
120 assert(f);
121
122 zero(h);
7560fffc 123 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 124 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 125
7560fffc
LP
126 h.incompatible_flags =
127 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
128
129 h.compatible_flags =
baed47c3 130 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 131
cec736d2
LP
132 r = sd_id128_randomize(&h.file_id);
133 if (r < 0)
134 return r;
135
0ac38b70
LP
136 if (template) {
137 h.seqnum_id = template->header->seqnum_id;
beec0085 138 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
139 } else
140 h.seqnum_id = h.file_id;
cec736d2
LP
141
142 k = pwrite(f->fd, &h, sizeof(h), 0);
143 if (k < 0)
144 return -errno;
145
146 if (k != sizeof(h))
147 return -EIO;
148
149 return 0;
150}
151
152static int journal_file_refresh_header(JournalFile *f) {
153 int r;
de190aef 154 sd_id128_t boot_id;
cec736d2
LP
155
156 assert(f);
157
158 r = sd_id128_get_machine(&f->header->machine_id);
159 if (r < 0)
160 return r;
161
de190aef 162 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
163 if (r < 0)
164 return r;
165
de190aef
LP
166 if (sd_id128_equal(boot_id, f->header->boot_id))
167 f->tail_entry_monotonic_valid = true;
168
169 f->header->boot_id = boot_id;
170
171 f->header->state = STATE_ONLINE;
b788cc23 172
7560fffc
LP
173 /* Sync the online state to disk */
174 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
175 fdatasync(f->fd);
b788cc23 176
cec736d2
LP
177 return 0;
178}
179
180static int journal_file_verify_header(JournalFile *f) {
181 assert(f);
182
7560fffc 183 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
184 return -EBADMSG;
185
7560fffc
LP
186 /* In both read and write mode we refuse to open files with
187 * incompatible flags we don't know */
807e17f0 188#ifdef HAVE_XZ
7560fffc 189 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
190 return -EPROTONOSUPPORT;
191#else
cec736d2
LP
192 if (f->header->incompatible_flags != 0)
193 return -EPROTONOSUPPORT;
807e17f0 194#endif
cec736d2 195
7560fffc
LP
196 /* When open for writing we refuse to open files with
197 * compatible flags, too */
198 if (f->writable) {
199#ifdef HAVE_GCRYPT
baed47c3 200 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
201 return -EPROTONOSUPPORT;
202#else
203 if (f->header->compatible_flags != 0)
204 return -EPROTONOSUPPORT;
205#endif
206 }
207
db11ac1a
LP
208 if (f->header->state >= _STATE_MAX)
209 return -EBADMSG;
210
dca6219e
LP
211 /* The first addition was n_data, so check that we are at least this large */
212 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
213 return -EBADMSG;
214
8088cbd3 215 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
216 return -EBADMSG;
217
db11ac1a
LP
218 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
219 return -ENODATA;
220
221 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
222 return -ENODATA;
223
224 if (!VALID64(f->header->data_hash_table_offset) ||
225 !VALID64(f->header->field_hash_table_offset) ||
226 !VALID64(f->header->tail_object_offset) ||
227 !VALID64(f->header->entry_array_offset))
cec736d2
LP
228 return -ENODATA;
229
230 if (f->writable) {
ccdbaf91 231 uint8_t state;
cec736d2
LP
232 sd_id128_t machine_id;
233 int r;
234
235 r = sd_id128_get_machine(&machine_id);
236 if (r < 0)
237 return r;
238
239 if (!sd_id128_equal(machine_id, f->header->machine_id))
240 return -EHOSTDOWN;
241
de190aef 242 state = f->header->state;
cec736d2 243
71fa6f00
LP
244 if (state == STATE_ONLINE) {
245 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
246 return -EBUSY;
247 } else if (state == STATE_ARCHIVED)
cec736d2 248 return -ESHUTDOWN;
71fa6f00
LP
249 else if (state != STATE_OFFLINE) {
250 log_debug("Journal file %s has unknown state %u.", f->path, state);
251 return -EBUSY;
252 }
cec736d2
LP
253 }
254
8088cbd3 255 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
c586dbf1
LP
256
257 if (f->writable)
8088cbd3 258 f->seal = JOURNAL_HEADER_SEALED(f->header);
7560fffc 259
cec736d2
LP
260 return 0;
261}
262
263static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 264 uint64_t old_size, new_size;
fec2aa2f 265 int r;
cec736d2
LP
266
267 assert(f);
268
cec736d2 269 /* We assume that this file is not sparse, and we know that
38ac38b2 270 * for sure, since we always call posix_fallocate()
cec736d2
LP
271 * ourselves */
272
273 old_size =
23b0b2b2 274 le64toh(f->header->header_size) +
cec736d2
LP
275 le64toh(f->header->arena_size);
276
bc85bfee 277 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
278 if (new_size < le64toh(f->header->header_size))
279 new_size = le64toh(f->header->header_size);
bc85bfee
LP
280
281 if (new_size <= old_size)
cec736d2
LP
282 return 0;
283
bc85bfee
LP
284 if (f->metrics.max_size > 0 &&
285 new_size > f->metrics.max_size)
286 return -E2BIG;
cec736d2 287
bc85bfee
LP
288 if (new_size > f->metrics.min_size &&
289 f->metrics.keep_free > 0) {
cec736d2
LP
290 struct statvfs svfs;
291
292 if (fstatvfs(f->fd, &svfs) >= 0) {
293 uint64_t available;
294
295 available = svfs.f_bfree * svfs.f_bsize;
296
bc85bfee
LP
297 if (available >= f->metrics.keep_free)
298 available -= f->metrics.keep_free;
cec736d2
LP
299 else
300 available = 0;
301
302 if (new_size - old_size > available)
303 return -E2BIG;
304 }
305 }
306
bc85bfee
LP
307 /* Note that the glibc fallocate() fallback is very
308 inefficient, hence we try to minimize the allocation area
309 as we can. */
fec2aa2f
GV
310 r = posix_fallocate(f->fd, old_size, new_size - old_size);
311 if (r != 0)
312 return -r;
cec736d2
LP
313
314 if (fstat(f->fd, &f->last_stat) < 0)
315 return -errno;
316
23b0b2b2 317 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
318
319 return 0;
320}
321
fcde2389 322static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
cec736d2 323 assert(f);
cec736d2
LP
324 assert(ret);
325
2a59ea54 326 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
327 if (offset + size > (uint64_t) f->last_stat.st_size) {
328 /* Hmm, out of range? Let's refresh the fstat() data
329 * first, before we trust that check. */
330
331 if (fstat(f->fd, &f->last_stat) < 0 ||
332 offset + size > (uint64_t) f->last_stat.st_size)
333 return -EADDRNOTAVAIL;
334 }
335
fcde2389 336 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
cec736d2
LP
337}
338
16e9f408
LP
339static uint64_t minimum_header_size(Object *o) {
340
341 static uint64_t table[] = {
342 [OBJECT_DATA] = sizeof(DataObject),
343 [OBJECT_FIELD] = sizeof(FieldObject),
344 [OBJECT_ENTRY] = sizeof(EntryObject),
345 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
346 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
347 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
348 [OBJECT_TAG] = sizeof(TagObject),
349 };
350
351 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
352 return sizeof(ObjectHeader);
353
354 return table[o->object.type];
355}
356
de190aef 357int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
358 int r;
359 void *t;
360 Object *o;
361 uint64_t s;
16e9f408 362 unsigned context;
cec736d2
LP
363
364 assert(f);
365 assert(ret);
366
db11ac1a
LP
367 /* Objects may only be located at multiple of 64 bit */
368 if (!VALID64(offset))
369 return -EFAULT;
370
16e9f408
LP
371 /* One context for each type, plus one catch-all for the rest */
372 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
373
fcde2389 374 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
375 if (r < 0)
376 return r;
377
378 o = (Object*) t;
379 s = le64toh(o->object.size);
380
381 if (s < sizeof(ObjectHeader))
382 return -EBADMSG;
383
16e9f408
LP
384 if (o->object.type <= OBJECT_UNUSED)
385 return -EBADMSG;
386
387 if (s < minimum_header_size(o))
388 return -EBADMSG;
389
de190aef 390 if (type >= 0 && o->object.type != type)
cec736d2
LP
391 return -EBADMSG;
392
393 if (s > sizeof(ObjectHeader)) {
fcde2389 394 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
cec736d2
LP
395 if (r < 0)
396 return r;
397
398 o = (Object*) t;
399 }
400
cec736d2
LP
401 *ret = o;
402 return 0;
403}
404
d98cc1f2 405static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
406 uint64_t r;
407
408 assert(f);
409
beec0085 410 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
411
412 if (seqnum) {
de190aef 413 /* If an external seqnum counter was passed, we update
c2373f84
LP
414 * both the local and the external one, and set it to
415 * the maximum of both */
416
417 if (*seqnum + 1 > r)
418 r = *seqnum + 1;
419
420 *seqnum = r;
421 }
422
beec0085 423 f->header->tail_entry_seqnum = htole64(r);
cec736d2 424
beec0085
LP
425 if (f->header->head_entry_seqnum == 0)
426 f->header->head_entry_seqnum = htole64(r);
de190aef 427
cec736d2
LP
428 return r;
429}
430
0284adc6 431int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
432 int r;
433 uint64_t p;
434 Object *tail, *o;
435 void *t;
436
437 assert(f);
16e9f408 438 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
439 assert(size >= sizeof(ObjectHeader));
440 assert(offset);
441 assert(ret);
442
443 p = le64toh(f->header->tail_object_offset);
cec736d2 444 if (p == 0)
23b0b2b2 445 p = le64toh(f->header->header_size);
cec736d2 446 else {
de190aef 447 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
448 if (r < 0)
449 return r;
450
451 p += ALIGN64(le64toh(tail->object.size));
452 }
453
454 r = journal_file_allocate(f, p, size);
455 if (r < 0)
456 return r;
457
fcde2389 458 r = journal_file_move_to(f, type, false, p, size, &t);
cec736d2
LP
459 if (r < 0)
460 return r;
461
462 o = (Object*) t;
463
464 zero(o->object);
de190aef 465 o->object.type = type;
cec736d2
LP
466 o->object.size = htole64(size);
467
468 f->header->tail_object_offset = htole64(p);
cec736d2
LP
469 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
470
471 *ret = o;
472 *offset = p;
473
474 return 0;
475}
476
de190aef 477static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
478 uint64_t s, p;
479 Object *o;
480 int r;
481
482 assert(f);
483
dfabe643 484 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
485 journal file and we want to make sure we never get beyond
486 75% fill level. Calculate the hash table size for the
487 maximum file size based on these metrics. */
488
dfabe643 489 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
490 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
491 s = DEFAULT_DATA_HASH_TABLE_SIZE;
492
2b43f939 493 log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 494
de190aef
LP
495 r = journal_file_append_object(f,
496 OBJECT_DATA_HASH_TABLE,
497 offsetof(Object, hash_table.items) + s,
498 &o, &p);
cec736d2
LP
499 if (r < 0)
500 return r;
501
de190aef 502 memset(o->hash_table.items, 0, s);
cec736d2 503
de190aef
LP
504 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
505 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
506
507 return 0;
508}
509
de190aef 510static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
511 uint64_t s, p;
512 Object *o;
513 int r;
514
515 assert(f);
516
de190aef
LP
517 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
518 r = journal_file_append_object(f,
519 OBJECT_FIELD_HASH_TABLE,
520 offsetof(Object, hash_table.items) + s,
521 &o, &p);
cec736d2
LP
522 if (r < 0)
523 return r;
524
de190aef 525 memset(o->hash_table.items, 0, s);
cec736d2 526
de190aef
LP
527 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
528 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
529
530 return 0;
531}
532
de190aef 533static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
534 uint64_t s, p;
535 void *t;
536 int r;
537
538 assert(f);
539
de190aef
LP
540 p = le64toh(f->header->data_hash_table_offset);
541 s = le64toh(f->header->data_hash_table_size);
cec736d2 542
de190aef 543 r = journal_file_move_to(f,
16e9f408 544 OBJECT_DATA_HASH_TABLE,
fcde2389 545 true,
de190aef
LP
546 p, s,
547 &t);
cec736d2
LP
548 if (r < 0)
549 return r;
550
de190aef 551 f->data_hash_table = t;
cec736d2
LP
552 return 0;
553}
554
de190aef 555static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
556 uint64_t s, p;
557 void *t;
558 int r;
559
560 assert(f);
561
de190aef
LP
562 p = le64toh(f->header->field_hash_table_offset);
563 s = le64toh(f->header->field_hash_table_size);
cec736d2 564
de190aef 565 r = journal_file_move_to(f,
16e9f408 566 OBJECT_FIELD_HASH_TABLE,
fcde2389 567 true,
de190aef
LP
568 p, s,
569 &t);
cec736d2
LP
570 if (r < 0)
571 return r;
572
de190aef 573 f->field_hash_table = t;
cec736d2
LP
574 return 0;
575}
576
de190aef
LP
577static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
578 uint64_t p, h;
cec736d2
LP
579 int r;
580
581 assert(f);
582 assert(o);
583 assert(offset > 0);
de190aef 584 assert(o->object.type == OBJECT_DATA);
cec736d2 585
48496df6
LP
586 /* This might alter the window we are looking at */
587
de190aef
LP
588 o->data.next_hash_offset = o->data.next_field_offset = 0;
589 o->data.entry_offset = o->data.entry_array_offset = 0;
590 o->data.n_entries = 0;
cec736d2 591
de190aef 592 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 593 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
594 if (p == 0) {
595 /* Only entry in the hash table is easy */
de190aef 596 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 597 } else {
48496df6
LP
598 /* Move back to the previous data object, to patch in
599 * pointer */
cec736d2 600
de190aef 601 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
602 if (r < 0)
603 return r;
604
de190aef 605 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
606 }
607
de190aef 608 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 609
dca6219e
LP
610 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
611 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
612
cec736d2
LP
613 return 0;
614}
615
de190aef
LP
616int journal_file_find_data_object_with_hash(
617 JournalFile *f,
618 const void *data, uint64_t size, uint64_t hash,
619 Object **ret, uint64_t *offset) {
48496df6 620
de190aef 621 uint64_t p, osize, h;
cec736d2
LP
622 int r;
623
624 assert(f);
625 assert(data || size == 0);
626
627 osize = offsetof(Object, data.payload) + size;
628
bc85bfee
LP
629 if (f->header->data_hash_table_size == 0)
630 return -EBADMSG;
631
de190aef
LP
632 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
633 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 634
de190aef
LP
635 while (p > 0) {
636 Object *o;
cec736d2 637
de190aef 638 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
639 if (r < 0)
640 return r;
641
807e17f0 642 if (le64toh(o->data.hash) != hash)
85a131e8 643 goto next;
807e17f0
LP
644
645 if (o->object.flags & OBJECT_COMPRESSED) {
646#ifdef HAVE_XZ
b785c858 647 uint64_t l, rsize;
cec736d2 648
807e17f0
LP
649 l = le64toh(o->object.size);
650 if (l <= offsetof(Object, data.payload))
cec736d2
LP
651 return -EBADMSG;
652
807e17f0
LP
653 l -= offsetof(Object, data.payload);
654
655 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
656 return -EBADMSG;
657
b785c858 658 if (rsize == size &&
807e17f0
LP
659 memcmp(f->compress_buffer, data, size) == 0) {
660
661 if (ret)
662 *ret = o;
663
664 if (offset)
665 *offset = p;
666
667 return 1;
668 }
669#else
670 return -EPROTONOSUPPORT;
671#endif
672
673 } else if (le64toh(o->object.size) == osize &&
674 memcmp(o->data.payload, data, size) == 0) {
675
cec736d2
LP
676 if (ret)
677 *ret = o;
678
679 if (offset)
680 *offset = p;
681
de190aef 682 return 1;
cec736d2
LP
683 }
684
85a131e8 685 next:
cec736d2
LP
686 p = le64toh(o->data.next_hash_offset);
687 }
688
de190aef
LP
689 return 0;
690}
691
692int journal_file_find_data_object(
693 JournalFile *f,
694 const void *data, uint64_t size,
695 Object **ret, uint64_t *offset) {
696
697 uint64_t hash;
698
699 assert(f);
700 assert(data || size == 0);
701
702 hash = hash64(data, size);
703
704 return journal_file_find_data_object_with_hash(f,
705 data, size, hash,
706 ret, offset);
707}
708
48496df6
LP
709static int journal_file_append_data(
710 JournalFile *f,
711 const void *data, uint64_t size,
712 Object **ret, uint64_t *offset) {
713
de190aef
LP
714 uint64_t hash, p;
715 uint64_t osize;
716 Object *o;
717 int r;
807e17f0 718 bool compressed = false;
de190aef
LP
719
720 assert(f);
721 assert(data || size == 0);
722
723 hash = hash64(data, size);
724
725 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
726 if (r < 0)
727 return r;
728 else if (r > 0) {
729
730 if (ret)
731 *ret = o;
732
733 if (offset)
734 *offset = p;
735
736 return 0;
737 }
738
739 osize = offsetof(Object, data.payload) + size;
740 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
741 if (r < 0)
742 return r;
743
cec736d2 744 o->data.hash = htole64(hash);
807e17f0
LP
745
746#ifdef HAVE_XZ
747 if (f->compress &&
748 size >= COMPRESSION_SIZE_THRESHOLD) {
749 uint64_t rsize;
750
751 compressed = compress_blob(data, size, o->data.payload, &rsize);
752
753 if (compressed) {
754 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
755 o->object.flags |= OBJECT_COMPRESSED;
756
807e17f0
LP
757 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
758 }
759 }
760#endif
761
64825d3c 762 if (!compressed && size > 0)
807e17f0 763 memcpy(o->data.payload, data, size);
cec736d2 764
de190aef 765 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
766 if (r < 0)
767 return r;
768
feb12d3e 769#ifdef HAVE_GCRYPT
b0af6f41
LP
770 r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
771 if (r < 0)
772 return r;
feb12d3e 773#endif
b0af6f41 774
48496df6
LP
775 /* The linking might have altered the window, so let's
776 * refresh our pointer */
777 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
778 if (r < 0)
779 return r;
780
cec736d2
LP
781 if (ret)
782 *ret = o;
783
784 if (offset)
de190aef 785 *offset = p;
cec736d2
LP
786
787 return 0;
788}
789
790uint64_t journal_file_entry_n_items(Object *o) {
791 assert(o);
7be3aa17 792 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
793
794 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
795}
796
0284adc6 797uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 798 assert(o);
7be3aa17 799 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
800
801 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
802}
803
fb9a24b6
LP
804uint64_t journal_file_hash_table_n_items(Object *o) {
805 assert(o);
806 assert(o->object.type == OBJECT_DATA_HASH_TABLE ||
807 o->object.type == OBJECT_FIELD_HASH_TABLE);
808
809 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
810}
811
de190aef 812static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
813 le64_t *first,
814 le64_t *idx,
de190aef 815 uint64_t p) {
cec736d2 816 int r;
de190aef
LP
817 uint64_t n = 0, ap = 0, q, i, a, hidx;
818 Object *o;
819
cec736d2 820 assert(f);
de190aef
LP
821 assert(first);
822 assert(idx);
823 assert(p > 0);
cec736d2 824
de190aef
LP
825 a = le64toh(*first);
826 i = hidx = le64toh(*idx);
827 while (a > 0) {
828
829 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
830 if (r < 0)
831 return r;
cec736d2 832
de190aef
LP
833 n = journal_file_entry_array_n_items(o);
834 if (i < n) {
835 o->entry_array.items[i] = htole64(p);
836 *idx = htole64(hidx + 1);
837 return 0;
838 }
cec736d2 839
de190aef
LP
840 i -= n;
841 ap = a;
842 a = le64toh(o->entry_array.next_entry_array_offset);
843 }
844
845 if (hidx > n)
846 n = (hidx+1) * 2;
847 else
848 n = n * 2;
849
850 if (n < 4)
851 n = 4;
852
853 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
854 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
855 &o, &q);
cec736d2
LP
856 if (r < 0)
857 return r;
858
feb12d3e 859#ifdef HAVE_GCRYPT
b0af6f41
LP
860 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
861 if (r < 0)
862 return r;
feb12d3e 863#endif
b0af6f41 864
de190aef 865 o->entry_array.items[i] = htole64(p);
cec736d2 866
de190aef 867 if (ap == 0)
7be3aa17 868 *first = htole64(q);
cec736d2 869 else {
de190aef 870 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
871 if (r < 0)
872 return r;
873
de190aef
LP
874 o->entry_array.next_entry_array_offset = htole64(q);
875 }
cec736d2 876
2dee23eb
LP
877 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
878 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
879
de190aef
LP
880 *idx = htole64(hidx + 1);
881
882 return 0;
883}
cec736d2 884
de190aef 885static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
886 le64_t *extra,
887 le64_t *first,
888 le64_t *idx,
de190aef
LP
889 uint64_t p) {
890
891 int r;
892
893 assert(f);
894 assert(extra);
895 assert(first);
896 assert(idx);
897 assert(p > 0);
898
899 if (*idx == 0)
900 *extra = htole64(p);
901 else {
4fd052ae 902 le64_t i;
de190aef 903
7be3aa17 904 i = htole64(le64toh(*idx) - 1);
de190aef
LP
905 r = link_entry_into_array(f, first, &i, p);
906 if (r < 0)
907 return r;
cec736d2
LP
908 }
909
de190aef
LP
910 *idx = htole64(le64toh(*idx) + 1);
911 return 0;
912}
913
914static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
915 uint64_t p;
916 int r;
917 assert(f);
918 assert(o);
919 assert(offset > 0);
920
921 p = le64toh(o->entry.items[i].object_offset);
922 if (p == 0)
923 return -EINVAL;
924
925 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
926 if (r < 0)
927 return r;
928
de190aef
LP
929 return link_entry_into_array_plus_one(f,
930 &o->data.entry_offset,
931 &o->data.entry_array_offset,
932 &o->data.n_entries,
933 offset);
cec736d2
LP
934}
935
936static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 937 uint64_t n, i;
cec736d2
LP
938 int r;
939
940 assert(f);
941 assert(o);
942 assert(offset > 0);
de190aef 943 assert(o->object.type == OBJECT_ENTRY);
cec736d2 944
b788cc23
LP
945 __sync_synchronize();
946
cec736d2 947 /* Link up the entry itself */
de190aef
LP
948 r = link_entry_into_array(f,
949 &f->header->entry_array_offset,
950 &f->header->n_entries,
951 offset);
952 if (r < 0)
953 return r;
cec736d2 954
aaf53376 955 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 956
de190aef 957 if (f->header->head_entry_realtime == 0)
0ac38b70 958 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 959
0ac38b70 960 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
961 f->header->tail_entry_monotonic = o->entry.monotonic;
962
963 f->tail_entry_monotonic_valid = true;
cec736d2
LP
964
965 /* Link up the items */
966 n = journal_file_entry_n_items(o);
967 for (i = 0; i < n; i++) {
968 r = journal_file_link_entry_item(f, o, offset, i);
969 if (r < 0)
970 return r;
971 }
972
cec736d2
LP
973 return 0;
974}
975
976static int journal_file_append_entry_internal(
977 JournalFile *f,
978 const dual_timestamp *ts,
979 uint64_t xor_hash,
980 const EntryItem items[], unsigned n_items,
de190aef 981 uint64_t *seqnum,
cec736d2
LP
982 Object **ret, uint64_t *offset) {
983 uint64_t np;
984 uint64_t osize;
985 Object *o;
986 int r;
987
988 assert(f);
989 assert(items || n_items == 0);
de190aef 990 assert(ts);
cec736d2
LP
991
992 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
993
de190aef 994 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
995 if (r < 0)
996 return r;
997
d98cc1f2 998 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 999 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
1000 o->entry.realtime = htole64(ts->realtime);
1001 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
1002 o->entry.xor_hash = htole64(xor_hash);
1003 o->entry.boot_id = f->header->boot_id;
1004
feb12d3e 1005#ifdef HAVE_GCRYPT
b0af6f41
LP
1006 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
1007 if (r < 0)
1008 return r;
feb12d3e 1009#endif
b0af6f41 1010
cec736d2
LP
1011 r = journal_file_link_entry(f, o, np);
1012 if (r < 0)
1013 return r;
1014
1015 if (ret)
1016 *ret = o;
1017
1018 if (offset)
1019 *offset = np;
1020
1021 return 0;
1022}
1023
cf244689 1024void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1025 assert(f);
1026
1027 /* inotify() does not receive IN_MODIFY events from file
1028 * accesses done via mmap(). After each access we hence
1029 * trigger IN_MODIFY by truncating the journal file to its
1030 * current size which triggers IN_MODIFY. */
1031
bc85bfee
LP
1032 __sync_synchronize();
1033
50f20cfd
LP
1034 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1035 log_error("Failed to to truncate file to its own size: %m");
1036}
1037
de190aef 1038int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1039 unsigned i;
1040 EntryItem *items;
1041 int r;
1042 uint64_t xor_hash = 0;
de190aef 1043 struct dual_timestamp _ts;
cec736d2
LP
1044
1045 assert(f);
1046 assert(iovec || n_iovec == 0);
1047
de190aef
LP
1048 if (!f->writable)
1049 return -EPERM;
1050
1051 if (!ts) {
1052 dual_timestamp_get(&_ts);
1053 ts = &_ts;
1054 }
1055
1056 if (f->tail_entry_monotonic_valid &&
1057 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1058 return -EINVAL;
1059
feb12d3e 1060#ifdef HAVE_GCRYPT
7560fffc
LP
1061 r = journal_file_maybe_append_tag(f, ts->realtime);
1062 if (r < 0)
1063 return r;
feb12d3e 1064#endif
7560fffc 1065
64825d3c
LP
1066 /* alloca() can't take 0, hence let's allocate at least one */
1067 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1068
1069 for (i = 0; i < n_iovec; i++) {
1070 uint64_t p;
1071 Object *o;
1072
1073 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1074 if (r < 0)
cf244689 1075 return r;
cec736d2
LP
1076
1077 xor_hash ^= le64toh(o->data.hash);
1078 items[i].object_offset = htole64(p);
de7b95cd 1079 items[i].hash = o->data.hash;
cec736d2
LP
1080 }
1081
de190aef 1082 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1083
50f20cfd
LP
1084 journal_file_post_change(f);
1085
cec736d2
LP
1086 return r;
1087}
1088
de190aef
LP
1089static int generic_array_get(JournalFile *f,
1090 uint64_t first,
1091 uint64_t i,
1092 Object **ret, uint64_t *offset) {
1093
cec736d2 1094 Object *o;
6c8a39b8 1095 uint64_t p = 0, a;
cec736d2
LP
1096 int r;
1097
1098 assert(f);
1099
de190aef
LP
1100 a = first;
1101 while (a > 0) {
1102 uint64_t n;
cec736d2 1103
de190aef
LP
1104 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1105 if (r < 0)
1106 return r;
cec736d2 1107
de190aef
LP
1108 n = journal_file_entry_array_n_items(o);
1109 if (i < n) {
1110 p = le64toh(o->entry_array.items[i]);
1111 break;
cec736d2
LP
1112 }
1113
de190aef
LP
1114 i -= n;
1115 a = le64toh(o->entry_array.next_entry_array_offset);
1116 }
1117
1118 if (a <= 0 || p <= 0)
1119 return 0;
1120
1121 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1122 if (r < 0)
1123 return r;
1124
1125 if (ret)
1126 *ret = o;
1127
1128 if (offset)
1129 *offset = p;
1130
1131 return 1;
1132}
1133
1134static int generic_array_get_plus_one(JournalFile *f,
1135 uint64_t extra,
1136 uint64_t first,
1137 uint64_t i,
1138 Object **ret, uint64_t *offset) {
1139
1140 Object *o;
1141
1142 assert(f);
1143
1144 if (i == 0) {
1145 int r;
1146
1147 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1148 if (r < 0)
1149 return r;
1150
de190aef
LP
1151 if (ret)
1152 *ret = o;
cec736d2 1153
de190aef
LP
1154 if (offset)
1155 *offset = extra;
cec736d2 1156
de190aef 1157 return 1;
cec736d2
LP
1158 }
1159
de190aef
LP
1160 return generic_array_get(f, first, i-1, ret, offset);
1161}
cec736d2 1162
de190aef
LP
/* Verdicts returned by the test_object() callbacks used for bisection */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object sorts before the needle */
        TEST_RIGHT = 2  /* the tested object sorts after the needle */
};
cec736d2 1168
de190aef
LP
1169static int generic_array_bisect(JournalFile *f,
1170 uint64_t first,
1171 uint64_t n,
1172 uint64_t needle,
1173 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1174 direction_t direction,
1175 Object **ret,
1176 uint64_t *offset,
1177 uint64_t *idx) {
1178
1179 uint64_t a, p, t = 0, i = 0, last_p = 0;
1180 bool subtract_one = false;
1181 Object *o, *array = NULL;
1182 int r;
cec736d2 1183
de190aef
LP
1184 assert(f);
1185 assert(test_object);
cec736d2 1186
de190aef
LP
1187 a = first;
1188 while (a > 0) {
1189 uint64_t left, right, k, lp;
1190
1191 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1192 if (r < 0)
1193 return r;
1194
de190aef
LP
1195 k = journal_file_entry_array_n_items(array);
1196 right = MIN(k, n);
1197 if (right <= 0)
1198 return 0;
cec736d2 1199
de190aef
LP
1200 i = right - 1;
1201 lp = p = le64toh(array->entry_array.items[i]);
1202 if (p <= 0)
1203 return -EBADMSG;
cec736d2 1204
de190aef
LP
1205 r = test_object(f, p, needle);
1206 if (r < 0)
1207 return r;
cec736d2 1208
de190aef
LP
1209 if (r == TEST_FOUND)
1210 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1211
1212 if (r == TEST_RIGHT) {
1213 left = 0;
1214 right -= 1;
1215 for (;;) {
1216 if (left == right) {
1217 if (direction == DIRECTION_UP)
1218 subtract_one = true;
1219
1220 i = left;
1221 goto found;
1222 }
1223
1224 assert(left < right);
1225
1226 i = (left + right) / 2;
1227 p = le64toh(array->entry_array.items[i]);
1228 if (p <= 0)
1229 return -EBADMSG;
1230
1231 r = test_object(f, p, needle);
1232 if (r < 0)
1233 return r;
cec736d2 1234
de190aef
LP
1235 if (r == TEST_FOUND)
1236 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1237
1238 if (r == TEST_RIGHT)
1239 right = i;
1240 else
1241 left = i + 1;
1242 }
1243 }
1244
cbdca852
LP
1245 if (k > n) {
1246 if (direction == DIRECTION_UP) {
1247 i = n;
1248 subtract_one = true;
1249 goto found;
1250 }
1251
cec736d2 1252 return 0;
cbdca852 1253 }
cec736d2 1254
de190aef
LP
1255 last_p = lp;
1256
1257 n -= k;
1258 t += k;
1259 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1260 }
1261
1262 return 0;
de190aef
LP
1263
1264found:
1265 if (subtract_one && t == 0 && i == 0)
1266 return 0;
1267
1268 if (subtract_one && i == 0)
1269 p = last_p;
1270 else if (subtract_one)
1271 p = le64toh(array->entry_array.items[i-1]);
1272 else
1273 p = le64toh(array->entry_array.items[i]);
1274
1275 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1276 if (r < 0)
1277 return r;
1278
1279 if (ret)
1280 *ret = o;
1281
1282 if (offset)
1283 *offset = p;
1284
1285 if (idx)
cbdca852 1286 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1287
1288 return 1;
cec736d2
LP
1289}
1290
de190aef
LP
1291static int generic_array_bisect_plus_one(JournalFile *f,
1292 uint64_t extra,
1293 uint64_t first,
1294 uint64_t n,
1295 uint64_t needle,
1296 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1297 direction_t direction,
1298 Object **ret,
1299 uint64_t *offset,
1300 uint64_t *idx) {
1301
cec736d2 1302 int r;
cbdca852
LP
1303 bool step_back = false;
1304 Object *o;
cec736d2
LP
1305
1306 assert(f);
de190aef 1307 assert(test_object);
cec736d2 1308
de190aef
LP
1309 if (n <= 0)
1310 return 0;
cec736d2 1311
de190aef
LP
1312 /* This bisects the array in object 'first', but first checks
1313 * an extra */
de190aef
LP
1314 r = test_object(f, extra, needle);
1315 if (r < 0)
1316 return r;
a536e261
LP
1317
1318 if (r == TEST_FOUND)
1319 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1320
cbdca852
LP
1321 /* if we are looking with DIRECTION_UP then we need to first
1322 see if in the actual array there is a matching entry, and
1323 return the last one of that. But if there isn't any we need
1324 to return this one. Hence remember this, and return it
1325 below. */
1326 if (r == TEST_LEFT)
1327 step_back = direction == DIRECTION_UP;
de190aef 1328
cbdca852
LP
1329 if (r == TEST_RIGHT) {
1330 if (direction == DIRECTION_DOWN)
1331 goto found;
1332 else
1333 return 0;
a536e261 1334 }
cec736d2 1335
de190aef
LP
1336 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1337
cbdca852
LP
1338 if (r == 0 && step_back)
1339 goto found;
1340
ecf68b1d 1341 if (r > 0 && idx)
de190aef
LP
1342 (*idx) ++;
1343
1344 return r;
cbdca852
LP
1345
1346found:
1347 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1348 if (r < 0)
1349 return r;
1350
1351 if (ret)
1352 *ret = o;
1353
1354 if (offset)
1355 *offset = extra;
1356
1357 if (idx)
1358 *idx = 0;
1359
1360 return 1;
1361}
1362
1363static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1364 assert(f);
1365 assert(p > 0);
1366
1367 if (p == needle)
1368 return TEST_FOUND;
1369 else if (p < needle)
1370 return TEST_LEFT;
1371 else
1372 return TEST_RIGHT;
1373}
1374
1375int journal_file_move_to_entry_by_offset(
1376 JournalFile *f,
1377 uint64_t p,
1378 direction_t direction,
1379 Object **ret,
1380 uint64_t *offset) {
1381
1382 return generic_array_bisect(f,
1383 le64toh(f->header->entry_array_offset),
1384 le64toh(f->header->n_entries),
1385 p,
1386 test_object_offset,
1387 direction,
1388 ret, offset, NULL);
de190aef
LP
1389}
1390
cbdca852 1391
de190aef
LP
1392static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1393 Object *o;
1394 int r;
1395
1396 assert(f);
1397 assert(p > 0);
1398
1399 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1400 if (r < 0)
1401 return r;
1402
de190aef
LP
1403 if (le64toh(o->entry.seqnum) == needle)
1404 return TEST_FOUND;
1405 else if (le64toh(o->entry.seqnum) < needle)
1406 return TEST_LEFT;
1407 else
1408 return TEST_RIGHT;
1409}
cec736d2 1410
de190aef
LP
1411int journal_file_move_to_entry_by_seqnum(
1412 JournalFile *f,
1413 uint64_t seqnum,
1414 direction_t direction,
1415 Object **ret,
1416 uint64_t *offset) {
1417
1418 return generic_array_bisect(f,
1419 le64toh(f->header->entry_array_offset),
1420 le64toh(f->header->n_entries),
1421 seqnum,
1422 test_object_seqnum,
1423 direction,
1424 ret, offset, NULL);
1425}
cec736d2 1426
de190aef
LP
1427static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1428 Object *o;
1429 int r;
1430
1431 assert(f);
1432 assert(p > 0);
1433
1434 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1435 if (r < 0)
1436 return r;
1437
1438 if (le64toh(o->entry.realtime) == needle)
1439 return TEST_FOUND;
1440 else if (le64toh(o->entry.realtime) < needle)
1441 return TEST_LEFT;
1442 else
1443 return TEST_RIGHT;
cec736d2
LP
1444}
1445
de190aef
LP
1446int journal_file_move_to_entry_by_realtime(
1447 JournalFile *f,
1448 uint64_t realtime,
1449 direction_t direction,
1450 Object **ret,
1451 uint64_t *offset) {
1452
1453 return generic_array_bisect(f,
1454 le64toh(f->header->entry_array_offset),
1455 le64toh(f->header->n_entries),
1456 realtime,
1457 test_object_realtime,
1458 direction,
1459 ret, offset, NULL);
1460}
1461
1462static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1463 Object *o;
1464 int r;
1465
1466 assert(f);
1467 assert(p > 0);
1468
1469 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1470 if (r < 0)
1471 return r;
1472
1473 if (le64toh(o->entry.monotonic) == needle)
1474 return TEST_FOUND;
1475 else if (le64toh(o->entry.monotonic) < needle)
1476 return TEST_LEFT;
1477 else
1478 return TEST_RIGHT;
1479}
1480
1481int journal_file_move_to_entry_by_monotonic(
1482 JournalFile *f,
1483 sd_id128_t boot_id,
1484 uint64_t monotonic,
1485 direction_t direction,
1486 Object **ret,
1487 uint64_t *offset) {
1488
10b6f904 1489 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1490 Object *o;
1491 int r;
1492
cbdca852 1493 assert(f);
de190aef 1494
cbdca852 1495 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1496 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1497 if (r < 0)
1498 return r;
cbdca852 1499 if (r == 0)
de190aef
LP
1500 return -ENOENT;
1501
1502 return generic_array_bisect_plus_one(f,
1503 le64toh(o->data.entry_offset),
1504 le64toh(o->data.entry_array_offset),
1505 le64toh(o->data.n_entries),
1506 monotonic,
1507 test_object_monotonic,
1508 direction,
1509 ret, offset, NULL);
1510}
1511
de190aef
LP
1512int journal_file_next_entry(
1513 JournalFile *f,
1514 Object *o, uint64_t p,
1515 direction_t direction,
1516 Object **ret, uint64_t *offset) {
1517
1518 uint64_t i, n;
cec736d2
LP
1519 int r;
1520
1521 assert(f);
de190aef
LP
1522 assert(p > 0 || !o);
1523
1524 n = le64toh(f->header->n_entries);
1525 if (n <= 0)
1526 return 0;
cec736d2
LP
1527
1528 if (!o)
de190aef 1529 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1530 else {
de190aef 1531 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1532 return -EINVAL;
1533
de190aef
LP
1534 r = generic_array_bisect(f,
1535 le64toh(f->header->entry_array_offset),
1536 le64toh(f->header->n_entries),
1537 p,
1538 test_object_offset,
1539 DIRECTION_DOWN,
1540 NULL, NULL,
1541 &i);
1542 if (r <= 0)
1543 return r;
1544
1545 if (direction == DIRECTION_DOWN) {
1546 if (i >= n - 1)
1547 return 0;
1548
1549 i++;
1550 } else {
1551 if (i <= 0)
1552 return 0;
1553
1554 i--;
1555 }
cec736d2
LP
1556 }
1557
de190aef
LP
1558 /* And jump to it */
1559 return generic_array_get(f,
1560 le64toh(f->header->entry_array_offset),
1561 i,
1562 ret, offset);
1563}
cec736d2 1564
de190aef
LP
1565int journal_file_skip_entry(
1566 JournalFile *f,
1567 Object *o, uint64_t p,
1568 int64_t skip,
1569 Object **ret, uint64_t *offset) {
1570
1571 uint64_t i, n;
1572 int r;
1573
1574 assert(f);
1575 assert(o);
1576 assert(p > 0);
1577
1578 if (o->object.type != OBJECT_ENTRY)
1579 return -EINVAL;
1580
1581 r = generic_array_bisect(f,
1582 le64toh(f->header->entry_array_offset),
1583 le64toh(f->header->n_entries),
1584 p,
1585 test_object_offset,
1586 DIRECTION_DOWN,
1587 NULL, NULL,
1588 &i);
1589 if (r <= 0)
cec736d2
LP
1590 return r;
1591
de190aef
LP
1592 /* Calculate new index */
1593 if (skip < 0) {
1594 if ((uint64_t) -skip >= i)
1595 i = 0;
1596 else
1597 i = i - (uint64_t) -skip;
1598 } else
1599 i += (uint64_t) skip;
cec736d2 1600
de190aef
LP
1601 n = le64toh(f->header->n_entries);
1602 if (n <= 0)
1603 return -EBADMSG;
cec736d2 1604
de190aef
LP
1605 if (i >= n)
1606 i = n-1;
1607
1608 return generic_array_get(f,
1609 le64toh(f->header->entry_array_offset),
1610 i,
1611 ret, offset);
cec736d2
LP
1612}
1613
de190aef
LP
1614int journal_file_next_entry_for_data(
1615 JournalFile *f,
1616 Object *o, uint64_t p,
1617 uint64_t data_offset,
1618 direction_t direction,
1619 Object **ret, uint64_t *offset) {
1620
1621 uint64_t n, i;
cec736d2 1622 int r;
de190aef 1623 Object *d;
cec736d2
LP
1624
1625 assert(f);
de190aef 1626 assert(p > 0 || !o);
cec736d2 1627
de190aef 1628 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1629 if (r < 0)
de190aef 1630 return r;
cec736d2 1631
de190aef
LP
1632 n = le64toh(d->data.n_entries);
1633 if (n <= 0)
1634 return n;
cec736d2 1635
de190aef
LP
1636 if (!o)
1637 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1638 else {
1639 if (o->object.type != OBJECT_ENTRY)
1640 return -EINVAL;
cec736d2 1641
de190aef
LP
1642 r = generic_array_bisect_plus_one(f,
1643 le64toh(d->data.entry_offset),
1644 le64toh(d->data.entry_array_offset),
1645 le64toh(d->data.n_entries),
1646 p,
1647 test_object_offset,
1648 DIRECTION_DOWN,
1649 NULL, NULL,
1650 &i);
1651
1652 if (r <= 0)
cec736d2
LP
1653 return r;
1654
de190aef
LP
1655 if (direction == DIRECTION_DOWN) {
1656 if (i >= n - 1)
1657 return 0;
cec736d2 1658
de190aef
LP
1659 i++;
1660 } else {
1661 if (i <= 0)
1662 return 0;
cec736d2 1663
de190aef
LP
1664 i--;
1665 }
cec736d2 1666
de190aef 1667 }
cec736d2 1668
de190aef
LP
1669 return generic_array_get_plus_one(f,
1670 le64toh(d->data.entry_offset),
1671 le64toh(d->data.entry_array_offset),
1672 i,
1673 ret, offset);
1674}
cec736d2 1675
cbdca852
LP
1676int journal_file_move_to_entry_by_offset_for_data(
1677 JournalFile *f,
1678 uint64_t data_offset,
1679 uint64_t p,
1680 direction_t direction,
1681 Object **ret, uint64_t *offset) {
1682
1683 int r;
1684 Object *d;
1685
1686 assert(f);
1687
1688 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1689 if (r < 0)
1690 return r;
1691
1692 return generic_array_bisect_plus_one(f,
1693 le64toh(d->data.entry_offset),
1694 le64toh(d->data.entry_array_offset),
1695 le64toh(d->data.n_entries),
1696 p,
1697 test_object_offset,
1698 direction,
1699 ret, offset, NULL);
1700}
1701
1702int journal_file_move_to_entry_by_monotonic_for_data(
1703 JournalFile *f,
1704 uint64_t data_offset,
1705 sd_id128_t boot_id,
1706 uint64_t monotonic,
1707 direction_t direction,
1708 Object **ret, uint64_t *offset) {
1709
1710 char t[9+32+1] = "_BOOT_ID=";
1711 Object *o, *d;
1712 int r;
1713 uint64_t b, z;
1714
1715 assert(f);
1716
1717 /* First, seek by time */
1718 sd_id128_to_string(boot_id, t + 9);
1719 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1720 if (r < 0)
1721 return r;
1722 if (r == 0)
1723 return -ENOENT;
1724
1725 r = generic_array_bisect_plus_one(f,
1726 le64toh(o->data.entry_offset),
1727 le64toh(o->data.entry_array_offset),
1728 le64toh(o->data.n_entries),
1729 monotonic,
1730 test_object_monotonic,
1731 direction,
1732 NULL, &z, NULL);
1733 if (r <= 0)
1734 return r;
1735
1736 /* And now, continue seeking until we find an entry that
1737 * exists in both bisection arrays */
1738
1739 for (;;) {
1740 Object *qo;
1741 uint64_t p, q;
1742
1743 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1744 if (r < 0)
1745 return r;
1746
1747 r = generic_array_bisect_plus_one(f,
1748 le64toh(d->data.entry_offset),
1749 le64toh(d->data.entry_array_offset),
1750 le64toh(d->data.n_entries),
1751 z,
1752 test_object_offset,
1753 direction,
1754 NULL, &p, NULL);
1755 if (r <= 0)
1756 return r;
1757
1758 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1759 if (r < 0)
1760 return r;
1761
1762 r = generic_array_bisect_plus_one(f,
1763 le64toh(o->data.entry_offset),
1764 le64toh(o->data.entry_array_offset),
1765 le64toh(o->data.n_entries),
1766 p,
1767 test_object_offset,
1768 direction,
1769 &qo, &q, NULL);
1770
1771 if (r <= 0)
1772 return r;
1773
1774 if (p == q) {
1775 if (ret)
1776 *ret = qo;
1777 if (offset)
1778 *offset = q;
1779
1780 return 1;
1781 }
1782
1783 z = q;
1784 }
1785
1786 return 0;
1787}
1788
de190aef
LP
1789int journal_file_move_to_entry_by_seqnum_for_data(
1790 JournalFile *f,
1791 uint64_t data_offset,
1792 uint64_t seqnum,
1793 direction_t direction,
1794 Object **ret, uint64_t *offset) {
cec736d2 1795
de190aef
LP
1796 Object *d;
1797 int r;
cec736d2 1798
91a31dde
LP
1799 assert(f);
1800
de190aef 1801 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1802 if (r < 0)
de190aef 1803 return r;
cec736d2 1804
de190aef
LP
1805 return generic_array_bisect_plus_one(f,
1806 le64toh(d->data.entry_offset),
1807 le64toh(d->data.entry_array_offset),
1808 le64toh(d->data.n_entries),
1809 seqnum,
1810 test_object_seqnum,
1811 direction,
1812 ret, offset, NULL);
1813}
cec736d2 1814
de190aef
LP
1815int journal_file_move_to_entry_by_realtime_for_data(
1816 JournalFile *f,
1817 uint64_t data_offset,
1818 uint64_t realtime,
1819 direction_t direction,
1820 Object **ret, uint64_t *offset) {
1821
1822 Object *d;
1823 int r;
1824
91a31dde
LP
1825 assert(f);
1826
de190aef 1827 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1828 if (r < 0)
de190aef
LP
1829 return r;
1830
1831 return generic_array_bisect_plus_one(f,
1832 le64toh(d->data.entry_offset),
1833 le64toh(d->data.entry_array_offset),
1834 le64toh(d->data.n_entries),
1835 realtime,
1836 test_object_realtime,
1837 direction,
1838 ret, offset, NULL);
cec736d2
LP
1839}
1840
0284adc6 1841void journal_file_dump(JournalFile *f) {
7560fffc 1842 Object *o;
7560fffc 1843 int r;
0284adc6 1844 uint64_t p;
7560fffc
LP
1845
1846 assert(f);
1847
0284adc6 1848 journal_file_print_header(f);
7560fffc 1849
0284adc6
LP
1850 p = le64toh(f->header->header_size);
1851 while (p != 0) {
1852 r = journal_file_move_to_object(f, -1, p, &o);
1853 if (r < 0)
1854 goto fail;
7560fffc 1855
0284adc6 1856 switch (o->object.type) {
d98cc1f2 1857
0284adc6
LP
1858 case OBJECT_UNUSED:
1859 printf("Type: OBJECT_UNUSED\n");
1860 break;
d98cc1f2 1861
0284adc6
LP
1862 case OBJECT_DATA:
1863 printf("Type: OBJECT_DATA\n");
1864 break;
7560fffc 1865
0284adc6 1866 case OBJECT_ENTRY:
f7fab8a5 1867 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
0284adc6
LP
1868 (unsigned long long) le64toh(o->entry.seqnum),
1869 (unsigned long long) le64toh(o->entry.monotonic),
1870 (unsigned long long) le64toh(o->entry.realtime));
1871 break;
7560fffc 1872
0284adc6
LP
1873 case OBJECT_FIELD_HASH_TABLE:
1874 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1875 break;
7560fffc 1876
0284adc6
LP
1877 case OBJECT_DATA_HASH_TABLE:
1878 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1879 break;
7560fffc 1880
0284adc6
LP
1881 case OBJECT_ENTRY_ARRAY:
1882 printf("Type: OBJECT_ENTRY_ARRAY\n");
1883 break;
7560fffc 1884
0284adc6 1885 case OBJECT_TAG:
f7fab8a5
LP
1886 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1887 (unsigned long long) le64toh(o->tag.seqnum),
1888 (unsigned long long) le64toh(o->tag.epoch));
0284adc6
LP
1889 break;
1890 }
7560fffc 1891
0284adc6
LP
1892 if (o->object.flags & OBJECT_COMPRESSED)
1893 printf("Flags: COMPRESSED\n");
7560fffc 1894
0284adc6
LP
1895 if (p == le64toh(f->header->tail_object_offset))
1896 p = 0;
1897 else
1898 p = p + ALIGN64(le64toh(o->object.size));
1899 }
7560fffc 1900
0284adc6
LP
1901 return;
1902fail:
1903 log_error("File corrupt");
7560fffc
LP
1904}
1905
0284adc6
LP
1906void journal_file_print_header(JournalFile *f) {
1907 char a[33], b[33], c[33];
1908 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
a1a03e30
LP
1909 struct stat st;
1910 char bytes[FORMAT_BYTES_MAX];
7560fffc
LP
1911
1912 assert(f);
7560fffc 1913
0284adc6
LP
1914 printf("File Path: %s\n"
1915 "File ID: %s\n"
1916 "Machine ID: %s\n"
1917 "Boot ID: %s\n"
1918 "Sequential Number ID: %s\n"
1919 "State: %s\n"
1920 "Compatible Flags:%s%s\n"
1921 "Incompatible Flags:%s%s\n"
1922 "Header size: %llu\n"
1923 "Arena size: %llu\n"
1924 "Data Hash Table Size: %llu\n"
1925 "Field Hash Table Size: %llu\n"
0284adc6
LP
1926 "Rotate Suggested: %s\n"
1927 "Head Sequential Number: %llu\n"
1928 "Tail Sequential Number: %llu\n"
1929 "Head Realtime Timestamp: %s\n"
3223f44f
LP
1930 "Tail Realtime Timestamp: %s\n"
1931 "Objects: %llu\n"
1932 "Entry Objects: %llu\n",
0284adc6
LP
1933 f->path,
1934 sd_id128_to_string(f->header->file_id, a),
1935 sd_id128_to_string(f->header->machine_id, b),
1936 sd_id128_to_string(f->header->boot_id, c),
1937 sd_id128_to_string(f->header->seqnum_id, c),
3223f44f
LP
1938 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1939 f->header->state == STATE_ONLINE ? "ONLINE" :
1940 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
8088cbd3
LP
1941 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1942 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1943 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1944 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
0284adc6
LP
1945 (unsigned long long) le64toh(f->header->header_size),
1946 (unsigned long long) le64toh(f->header->arena_size),
1947 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1948 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
0284adc6
LP
1949 yes_no(journal_file_rotate_suggested(f)),
1950 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1951 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1952 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
3223f44f
LP
1953 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1954 (unsigned long long) le64toh(f->header->n_objects),
1955 (unsigned long long) le64toh(f->header->n_entries));
7560fffc 1956
0284adc6
LP
1957 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1958 printf("Data Objects: %llu\n"
1959 "Data Hash Table Fill: %.1f%%\n",
1960 (unsigned long long) le64toh(f->header->n_data),
1961 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 1962
0284adc6
LP
1963 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1964 printf("Field Objects: %llu\n"
1965 "Field Hash Table Fill: %.1f%%\n",
1966 (unsigned long long) le64toh(f->header->n_fields),
1967 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
1968
1969 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1970 printf("Tag Objects: %llu\n",
1971 (unsigned long long) le64toh(f->header->n_tags));
1972 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1973 printf("Entry Array Objects: %llu\n",
1974 (unsigned long long) le64toh(f->header->n_entry_arrays));
a1a03e30
LP
1975
1976 if (fstat(f->fd, &st) >= 0)
1977 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
7560fffc
LP
1978}
1979
0284adc6
LP
1980int journal_file_open(
1981 const char *fname,
1982 int flags,
1983 mode_t mode,
1984 bool compress,
baed47c3 1985 bool seal,
0284adc6
LP
1986 JournalMetrics *metrics,
1987 MMapCache *mmap_cache,
1988 JournalFile *template,
1989 JournalFile **ret) {
7560fffc 1990
0284adc6
LP
1991 JournalFile *f;
1992 int r;
1993 bool newly_created = false;
7560fffc 1994
0284adc6 1995 assert(fname);
7560fffc 1996
0284adc6
LP
1997 if ((flags & O_ACCMODE) != O_RDONLY &&
1998 (flags & O_ACCMODE) != O_RDWR)
1999 return -EINVAL;
7560fffc 2000
a0108012
LP
2001 if (!endswith(fname, ".journal") &&
2002 !endswith(fname, ".journal~"))
0284adc6 2003 return -EINVAL;
7560fffc 2004
0284adc6
LP
2005 f = new0(JournalFile, 1);
2006 if (!f)
2007 return -ENOMEM;
7560fffc 2008
0284adc6
LP
2009 f->fd = -1;
2010 f->mode = mode;
7560fffc 2011
0284adc6
LP
2012 f->flags = flags;
2013 f->prot = prot_from_flags(flags);
2014 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2015 f->compress = compress;
baed47c3 2016 f->seal = seal;
7560fffc 2017
0284adc6
LP
2018 if (mmap_cache)
2019 f->mmap = mmap_cache_ref(mmap_cache);
2020 else {
84168d80 2021 f->mmap = mmap_cache_new();
0284adc6
LP
2022 if (!f->mmap) {
2023 r = -ENOMEM;
2024 goto fail;
2025 }
2026 }
7560fffc 2027
0284adc6
LP
2028 f->path = strdup(fname);
2029 if (!f->path) {
2030 r = -ENOMEM;
2031 goto fail;
2032 }
7560fffc 2033
0284adc6
LP
2034 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2035 if (f->fd < 0) {
2036 r = -errno;
2037 goto fail;
7560fffc 2038 }
7560fffc 2039
0284adc6
LP
2040 if (fstat(f->fd, &f->last_stat) < 0) {
2041 r = -errno;
2042 goto fail;
2043 }
7560fffc 2044
0284adc6
LP
2045 if (f->last_stat.st_size == 0 && f->writable) {
2046 newly_created = true;
7560fffc 2047
feb12d3e 2048#ifdef HAVE_GCRYPT
0284adc6 2049 /* Try to load the FSPRG state, and if we can't, then
baed47c3
LP
2050 * just don't do sealing */
2051 r = journal_file_fss_load(f);
0284adc6 2052 if (r < 0)
baed47c3 2053 f->seal = false;
feb12d3e 2054#endif
7560fffc 2055
0284adc6
LP
2056 r = journal_file_init_header(f, template);
2057 if (r < 0)
2058 goto fail;
7560fffc 2059
0284adc6
LP
2060 if (fstat(f->fd, &f->last_stat) < 0) {
2061 r = -errno;
2062 goto fail;
2063 }
2064 }
7560fffc 2065
0284adc6
LP
2066 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2067 r = -EIO;
2068 goto fail;
2069 }
7560fffc 2070
0284adc6
LP
2071 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2072 if (f->header == MAP_FAILED) {
2073 f->header = NULL;
2074 r = -errno;
2075 goto fail;
2076 }
7560fffc 2077
0284adc6
LP
2078 if (!newly_created) {
2079 r = journal_file_verify_header(f);
2080 if (r < 0)
2081 goto fail;
2082 }
7560fffc 2083
feb12d3e 2084#ifdef HAVE_GCRYPT
0284adc6 2085 if (!newly_created && f->writable) {
baed47c3 2086 r = journal_file_fss_load(f);
0284adc6
LP
2087 if (r < 0)
2088 goto fail;
2089 }
feb12d3e 2090#endif
cec736d2
LP
2091
2092 if (f->writable) {
4a92baf3
LP
2093 if (metrics) {
2094 journal_default_metrics(metrics, f->fd);
2095 f->metrics = *metrics;
2096 } else if (template)
2097 f->metrics = template->metrics;
2098
cec736d2
LP
2099 r = journal_file_refresh_header(f);
2100 if (r < 0)
2101 goto fail;
2102 }
2103
feb12d3e 2104#ifdef HAVE_GCRYPT
baed47c3 2105 r = journal_file_hmac_setup(f);
14d10188
LP
2106 if (r < 0)
2107 goto fail;
feb12d3e 2108#endif
14d10188 2109
cec736d2 2110 if (newly_created) {
de190aef 2111 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2112 if (r < 0)
2113 goto fail;
2114
de190aef 2115 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2116 if (r < 0)
2117 goto fail;
7560fffc 2118
feb12d3e 2119#ifdef HAVE_GCRYPT
7560fffc
LP
2120 r = journal_file_append_first_tag(f);
2121 if (r < 0)
2122 goto fail;
feb12d3e 2123#endif
cec736d2
LP
2124 }
2125
de190aef 2126 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2127 if (r < 0)
2128 goto fail;
2129
de190aef 2130 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2131 if (r < 0)
2132 goto fail;
2133
2134 if (ret)
2135 *ret = f;
2136
2137 return 0;
2138
2139fail:
2140 journal_file_close(f);
2141
2142 return r;
2143}
0ac38b70 2144
baed47c3 2145int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
0ac38b70
LP
2146 char *p;
2147 size_t l;
2148 JournalFile *old_file, *new_file = NULL;
2149 int r;
2150
2151 assert(f);
2152 assert(*f);
2153
2154 old_file = *f;
2155
2156 if (!old_file->writable)
2157 return -EINVAL;
2158
2159 if (!endswith(old_file->path, ".journal"))
2160 return -EINVAL;
2161
2162 l = strlen(old_file->path);
2163
9447a7f1 2164 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2165 if (!p)
2166 return -ENOMEM;
2167
2168 memcpy(p, old_file->path, l - 8);
2169 p[l-8] = '@';
2170 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2171 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2172 "-%016llx-%016llx.journal",
beec0085 2173 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
0ac38b70
LP
2174 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2175
2176 r = rename(old_file->path, p);
2177 free(p);
2178
2179 if (r < 0)
2180 return -errno;
2181
ccdbaf91 2182 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2183
baed47c3 2184 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2185 journal_file_close(old_file);
2186
2187 *f = new_file;
2188 return r;
2189}
2190
9447a7f1
LP
2191int journal_file_open_reliably(
2192 const char *fname,
2193 int flags,
2194 mode_t mode,
7560fffc 2195 bool compress,
baed47c3 2196 bool seal,
4a92baf3 2197 JournalMetrics *metrics,
27370278 2198 MMapCache *mmap_cache,
9447a7f1
LP
2199 JournalFile *template,
2200 JournalFile **ret) {
2201
2202 int r;
2203 size_t l;
2204 char *p;
2205
baed47c3 2206 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2207 metrics, mmap_cache, template, ret);
0071d9f1
LP
2208 if (r != -EBADMSG && /* corrupted */
2209 r != -ENODATA && /* truncated */
2210 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2211 r != -EPROTONOSUPPORT && /* incompatible feature */
2212 r != -EBUSY && /* unclean shutdown */
2213 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2214 return r;
2215
2216 if ((flags & O_ACCMODE) == O_RDONLY)
2217 return r;
2218
2219 if (!(flags & O_CREAT))
2220 return r;
2221
7560fffc
LP
2222 if (!endswith(fname, ".journal"))
2223 return r;
2224
5c70eab4
LP
2225 /* The file is corrupted. Rotate it away and try it again (but only once) */
2226
9447a7f1
LP
2227 l = strlen(fname);
2228 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2229 (int) (l-8), fname,
2230 (unsigned long long) now(CLOCK_REALTIME),
2231 random_ull()) < 0)
2232 return -ENOMEM;
2233
2234 r = rename(fname, p);
2235 free(p);
2236 if (r < 0)
2237 return -errno;
2238
a1a1898f 2239 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2240
baed47c3 2241 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2242 metrics, mmap_cache, template, ret);
9447a7f1
LP
2243}
2244
cf244689
LP
2245
2246int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2247 uint64_t i, n;
2248 uint64_t q, xor_hash = 0;
2249 int r;
2250 EntryItem *items;
2251 dual_timestamp ts;
2252
2253 assert(from);
2254 assert(to);
2255 assert(o);
2256 assert(p);
2257
2258 if (!to->writable)
2259 return -EPERM;
2260
2261 ts.monotonic = le64toh(o->entry.monotonic);
2262 ts.realtime = le64toh(o->entry.realtime);
2263
2264 if (to->tail_entry_monotonic_valid &&
2265 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2266 return -EINVAL;
2267
cf244689
LP
2268 n = journal_file_entry_n_items(o);
2269 items = alloca(sizeof(EntryItem) * n);
2270
2271 for (i = 0; i < n; i++) {
4fd052ae
FC
2272 uint64_t l, h;
2273 le64_t le_hash;
cf244689
LP
2274 size_t t;
2275 void *data;
2276 Object *u;
2277
2278 q = le64toh(o->entry.items[i].object_offset);
2279 le_hash = o->entry.items[i].hash;
2280
2281 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2282 if (r < 0)
2283 return r;
2284
2285 if (le_hash != o->data.hash)
2286 return -EBADMSG;
2287
2288 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2289 t = (size_t) l;
2290
2291 /* We hit the limit on 32bit machines */
2292 if ((uint64_t) t != l)
2293 return -E2BIG;
2294
2295 if (o->object.flags & OBJECT_COMPRESSED) {
2296#ifdef HAVE_XZ
2297 uint64_t rsize;
2298
2299 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2300 return -EBADMSG;
2301
2302 data = from->compress_buffer;
2303 l = rsize;
2304#else
2305 return -EPROTONOSUPPORT;
2306#endif
2307 } else
2308 data = o->data.payload;
2309
2310 r = journal_file_append_data(to, data, l, &u, &h);
2311 if (r < 0)
2312 return r;
2313
2314 xor_hash ^= le64toh(u->data.hash);
2315 items[i].object_offset = htole64(h);
2316 items[i].hash = u->data.hash;
2317
2318 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2319 if (r < 0)
2320 return r;
2321 }
2322
2323 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2324}
babfc091
LP
2325
2326void journal_default_metrics(JournalMetrics *m, int fd) {
2327 uint64_t fs_size = 0;
2328 struct statvfs ss;
a7bc2c2a 2329 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2330
2331 assert(m);
2332 assert(fd >= 0);
2333
2334 if (fstatvfs(fd, &ss) >= 0)
2335 fs_size = ss.f_frsize * ss.f_blocks;
2336
2337 if (m->max_use == (uint64_t) -1) {
2338
2339 if (fs_size > 0) {
2340 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2341
2342 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2343 m->max_use = DEFAULT_MAX_USE_UPPER;
2344
2345 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2346 m->max_use = DEFAULT_MAX_USE_LOWER;
2347 } else
2348 m->max_use = DEFAULT_MAX_USE_LOWER;
2349 } else {
2350 m->max_use = PAGE_ALIGN(m->max_use);
2351
2352 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2353 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2354 }
2355
2356 if (m->max_size == (uint64_t) -1) {
2357 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2358
2359 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2360 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2361 } else
2362 m->max_size = PAGE_ALIGN(m->max_size);
2363
2364 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2365 m->max_size = JOURNAL_FILE_SIZE_MIN;
2366
2367 if (m->max_size*2 > m->max_use)
2368 m->max_use = m->max_size*2;
2369
2370 if (m->min_size == (uint64_t) -1)
2371 m->min_size = JOURNAL_FILE_SIZE_MIN;
2372 else {
2373 m->min_size = PAGE_ALIGN(m->min_size);
2374
2375 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2376 m->min_size = JOURNAL_FILE_SIZE_MIN;
2377
2378 if (m->min_size > m->max_size)
2379 m->max_size = m->min_size;
2380 }
2381
2382 if (m->keep_free == (uint64_t) -1) {
2383
2384 if (fs_size > 0) {
2385 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2386
2387 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2388 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2389
2390 } else
2391 m->keep_free = DEFAULT_KEEP_FREE;
2392 }
2393
2b43f939
LP
2394 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2395 format_bytes(a, sizeof(a), m->max_use),
2396 format_bytes(b, sizeof(b), m->max_size),
2397 format_bytes(c, sizeof(c), m->min_size),
2398 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2399}
08984293
LP
2400
2401int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2402 assert(f);
2403 assert(from || to);
2404
2405 if (from) {
162566a4
LP
2406 if (f->header->head_entry_realtime == 0)
2407 return -ENOENT;
08984293 2408
162566a4 2409 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2410 }
2411
2412 if (to) {
162566a4
LP
2413 if (f->header->tail_entry_realtime == 0)
2414 return -ENOENT;
08984293 2415
162566a4 2416 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2417 }
2418
2419 return 1;
2420}
2421
2422int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2423 char t[9+32+1] = "_BOOT_ID=";
2424 Object *o;
2425 uint64_t p;
2426 int r;
2427
2428 assert(f);
2429 assert(from || to);
2430
2431 sd_id128_to_string(boot_id, t + 9);
2432
2433 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2434 if (r <= 0)
2435 return r;
2436
2437 if (le64toh(o->data.n_entries) <= 0)
2438 return 0;
2439
2440 if (from) {
2441 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2442 if (r < 0)
2443 return r;
2444
2445 *from = le64toh(o->entry.monotonic);
2446 }
2447
2448 if (to) {
2449 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2450 if (r < 0)
2451 return r;
2452
2453 r = generic_array_get_plus_one(f,
2454 le64toh(o->data.entry_offset),
2455 le64toh(o->data.entry_array_offset),
2456 le64toh(o->data.n_entries)-1,
2457 &o, NULL);
2458 if (r <= 0)
2459 return r;
2460
2461 *to = le64toh(o->entry.monotonic);
2462 }
2463
2464 return 1;
2465}
dca6219e
LP
2466
2467bool journal_file_rotate_suggested(JournalFile *f) {
2468 assert(f);
2469
2470 /* If we gained new header fields we gained new features,
2471 * hence suggest a rotation */
361f9cbc
LP
2472 if (le64toh(f->header->header_size) < sizeof(Header)) {
2473 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2474 return true;
361f9cbc 2475 }
dca6219e
LP
2476
2477 /* Let's check if the hash tables grew over a certain fill
2478 * level (75%, borrowing this value from Java's hash table
2479 * implementation), and if so suggest a rotation. To calculate
2480 * the fill level we need the n_data field, which only exists
2481 * in newer versions. */
2482
2483 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
2484 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2485 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2486 f->path,
2487 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2488 (unsigned long long) le64toh(f->header->n_data),
2489 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2490 (unsigned long long) (f->last_stat.st_size),
2491 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 2492 return true;
361f9cbc 2493 }
dca6219e
LP
2494
2495 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
2496 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2497 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2498 f->path,
2499 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2500 (unsigned long long) le64toh(f->header->n_fields),
2501 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 2502 return true;
361f9cbc 2503 }
dca6219e
LP
2504
2505 return false;
2506}