]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journald: don't reposition window if we don't have to
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
0284adc6 32#include "journal-authenticate.h"
cec736d2 33#include "lookup3.h"
807e17f0 34#include "compress.h"
7560fffc 35#include "fsprg.h"
cec736d2 36
4a92baf3
LP
/* Default sizes, in bytes, of the data and the field hash table */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL * sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL * sizeof(HashItem))

/* Data payloads below this size (in bytes) are never handed to the compressor */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* Smallest size a journal file may have */
#define JOURNAL_FILE_SIZE_MIN (64ULL * 1024ULL)                         /* 64 KiB */

/* Lower and upper bound applied when the max_use value is deduced
 * from the size of the file system */
#define DEFAULT_MAX_USE_LOWER (1ULL * 1024ULL * 1024ULL)                /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)      /* 4 GiB */

/* Upper bound applied when max_size is deduced from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL * 1024ULL * 1024ULL)             /* 128 MiB */

/* Upper bound applied when the keep_free value is deduced from the
 * size of the file system */
#define DEFAULT_KEEP_FREE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)    /* 4 GiB */

/* keep_free value used when the size of the file system cannot be
 * determined */
#define DEFAULT_KEEP_FREE (1024ULL * 1024ULL)                           /* 1 MiB */

/* n_data was the first field added to the header after the initial
 * file format design, hence everything before it must always be there */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 63
cec736d2 64void journal_file_close(JournalFile *f) {
de190aef 65 assert(f);
cec736d2 66
feb12d3e 67#ifdef HAVE_GCRYPT
b0af6f41 68 /* Write the final tag */
c586dbf1 69 if (f->seal && f->writable)
b0af6f41 70 journal_file_append_tag(f);
feb12d3e 71#endif
b0af6f41 72
7560fffc 73 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
74 if (f->mmap && f->fd >= 0)
75 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc
LP
76
77 if (f->writable && f->fd >= 0)
78 fdatasync(f->fd);
79
d384c7a8 80 if (f->header) {
cd96b3b8
LP
81 /* Mark the file offline. Don't override the archived state if it already is set */
82 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 83 f->header->state = STATE_OFFLINE;
cec736d2 84
d384c7a8
MS
85 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
86 }
cec736d2 87
0ac38b70
LP
88 if (f->fd >= 0)
89 close_nointr_nofail(f->fd);
90
cec736d2 91 free(f->path);
807e17f0 92
16e9f408
LP
93 if (f->mmap)
94 mmap_cache_unref(f->mmap);
95
807e17f0
LP
96#ifdef HAVE_XZ
97 free(f->compress_buffer);
98#endif
99
7560fffc 100#ifdef HAVE_GCRYPT
baed47c3
LP
101 if (f->fss_file)
102 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
103 else if (f->fsprg_state)
104 free(f->fsprg_state);
105
106 free(f->fsprg_seed);
7560fffc
LP
107
108 if (f->hmac)
109 gcry_md_close(f->hmac);
110#endif
111
cec736d2
LP
112 free(f);
113}
114
0ac38b70 115static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
116 Header h;
117 ssize_t k;
118 int r;
119
120 assert(f);
121
122 zero(h);
7560fffc 123 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 124 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 125
7560fffc
LP
126 h.incompatible_flags =
127 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
128
129 h.compatible_flags =
baed47c3 130 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 131
cec736d2
LP
132 r = sd_id128_randomize(&h.file_id);
133 if (r < 0)
134 return r;
135
0ac38b70
LP
136 if (template) {
137 h.seqnum_id = template->header->seqnum_id;
beec0085 138 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
139 } else
140 h.seqnum_id = h.file_id;
cec736d2
LP
141
142 k = pwrite(f->fd, &h, sizeof(h), 0);
143 if (k < 0)
144 return -errno;
145
146 if (k != sizeof(h))
147 return -EIO;
148
149 return 0;
150}
151
152static int journal_file_refresh_header(JournalFile *f) {
153 int r;
de190aef 154 sd_id128_t boot_id;
cec736d2
LP
155
156 assert(f);
157
158 r = sd_id128_get_machine(&f->header->machine_id);
159 if (r < 0)
160 return r;
161
de190aef 162 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
163 if (r < 0)
164 return r;
165
de190aef
LP
166 if (sd_id128_equal(boot_id, f->header->boot_id))
167 f->tail_entry_monotonic_valid = true;
168
169 f->header->boot_id = boot_id;
170
171 f->header->state = STATE_ONLINE;
b788cc23 172
7560fffc
LP
173 /* Sync the online state to disk */
174 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
175 fdatasync(f->fd);
b788cc23 176
cec736d2
LP
177 return 0;
178}
179
180static int journal_file_verify_header(JournalFile *f) {
181 assert(f);
182
7560fffc 183 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
184 return -EBADMSG;
185
7560fffc
LP
186 /* In both read and write mode we refuse to open files with
187 * incompatible flags we don't know */
807e17f0 188#ifdef HAVE_XZ
7560fffc 189 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
190 return -EPROTONOSUPPORT;
191#else
cec736d2
LP
192 if (f->header->incompatible_flags != 0)
193 return -EPROTONOSUPPORT;
807e17f0 194#endif
cec736d2 195
7560fffc
LP
196 /* When open for writing we refuse to open files with
197 * compatible flags, too */
198 if (f->writable) {
199#ifdef HAVE_GCRYPT
baed47c3 200 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
201 return -EPROTONOSUPPORT;
202#else
203 if (f->header->compatible_flags != 0)
204 return -EPROTONOSUPPORT;
205#endif
206 }
207
db11ac1a
LP
208 if (f->header->state >= _STATE_MAX)
209 return -EBADMSG;
210
dca6219e
LP
211 /* The first addition was n_data, so check that we are at least this large */
212 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
213 return -EBADMSG;
214
8088cbd3 215 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
216 return -EBADMSG;
217
db11ac1a
LP
218 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
219 return -ENODATA;
220
221 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
222 return -ENODATA;
223
7762e02b
LP
224 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
225 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
226 !VALID64(le64toh(f->header->tail_object_offset)) ||
227 !VALID64(le64toh(f->header->entry_array_offset)))
228 return -ENODATA;
229
230 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
231 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
232 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
233 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
cec736d2
LP
234 return -ENODATA;
235
236 if (f->writable) {
ccdbaf91 237 uint8_t state;
cec736d2
LP
238 sd_id128_t machine_id;
239 int r;
240
241 r = sd_id128_get_machine(&machine_id);
242 if (r < 0)
243 return r;
244
245 if (!sd_id128_equal(machine_id, f->header->machine_id))
246 return -EHOSTDOWN;
247
de190aef 248 state = f->header->state;
cec736d2 249
71fa6f00
LP
250 if (state == STATE_ONLINE) {
251 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
252 return -EBUSY;
253 } else if (state == STATE_ARCHIVED)
cec736d2 254 return -ESHUTDOWN;
71fa6f00
LP
255 else if (state != STATE_OFFLINE) {
256 log_debug("Journal file %s has unknown state %u.", f->path, state);
257 return -EBUSY;
258 }
cec736d2
LP
259 }
260
8088cbd3 261 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
c586dbf1
LP
262
263 if (f->writable)
8088cbd3 264 f->seal = JOURNAL_HEADER_SEALED(f->header);
7560fffc 265
cec736d2
LP
266 return 0;
267}
268
269static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 270 uint64_t old_size, new_size;
fec2aa2f 271 int r;
cec736d2
LP
272
273 assert(f);
274
cec736d2 275 /* We assume that this file is not sparse, and we know that
38ac38b2 276 * for sure, since we always call posix_fallocate()
cec736d2
LP
277 * ourselves */
278
279 old_size =
23b0b2b2 280 le64toh(f->header->header_size) +
cec736d2
LP
281 le64toh(f->header->arena_size);
282
bc85bfee 283 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
284 if (new_size < le64toh(f->header->header_size))
285 new_size = le64toh(f->header->header_size);
bc85bfee
LP
286
287 if (new_size <= old_size)
cec736d2
LP
288 return 0;
289
bc85bfee
LP
290 if (f->metrics.max_size > 0 &&
291 new_size > f->metrics.max_size)
292 return -E2BIG;
cec736d2 293
bc85bfee
LP
294 if (new_size > f->metrics.min_size &&
295 f->metrics.keep_free > 0) {
cec736d2
LP
296 struct statvfs svfs;
297
298 if (fstatvfs(f->fd, &svfs) >= 0) {
299 uint64_t available;
300
301 available = svfs.f_bfree * svfs.f_bsize;
302
bc85bfee
LP
303 if (available >= f->metrics.keep_free)
304 available -= f->metrics.keep_free;
cec736d2
LP
305 else
306 available = 0;
307
308 if (new_size - old_size > available)
309 return -E2BIG;
310 }
311 }
312
bc85bfee
LP
313 /* Note that the glibc fallocate() fallback is very
314 inefficient, hence we try to minimize the allocation area
315 as we can. */
fec2aa2f
GV
316 r = posix_fallocate(f->fd, old_size, new_size - old_size);
317 if (r != 0)
318 return -r;
cec736d2
LP
319
320 if (fstat(f->fd, &f->last_stat) < 0)
321 return -errno;
322
23b0b2b2 323 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
324
325 return 0;
326}
327
fcde2389 328static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
cec736d2 329 assert(f);
cec736d2
LP
330 assert(ret);
331
7762e02b
LP
332 if (size <= 0)
333 return -EINVAL;
334
2a59ea54 335 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
336 if (offset + size > (uint64_t) f->last_stat.st_size) {
337 /* Hmm, out of range? Let's refresh the fstat() data
338 * first, before we trust that check. */
339
340 if (fstat(f->fd, &f->last_stat) < 0 ||
341 offset + size > (uint64_t) f->last_stat.st_size)
342 return -EADDRNOTAVAIL;
343 }
344
fcde2389 345 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
cec736d2
LP
346}
347
16e9f408
LP
348static uint64_t minimum_header_size(Object *o) {
349
350 static uint64_t table[] = {
351 [OBJECT_DATA] = sizeof(DataObject),
352 [OBJECT_FIELD] = sizeof(FieldObject),
353 [OBJECT_ENTRY] = sizeof(EntryObject),
354 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
355 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
356 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
357 [OBJECT_TAG] = sizeof(TagObject),
358 };
359
360 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
361 return sizeof(ObjectHeader);
362
363 return table[o->object.type];
364}
365
de190aef 366int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
367 int r;
368 void *t;
369 Object *o;
370 uint64_t s;
16e9f408 371 unsigned context;
cec736d2
LP
372
373 assert(f);
374 assert(ret);
375
db11ac1a
LP
376 /* Objects may only be located at multiple of 64 bit */
377 if (!VALID64(offset))
378 return -EFAULT;
379
16e9f408
LP
380 /* One context for each type, plus one catch-all for the rest */
381 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
382
fcde2389 383 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
384 if (r < 0)
385 return r;
386
387 o = (Object*) t;
388 s = le64toh(o->object.size);
389
390 if (s < sizeof(ObjectHeader))
391 return -EBADMSG;
392
16e9f408
LP
393 if (o->object.type <= OBJECT_UNUSED)
394 return -EBADMSG;
395
396 if (s < minimum_header_size(o))
397 return -EBADMSG;
398
de190aef 399 if (type >= 0 && o->object.type != type)
cec736d2
LP
400 return -EBADMSG;
401
402 if (s > sizeof(ObjectHeader)) {
fcde2389 403 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
cec736d2
LP
404 if (r < 0)
405 return r;
406
407 o = (Object*) t;
408 }
409
cec736d2
LP
410 *ret = o;
411 return 0;
412}
413
d98cc1f2 414static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
415 uint64_t r;
416
417 assert(f);
418
beec0085 419 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
420
421 if (seqnum) {
de190aef 422 /* If an external seqnum counter was passed, we update
c2373f84
LP
423 * both the local and the external one, and set it to
424 * the maximum of both */
425
426 if (*seqnum + 1 > r)
427 r = *seqnum + 1;
428
429 *seqnum = r;
430 }
431
beec0085 432 f->header->tail_entry_seqnum = htole64(r);
cec736d2 433
beec0085
LP
434 if (f->header->head_entry_seqnum == 0)
435 f->header->head_entry_seqnum = htole64(r);
de190aef 436
cec736d2
LP
437 return r;
438}
439
0284adc6 440int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
441 int r;
442 uint64_t p;
443 Object *tail, *o;
444 void *t;
445
446 assert(f);
16e9f408 447 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
448 assert(size >= sizeof(ObjectHeader));
449 assert(offset);
450 assert(ret);
451
452 p = le64toh(f->header->tail_object_offset);
cec736d2 453 if (p == 0)
23b0b2b2 454 p = le64toh(f->header->header_size);
cec736d2 455 else {
de190aef 456 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
457 if (r < 0)
458 return r;
459
460 p += ALIGN64(le64toh(tail->object.size));
461 }
462
463 r = journal_file_allocate(f, p, size);
464 if (r < 0)
465 return r;
466
fcde2389 467 r = journal_file_move_to(f, type, false, p, size, &t);
cec736d2
LP
468 if (r < 0)
469 return r;
470
471 o = (Object*) t;
472
473 zero(o->object);
de190aef 474 o->object.type = type;
cec736d2
LP
475 o->object.size = htole64(size);
476
477 f->header->tail_object_offset = htole64(p);
cec736d2
LP
478 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
479
480 *ret = o;
481 *offset = p;
482
483 return 0;
484}
485
de190aef 486static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
487 uint64_t s, p;
488 Object *o;
489 int r;
490
491 assert(f);
492
dfabe643 493 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
494 journal file and we want to make sure we never get beyond
495 75% fill level. Calculate the hash table size for the
496 maximum file size based on these metrics. */
497
dfabe643 498 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
499 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
500 s = DEFAULT_DATA_HASH_TABLE_SIZE;
501
2b43f939 502 log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 503
de190aef
LP
504 r = journal_file_append_object(f,
505 OBJECT_DATA_HASH_TABLE,
506 offsetof(Object, hash_table.items) + s,
507 &o, &p);
cec736d2
LP
508 if (r < 0)
509 return r;
510
de190aef 511 memset(o->hash_table.items, 0, s);
cec736d2 512
de190aef
LP
513 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
514 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
515
516 return 0;
517}
518
de190aef 519static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
520 uint64_t s, p;
521 Object *o;
522 int r;
523
524 assert(f);
525
de190aef
LP
526 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
527 r = journal_file_append_object(f,
528 OBJECT_FIELD_HASH_TABLE,
529 offsetof(Object, hash_table.items) + s,
530 &o, &p);
cec736d2
LP
531 if (r < 0)
532 return r;
533
de190aef 534 memset(o->hash_table.items, 0, s);
cec736d2 535
de190aef
LP
536 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
537 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
538
539 return 0;
540}
541
de190aef 542static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
543 uint64_t s, p;
544 void *t;
545 int r;
546
547 assert(f);
548
de190aef
LP
549 p = le64toh(f->header->data_hash_table_offset);
550 s = le64toh(f->header->data_hash_table_size);
cec736d2 551
de190aef 552 r = journal_file_move_to(f,
16e9f408 553 OBJECT_DATA_HASH_TABLE,
fcde2389 554 true,
de190aef
LP
555 p, s,
556 &t);
cec736d2
LP
557 if (r < 0)
558 return r;
559
de190aef 560 f->data_hash_table = t;
cec736d2
LP
561 return 0;
562}
563
de190aef 564static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
565 uint64_t s, p;
566 void *t;
567 int r;
568
569 assert(f);
570
de190aef
LP
571 p = le64toh(f->header->field_hash_table_offset);
572 s = le64toh(f->header->field_hash_table_size);
cec736d2 573
de190aef 574 r = journal_file_move_to(f,
16e9f408 575 OBJECT_FIELD_HASH_TABLE,
fcde2389 576 true,
de190aef
LP
577 p, s,
578 &t);
cec736d2
LP
579 if (r < 0)
580 return r;
581
de190aef 582 f->field_hash_table = t;
cec736d2
LP
583 return 0;
584}
585
de190aef
LP
586static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
587 uint64_t p, h;
cec736d2
LP
588 int r;
589
590 assert(f);
591 assert(o);
592 assert(offset > 0);
de190aef 593 assert(o->object.type == OBJECT_DATA);
cec736d2 594
48496df6
LP
595 /* This might alter the window we are looking at */
596
de190aef
LP
597 o->data.next_hash_offset = o->data.next_field_offset = 0;
598 o->data.entry_offset = o->data.entry_array_offset = 0;
599 o->data.n_entries = 0;
cec736d2 600
de190aef 601 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 602 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
603 if (p == 0) {
604 /* Only entry in the hash table is easy */
de190aef 605 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 606 } else {
48496df6
LP
607 /* Move back to the previous data object, to patch in
608 * pointer */
cec736d2 609
de190aef 610 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
611 if (r < 0)
612 return r;
613
de190aef 614 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
615 }
616
de190aef 617 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 618
dca6219e
LP
619 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
620 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
621
cec736d2
LP
622 return 0;
623}
624
de190aef
LP
625int journal_file_find_data_object_with_hash(
626 JournalFile *f,
627 const void *data, uint64_t size, uint64_t hash,
628 Object **ret, uint64_t *offset) {
48496df6 629
de190aef 630 uint64_t p, osize, h;
cec736d2
LP
631 int r;
632
633 assert(f);
634 assert(data || size == 0);
635
636 osize = offsetof(Object, data.payload) + size;
637
bc85bfee
LP
638 if (f->header->data_hash_table_size == 0)
639 return -EBADMSG;
640
de190aef
LP
641 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
642 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 643
de190aef
LP
644 while (p > 0) {
645 Object *o;
cec736d2 646
de190aef 647 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
648 if (r < 0)
649 return r;
650
807e17f0 651 if (le64toh(o->data.hash) != hash)
85a131e8 652 goto next;
807e17f0
LP
653
654 if (o->object.flags & OBJECT_COMPRESSED) {
655#ifdef HAVE_XZ
b785c858 656 uint64_t l, rsize;
cec736d2 657
807e17f0
LP
658 l = le64toh(o->object.size);
659 if (l <= offsetof(Object, data.payload))
cec736d2
LP
660 return -EBADMSG;
661
807e17f0
LP
662 l -= offsetof(Object, data.payload);
663
664 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
665 return -EBADMSG;
666
b785c858 667 if (rsize == size &&
807e17f0
LP
668 memcmp(f->compress_buffer, data, size) == 0) {
669
670 if (ret)
671 *ret = o;
672
673 if (offset)
674 *offset = p;
675
676 return 1;
677 }
678#else
679 return -EPROTONOSUPPORT;
680#endif
681
682 } else if (le64toh(o->object.size) == osize &&
683 memcmp(o->data.payload, data, size) == 0) {
684
cec736d2
LP
685 if (ret)
686 *ret = o;
687
688 if (offset)
689 *offset = p;
690
de190aef 691 return 1;
cec736d2
LP
692 }
693
85a131e8 694 next:
cec736d2
LP
695 p = le64toh(o->data.next_hash_offset);
696 }
697
de190aef
LP
698 return 0;
699}
700
701int journal_file_find_data_object(
702 JournalFile *f,
703 const void *data, uint64_t size,
704 Object **ret, uint64_t *offset) {
705
706 uint64_t hash;
707
708 assert(f);
709 assert(data || size == 0);
710
711 hash = hash64(data, size);
712
713 return journal_file_find_data_object_with_hash(f,
714 data, size, hash,
715 ret, offset);
716}
717
48496df6
LP
718static int journal_file_append_data(
719 JournalFile *f,
720 const void *data, uint64_t size,
721 Object **ret, uint64_t *offset) {
722
de190aef
LP
723 uint64_t hash, p;
724 uint64_t osize;
725 Object *o;
726 int r;
807e17f0 727 bool compressed = false;
de190aef
LP
728
729 assert(f);
730 assert(data || size == 0);
731
732 hash = hash64(data, size);
733
734 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
735 if (r < 0)
736 return r;
737 else if (r > 0) {
738
739 if (ret)
740 *ret = o;
741
742 if (offset)
743 *offset = p;
744
745 return 0;
746 }
747
748 osize = offsetof(Object, data.payload) + size;
749 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
750 if (r < 0)
751 return r;
752
cec736d2 753 o->data.hash = htole64(hash);
807e17f0
LP
754
755#ifdef HAVE_XZ
756 if (f->compress &&
757 size >= COMPRESSION_SIZE_THRESHOLD) {
758 uint64_t rsize;
759
760 compressed = compress_blob(data, size, o->data.payload, &rsize);
761
762 if (compressed) {
763 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
764 o->object.flags |= OBJECT_COMPRESSED;
765
807e17f0
LP
766 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
767 }
768 }
769#endif
770
64825d3c 771 if (!compressed && size > 0)
807e17f0 772 memcpy(o->data.payload, data, size);
cec736d2 773
de190aef 774 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
775 if (r < 0)
776 return r;
777
48496df6
LP
778 /* The linking might have altered the window, so let's
779 * refresh our pointer */
780 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
781 if (r < 0)
782 return r;
783
5996c7c2
LP
784#ifdef HAVE_GCRYPT
785 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
786 if (r < 0)
787 return r;
788#endif
789
cec736d2
LP
790 if (ret)
791 *ret = o;
792
793 if (offset)
de190aef 794 *offset = p;
cec736d2
LP
795
796 return 0;
797}
798
799uint64_t journal_file_entry_n_items(Object *o) {
800 assert(o);
7be3aa17 801 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
802
803 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
804}
805
0284adc6 806uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 807 assert(o);
7be3aa17 808 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
809
810 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
811}
812
fb9a24b6
LP
813uint64_t journal_file_hash_table_n_items(Object *o) {
814 assert(o);
815 assert(o->object.type == OBJECT_DATA_HASH_TABLE ||
816 o->object.type == OBJECT_FIELD_HASH_TABLE);
817
818 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
819}
820
de190aef 821static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
822 le64_t *first,
823 le64_t *idx,
de190aef 824 uint64_t p) {
cec736d2 825 int r;
de190aef
LP
826 uint64_t n = 0, ap = 0, q, i, a, hidx;
827 Object *o;
828
cec736d2 829 assert(f);
de190aef
LP
830 assert(first);
831 assert(idx);
832 assert(p > 0);
cec736d2 833
de190aef
LP
834 a = le64toh(*first);
835 i = hidx = le64toh(*idx);
836 while (a > 0) {
837
838 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
839 if (r < 0)
840 return r;
cec736d2 841
de190aef
LP
842 n = journal_file_entry_array_n_items(o);
843 if (i < n) {
844 o->entry_array.items[i] = htole64(p);
845 *idx = htole64(hidx + 1);
846 return 0;
847 }
cec736d2 848
de190aef
LP
849 i -= n;
850 ap = a;
851 a = le64toh(o->entry_array.next_entry_array_offset);
852 }
853
854 if (hidx > n)
855 n = (hidx+1) * 2;
856 else
857 n = n * 2;
858
859 if (n < 4)
860 n = 4;
861
862 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
863 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
864 &o, &q);
cec736d2
LP
865 if (r < 0)
866 return r;
867
feb12d3e 868#ifdef HAVE_GCRYPT
5996c7c2 869 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
b0af6f41
LP
870 if (r < 0)
871 return r;
feb12d3e 872#endif
b0af6f41 873
de190aef 874 o->entry_array.items[i] = htole64(p);
cec736d2 875
de190aef 876 if (ap == 0)
7be3aa17 877 *first = htole64(q);
cec736d2 878 else {
de190aef 879 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
880 if (r < 0)
881 return r;
882
de190aef
LP
883 o->entry_array.next_entry_array_offset = htole64(q);
884 }
cec736d2 885
2dee23eb
LP
886 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
887 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
888
de190aef
LP
889 *idx = htole64(hidx + 1);
890
891 return 0;
892}
cec736d2 893
de190aef 894static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
895 le64_t *extra,
896 le64_t *first,
897 le64_t *idx,
de190aef
LP
898 uint64_t p) {
899
900 int r;
901
902 assert(f);
903 assert(extra);
904 assert(first);
905 assert(idx);
906 assert(p > 0);
907
908 if (*idx == 0)
909 *extra = htole64(p);
910 else {
4fd052ae 911 le64_t i;
de190aef 912
7be3aa17 913 i = htole64(le64toh(*idx) - 1);
de190aef
LP
914 r = link_entry_into_array(f, first, &i, p);
915 if (r < 0)
916 return r;
cec736d2
LP
917 }
918
de190aef
LP
919 *idx = htole64(le64toh(*idx) + 1);
920 return 0;
921}
922
923static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
924 uint64_t p;
925 int r;
926 assert(f);
927 assert(o);
928 assert(offset > 0);
929
930 p = le64toh(o->entry.items[i].object_offset);
931 if (p == 0)
932 return -EINVAL;
933
934 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
935 if (r < 0)
936 return r;
937
de190aef
LP
938 return link_entry_into_array_plus_one(f,
939 &o->data.entry_offset,
940 &o->data.entry_array_offset,
941 &o->data.n_entries,
942 offset);
cec736d2
LP
943}
944
945static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 946 uint64_t n, i;
cec736d2
LP
947 int r;
948
949 assert(f);
950 assert(o);
951 assert(offset > 0);
de190aef 952 assert(o->object.type == OBJECT_ENTRY);
cec736d2 953
b788cc23
LP
954 __sync_synchronize();
955
cec736d2 956 /* Link up the entry itself */
de190aef
LP
957 r = link_entry_into_array(f,
958 &f->header->entry_array_offset,
959 &f->header->n_entries,
960 offset);
961 if (r < 0)
962 return r;
cec736d2 963
aaf53376 964 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 965
de190aef 966 if (f->header->head_entry_realtime == 0)
0ac38b70 967 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 968
0ac38b70 969 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
970 f->header->tail_entry_monotonic = o->entry.monotonic;
971
972 f->tail_entry_monotonic_valid = true;
cec736d2
LP
973
974 /* Link up the items */
975 n = journal_file_entry_n_items(o);
976 for (i = 0; i < n; i++) {
977 r = journal_file_link_entry_item(f, o, offset, i);
978 if (r < 0)
979 return r;
980 }
981
cec736d2
LP
982 return 0;
983}
984
985static int journal_file_append_entry_internal(
986 JournalFile *f,
987 const dual_timestamp *ts,
988 uint64_t xor_hash,
989 const EntryItem items[], unsigned n_items,
de190aef 990 uint64_t *seqnum,
cec736d2
LP
991 Object **ret, uint64_t *offset) {
992 uint64_t np;
993 uint64_t osize;
994 Object *o;
995 int r;
996
997 assert(f);
998 assert(items || n_items == 0);
de190aef 999 assert(ts);
cec736d2
LP
1000
1001 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1002
de190aef 1003 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
1004 if (r < 0)
1005 return r;
1006
d98cc1f2 1007 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 1008 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
1009 o->entry.realtime = htole64(ts->realtime);
1010 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
1011 o->entry.xor_hash = htole64(xor_hash);
1012 o->entry.boot_id = f->header->boot_id;
1013
feb12d3e 1014#ifdef HAVE_GCRYPT
5996c7c2 1015 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
b0af6f41
LP
1016 if (r < 0)
1017 return r;
feb12d3e 1018#endif
b0af6f41 1019
cec736d2
LP
1020 r = journal_file_link_entry(f, o, np);
1021 if (r < 0)
1022 return r;
1023
1024 if (ret)
1025 *ret = o;
1026
1027 if (offset)
1028 *offset = np;
1029
1030 return 0;
1031}
1032
cf244689 1033void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1034 assert(f);
1035
1036 /* inotify() does not receive IN_MODIFY events from file
1037 * accesses done via mmap(). After each access we hence
1038 * trigger IN_MODIFY by truncating the journal file to its
1039 * current size which triggers IN_MODIFY. */
1040
bc85bfee
LP
1041 __sync_synchronize();
1042
50f20cfd
LP
1043 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1044 log_error("Failed to to truncate file to its own size: %m");
1045}
1046
de190aef 1047int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1048 unsigned i;
1049 EntryItem *items;
1050 int r;
1051 uint64_t xor_hash = 0;
de190aef 1052 struct dual_timestamp _ts;
cec736d2
LP
1053
1054 assert(f);
1055 assert(iovec || n_iovec == 0);
1056
de190aef
LP
1057 if (!f->writable)
1058 return -EPERM;
1059
1060 if (!ts) {
1061 dual_timestamp_get(&_ts);
1062 ts = &_ts;
1063 }
1064
1065 if (f->tail_entry_monotonic_valid &&
1066 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1067 return -EINVAL;
1068
feb12d3e 1069#ifdef HAVE_GCRYPT
7560fffc
LP
1070 r = journal_file_maybe_append_tag(f, ts->realtime);
1071 if (r < 0)
1072 return r;
feb12d3e 1073#endif
7560fffc 1074
64825d3c
LP
1075 /* alloca() can't take 0, hence let's allocate at least one */
1076 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1077
1078 for (i = 0; i < n_iovec; i++) {
1079 uint64_t p;
1080 Object *o;
1081
1082 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1083 if (r < 0)
cf244689 1084 return r;
cec736d2
LP
1085
1086 xor_hash ^= le64toh(o->data.hash);
1087 items[i].object_offset = htole64(p);
de7b95cd 1088 items[i].hash = o->data.hash;
cec736d2
LP
1089 }
1090
de190aef 1091 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1092
50f20cfd
LP
1093 journal_file_post_change(f);
1094
cec736d2
LP
1095 return r;
1096}
1097
de190aef
LP
1098static int generic_array_get(JournalFile *f,
1099 uint64_t first,
1100 uint64_t i,
1101 Object **ret, uint64_t *offset) {
1102
cec736d2 1103 Object *o;
6c8a39b8 1104 uint64_t p = 0, a;
cec736d2
LP
1105 int r;
1106
1107 assert(f);
1108
de190aef
LP
1109 a = first;
1110 while (a > 0) {
1111 uint64_t n;
cec736d2 1112
de190aef
LP
1113 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1114 if (r < 0)
1115 return r;
cec736d2 1116
de190aef
LP
1117 n = journal_file_entry_array_n_items(o);
1118 if (i < n) {
1119 p = le64toh(o->entry_array.items[i]);
1120 break;
cec736d2
LP
1121 }
1122
de190aef
LP
1123 i -= n;
1124 a = le64toh(o->entry_array.next_entry_array_offset);
1125 }
1126
1127 if (a <= 0 || p <= 0)
1128 return 0;
1129
1130 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1131 if (r < 0)
1132 return r;
1133
1134 if (ret)
1135 *ret = o;
1136
1137 if (offset)
1138 *offset = p;
1139
1140 return 1;
1141}
1142
1143static int generic_array_get_plus_one(JournalFile *f,
1144 uint64_t extra,
1145 uint64_t first,
1146 uint64_t i,
1147 Object **ret, uint64_t *offset) {
1148
1149 Object *o;
1150
1151 assert(f);
1152
1153 if (i == 0) {
1154 int r;
1155
1156 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1157 if (r < 0)
1158 return r;
1159
de190aef
LP
1160 if (ret)
1161 *ret = o;
cec736d2 1162
de190aef
LP
1163 if (offset)
1164 *offset = extra;
cec736d2 1165
de190aef 1166 return 1;
cec736d2
LP
1167 }
1168
de190aef
LP
1169 return generic_array_get(f, first, i-1, ret, offset);
1170}
cec736d2 1171
de190aef
LP
/* Results returned by the test_object callbacks used for bisection */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object sorts before the needle */
        TEST_RIGHT = 2  /* the tested object sorts after the needle */
};
cec736d2 1177
de190aef
LP
1178static int generic_array_bisect(JournalFile *f,
1179 uint64_t first,
1180 uint64_t n,
1181 uint64_t needle,
1182 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1183 direction_t direction,
1184 Object **ret,
1185 uint64_t *offset,
1186 uint64_t *idx) {
1187
1188 uint64_t a, p, t = 0, i = 0, last_p = 0;
1189 bool subtract_one = false;
1190 Object *o, *array = NULL;
1191 int r;
cec736d2 1192
de190aef
LP
1193 assert(f);
1194 assert(test_object);
cec736d2 1195
de190aef
LP
1196 a = first;
1197 while (a > 0) {
1198 uint64_t left, right, k, lp;
1199
1200 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1201 if (r < 0)
1202 return r;
1203
de190aef
LP
1204 k = journal_file_entry_array_n_items(array);
1205 right = MIN(k, n);
1206 if (right <= 0)
1207 return 0;
cec736d2 1208
de190aef
LP
1209 i = right - 1;
1210 lp = p = le64toh(array->entry_array.items[i]);
1211 if (p <= 0)
1212 return -EBADMSG;
cec736d2 1213
de190aef
LP
1214 r = test_object(f, p, needle);
1215 if (r < 0)
1216 return r;
cec736d2 1217
de190aef
LP
1218 if (r == TEST_FOUND)
1219 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1220
1221 if (r == TEST_RIGHT) {
1222 left = 0;
1223 right -= 1;
1224 for (;;) {
1225 if (left == right) {
1226 if (direction == DIRECTION_UP)
1227 subtract_one = true;
1228
1229 i = left;
1230 goto found;
1231 }
1232
1233 assert(left < right);
1234
1235 i = (left + right) / 2;
1236 p = le64toh(array->entry_array.items[i]);
1237 if (p <= 0)
1238 return -EBADMSG;
1239
1240 r = test_object(f, p, needle);
1241 if (r < 0)
1242 return r;
cec736d2 1243
de190aef
LP
1244 if (r == TEST_FOUND)
1245 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1246
1247 if (r == TEST_RIGHT)
1248 right = i;
1249 else
1250 left = i + 1;
1251 }
1252 }
1253
cbdca852
LP
1254 if (k > n) {
1255 if (direction == DIRECTION_UP) {
1256 i = n;
1257 subtract_one = true;
1258 goto found;
1259 }
1260
cec736d2 1261 return 0;
cbdca852 1262 }
cec736d2 1263
de190aef
LP
1264 last_p = lp;
1265
1266 n -= k;
1267 t += k;
1268 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1269 }
1270
1271 return 0;
de190aef
LP
1272
1273found:
1274 if (subtract_one && t == 0 && i == 0)
1275 return 0;
1276
1277 if (subtract_one && i == 0)
1278 p = last_p;
1279 else if (subtract_one)
1280 p = le64toh(array->entry_array.items[i-1]);
1281 else
1282 p = le64toh(array->entry_array.items[i]);
1283
1284 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1285 if (r < 0)
1286 return r;
1287
1288 if (ret)
1289 *ret = o;
1290
1291 if (offset)
1292 *offset = p;
1293
1294 if (idx)
cbdca852 1295 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1296
1297 return 1;
cec736d2
LP
1298}
1299
de190aef
LP
1300static int generic_array_bisect_plus_one(JournalFile *f,
1301 uint64_t extra,
1302 uint64_t first,
1303 uint64_t n,
1304 uint64_t needle,
1305 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1306 direction_t direction,
1307 Object **ret,
1308 uint64_t *offset,
1309 uint64_t *idx) {
1310
cec736d2 1311 int r;
cbdca852
LP
1312 bool step_back = false;
1313 Object *o;
cec736d2
LP
1314
1315 assert(f);
de190aef 1316 assert(test_object);
cec736d2 1317
de190aef
LP
1318 if (n <= 0)
1319 return 0;
cec736d2 1320
de190aef
LP
1321 /* This bisects the array in object 'first', but first checks
1322 * an extra */
de190aef
LP
1323 r = test_object(f, extra, needle);
1324 if (r < 0)
1325 return r;
a536e261
LP
1326
1327 if (r == TEST_FOUND)
1328 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1329
cbdca852
LP
1330 /* if we are looking with DIRECTION_UP then we need to first
1331 see if in the actual array there is a matching entry, and
1332 return the last one of that. But if there isn't any we need
1333 to return this one. Hence remember this, and return it
1334 below. */
1335 if (r == TEST_LEFT)
1336 step_back = direction == DIRECTION_UP;
de190aef 1337
cbdca852
LP
1338 if (r == TEST_RIGHT) {
1339 if (direction == DIRECTION_DOWN)
1340 goto found;
1341 else
1342 return 0;
a536e261 1343 }
cec736d2 1344
de190aef
LP
1345 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1346
cbdca852
LP
1347 if (r == 0 && step_back)
1348 goto found;
1349
ecf68b1d 1350 if (r > 0 && idx)
de190aef
LP
1351 (*idx) ++;
1352
1353 return r;
cbdca852
LP
1354
1355found:
1356 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1357 if (r < 0)
1358 return r;
1359
1360 if (ret)
1361 *ret = o;
1362
1363 if (offset)
1364 *offset = extra;
1365
1366 if (idx)
1367 *idx = 0;
1368
1369 return 1;
1370}
1371
1372static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1373 assert(f);
1374 assert(p > 0);
1375
1376 if (p == needle)
1377 return TEST_FOUND;
1378 else if (p < needle)
1379 return TEST_LEFT;
1380 else
1381 return TEST_RIGHT;
1382}
1383
1384int journal_file_move_to_entry_by_offset(
1385 JournalFile *f,
1386 uint64_t p,
1387 direction_t direction,
1388 Object **ret,
1389 uint64_t *offset) {
1390
1391 return generic_array_bisect(f,
1392 le64toh(f->header->entry_array_offset),
1393 le64toh(f->header->n_entries),
1394 p,
1395 test_object_offset,
1396 direction,
1397 ret, offset, NULL);
de190aef
LP
1398}
1399
cbdca852 1400
de190aef
LP
1401static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1402 Object *o;
1403 int r;
1404
1405 assert(f);
1406 assert(p > 0);
1407
1408 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1409 if (r < 0)
1410 return r;
1411
de190aef
LP
1412 if (le64toh(o->entry.seqnum) == needle)
1413 return TEST_FOUND;
1414 else if (le64toh(o->entry.seqnum) < needle)
1415 return TEST_LEFT;
1416 else
1417 return TEST_RIGHT;
1418}
cec736d2 1419
de190aef
LP
1420int journal_file_move_to_entry_by_seqnum(
1421 JournalFile *f,
1422 uint64_t seqnum,
1423 direction_t direction,
1424 Object **ret,
1425 uint64_t *offset) {
1426
1427 return generic_array_bisect(f,
1428 le64toh(f->header->entry_array_offset),
1429 le64toh(f->header->n_entries),
1430 seqnum,
1431 test_object_seqnum,
1432 direction,
1433 ret, offset, NULL);
1434}
cec736d2 1435
de190aef
LP
1436static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1437 Object *o;
1438 int r;
1439
1440 assert(f);
1441 assert(p > 0);
1442
1443 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1444 if (r < 0)
1445 return r;
1446
1447 if (le64toh(o->entry.realtime) == needle)
1448 return TEST_FOUND;
1449 else if (le64toh(o->entry.realtime) < needle)
1450 return TEST_LEFT;
1451 else
1452 return TEST_RIGHT;
cec736d2
LP
1453}
1454
de190aef
LP
1455int journal_file_move_to_entry_by_realtime(
1456 JournalFile *f,
1457 uint64_t realtime,
1458 direction_t direction,
1459 Object **ret,
1460 uint64_t *offset) {
1461
1462 return generic_array_bisect(f,
1463 le64toh(f->header->entry_array_offset),
1464 le64toh(f->header->n_entries),
1465 realtime,
1466 test_object_realtime,
1467 direction,
1468 ret, offset, NULL);
1469}
1470
1471static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1472 Object *o;
1473 int r;
1474
1475 assert(f);
1476 assert(p > 0);
1477
1478 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1479 if (r < 0)
1480 return r;
1481
1482 if (le64toh(o->entry.monotonic) == needle)
1483 return TEST_FOUND;
1484 else if (le64toh(o->entry.monotonic) < needle)
1485 return TEST_LEFT;
1486 else
1487 return TEST_RIGHT;
1488}
1489
1490int journal_file_move_to_entry_by_monotonic(
1491 JournalFile *f,
1492 sd_id128_t boot_id,
1493 uint64_t monotonic,
1494 direction_t direction,
1495 Object **ret,
1496 uint64_t *offset) {
1497
10b6f904 1498 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1499 Object *o;
1500 int r;
1501
cbdca852 1502 assert(f);
de190aef 1503
cbdca852 1504 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1505 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1506 if (r < 0)
1507 return r;
cbdca852 1508 if (r == 0)
de190aef
LP
1509 return -ENOENT;
1510
1511 return generic_array_bisect_plus_one(f,
1512 le64toh(o->data.entry_offset),
1513 le64toh(o->data.entry_array_offset),
1514 le64toh(o->data.n_entries),
1515 monotonic,
1516 test_object_monotonic,
1517 direction,
1518 ret, offset, NULL);
1519}
1520
de190aef
LP
1521int journal_file_next_entry(
1522 JournalFile *f,
1523 Object *o, uint64_t p,
1524 direction_t direction,
1525 Object **ret, uint64_t *offset) {
1526
1527 uint64_t i, n;
cec736d2
LP
1528 int r;
1529
1530 assert(f);
de190aef
LP
1531 assert(p > 0 || !o);
1532
1533 n = le64toh(f->header->n_entries);
1534 if (n <= 0)
1535 return 0;
cec736d2
LP
1536
1537 if (!o)
de190aef 1538 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1539 else {
de190aef 1540 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1541 return -EINVAL;
1542
de190aef
LP
1543 r = generic_array_bisect(f,
1544 le64toh(f->header->entry_array_offset),
1545 le64toh(f->header->n_entries),
1546 p,
1547 test_object_offset,
1548 DIRECTION_DOWN,
1549 NULL, NULL,
1550 &i);
1551 if (r <= 0)
1552 return r;
1553
1554 if (direction == DIRECTION_DOWN) {
1555 if (i >= n - 1)
1556 return 0;
1557
1558 i++;
1559 } else {
1560 if (i <= 0)
1561 return 0;
1562
1563 i--;
1564 }
cec736d2
LP
1565 }
1566
de190aef
LP
1567 /* And jump to it */
1568 return generic_array_get(f,
1569 le64toh(f->header->entry_array_offset),
1570 i,
1571 ret, offset);
1572}
cec736d2 1573
de190aef
LP
1574int journal_file_skip_entry(
1575 JournalFile *f,
1576 Object *o, uint64_t p,
1577 int64_t skip,
1578 Object **ret, uint64_t *offset) {
1579
1580 uint64_t i, n;
1581 int r;
1582
1583 assert(f);
1584 assert(o);
1585 assert(p > 0);
1586
1587 if (o->object.type != OBJECT_ENTRY)
1588 return -EINVAL;
1589
1590 r = generic_array_bisect(f,
1591 le64toh(f->header->entry_array_offset),
1592 le64toh(f->header->n_entries),
1593 p,
1594 test_object_offset,
1595 DIRECTION_DOWN,
1596 NULL, NULL,
1597 &i);
1598 if (r <= 0)
cec736d2
LP
1599 return r;
1600
de190aef
LP
1601 /* Calculate new index */
1602 if (skip < 0) {
1603 if ((uint64_t) -skip >= i)
1604 i = 0;
1605 else
1606 i = i - (uint64_t) -skip;
1607 } else
1608 i += (uint64_t) skip;
cec736d2 1609
de190aef
LP
1610 n = le64toh(f->header->n_entries);
1611 if (n <= 0)
1612 return -EBADMSG;
cec736d2 1613
de190aef
LP
1614 if (i >= n)
1615 i = n-1;
1616
1617 return generic_array_get(f,
1618 le64toh(f->header->entry_array_offset),
1619 i,
1620 ret, offset);
cec736d2
LP
1621}
1622
de190aef
LP
1623int journal_file_next_entry_for_data(
1624 JournalFile *f,
1625 Object *o, uint64_t p,
1626 uint64_t data_offset,
1627 direction_t direction,
1628 Object **ret, uint64_t *offset) {
1629
1630 uint64_t n, i;
cec736d2 1631 int r;
de190aef 1632 Object *d;
cec736d2
LP
1633
1634 assert(f);
de190aef 1635 assert(p > 0 || !o);
cec736d2 1636
de190aef 1637 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1638 if (r < 0)
de190aef 1639 return r;
cec736d2 1640
de190aef
LP
1641 n = le64toh(d->data.n_entries);
1642 if (n <= 0)
1643 return n;
cec736d2 1644
de190aef
LP
1645 if (!o)
1646 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1647 else {
1648 if (o->object.type != OBJECT_ENTRY)
1649 return -EINVAL;
cec736d2 1650
de190aef
LP
1651 r = generic_array_bisect_plus_one(f,
1652 le64toh(d->data.entry_offset),
1653 le64toh(d->data.entry_array_offset),
1654 le64toh(d->data.n_entries),
1655 p,
1656 test_object_offset,
1657 DIRECTION_DOWN,
1658 NULL, NULL,
1659 &i);
1660
1661 if (r <= 0)
cec736d2
LP
1662 return r;
1663
de190aef
LP
1664 if (direction == DIRECTION_DOWN) {
1665 if (i >= n - 1)
1666 return 0;
cec736d2 1667
de190aef
LP
1668 i++;
1669 } else {
1670 if (i <= 0)
1671 return 0;
cec736d2 1672
de190aef
LP
1673 i--;
1674 }
cec736d2 1675
de190aef 1676 }
cec736d2 1677
de190aef
LP
1678 return generic_array_get_plus_one(f,
1679 le64toh(d->data.entry_offset),
1680 le64toh(d->data.entry_array_offset),
1681 i,
1682 ret, offset);
1683}
cec736d2 1684
cbdca852
LP
1685int journal_file_move_to_entry_by_offset_for_data(
1686 JournalFile *f,
1687 uint64_t data_offset,
1688 uint64_t p,
1689 direction_t direction,
1690 Object **ret, uint64_t *offset) {
1691
1692 int r;
1693 Object *d;
1694
1695 assert(f);
1696
1697 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1698 if (r < 0)
1699 return r;
1700
1701 return generic_array_bisect_plus_one(f,
1702 le64toh(d->data.entry_offset),
1703 le64toh(d->data.entry_array_offset),
1704 le64toh(d->data.n_entries),
1705 p,
1706 test_object_offset,
1707 direction,
1708 ret, offset, NULL);
1709}
1710
1711int journal_file_move_to_entry_by_monotonic_for_data(
1712 JournalFile *f,
1713 uint64_t data_offset,
1714 sd_id128_t boot_id,
1715 uint64_t monotonic,
1716 direction_t direction,
1717 Object **ret, uint64_t *offset) {
1718
1719 char t[9+32+1] = "_BOOT_ID=";
1720 Object *o, *d;
1721 int r;
1722 uint64_t b, z;
1723
1724 assert(f);
1725
1726 /* First, seek by time */
1727 sd_id128_to_string(boot_id, t + 9);
1728 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1729 if (r < 0)
1730 return r;
1731 if (r == 0)
1732 return -ENOENT;
1733
1734 r = generic_array_bisect_plus_one(f,
1735 le64toh(o->data.entry_offset),
1736 le64toh(o->data.entry_array_offset),
1737 le64toh(o->data.n_entries),
1738 monotonic,
1739 test_object_monotonic,
1740 direction,
1741 NULL, &z, NULL);
1742 if (r <= 0)
1743 return r;
1744
1745 /* And now, continue seeking until we find an entry that
1746 * exists in both bisection arrays */
1747
1748 for (;;) {
1749 Object *qo;
1750 uint64_t p, q;
1751
1752 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1753 if (r < 0)
1754 return r;
1755
1756 r = generic_array_bisect_plus_one(f,
1757 le64toh(d->data.entry_offset),
1758 le64toh(d->data.entry_array_offset),
1759 le64toh(d->data.n_entries),
1760 z,
1761 test_object_offset,
1762 direction,
1763 NULL, &p, NULL);
1764 if (r <= 0)
1765 return r;
1766
1767 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1768 if (r < 0)
1769 return r;
1770
1771 r = generic_array_bisect_plus_one(f,
1772 le64toh(o->data.entry_offset),
1773 le64toh(o->data.entry_array_offset),
1774 le64toh(o->data.n_entries),
1775 p,
1776 test_object_offset,
1777 direction,
1778 &qo, &q, NULL);
1779
1780 if (r <= 0)
1781 return r;
1782
1783 if (p == q) {
1784 if (ret)
1785 *ret = qo;
1786 if (offset)
1787 *offset = q;
1788
1789 return 1;
1790 }
1791
1792 z = q;
1793 }
1794
1795 return 0;
1796}
1797
de190aef
LP
1798int journal_file_move_to_entry_by_seqnum_for_data(
1799 JournalFile *f,
1800 uint64_t data_offset,
1801 uint64_t seqnum,
1802 direction_t direction,
1803 Object **ret, uint64_t *offset) {
cec736d2 1804
de190aef
LP
1805 Object *d;
1806 int r;
cec736d2 1807
91a31dde
LP
1808 assert(f);
1809
de190aef 1810 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1811 if (r < 0)
de190aef 1812 return r;
cec736d2 1813
de190aef
LP
1814 return generic_array_bisect_plus_one(f,
1815 le64toh(d->data.entry_offset),
1816 le64toh(d->data.entry_array_offset),
1817 le64toh(d->data.n_entries),
1818 seqnum,
1819 test_object_seqnum,
1820 direction,
1821 ret, offset, NULL);
1822}
cec736d2 1823
de190aef
LP
1824int journal_file_move_to_entry_by_realtime_for_data(
1825 JournalFile *f,
1826 uint64_t data_offset,
1827 uint64_t realtime,
1828 direction_t direction,
1829 Object **ret, uint64_t *offset) {
1830
1831 Object *d;
1832 int r;
1833
91a31dde
LP
1834 assert(f);
1835
de190aef 1836 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1837 if (r < 0)
de190aef
LP
1838 return r;
1839
1840 return generic_array_bisect_plus_one(f,
1841 le64toh(d->data.entry_offset),
1842 le64toh(d->data.entry_array_offset),
1843 le64toh(d->data.n_entries),
1844 realtime,
1845 test_object_realtime,
1846 direction,
1847 ret, offset, NULL);
cec736d2
LP
1848}
1849
0284adc6 1850void journal_file_dump(JournalFile *f) {
7560fffc 1851 Object *o;
7560fffc 1852 int r;
0284adc6 1853 uint64_t p;
7560fffc
LP
1854
1855 assert(f);
1856
0284adc6 1857 journal_file_print_header(f);
7560fffc 1858
0284adc6
LP
1859 p = le64toh(f->header->header_size);
1860 while (p != 0) {
1861 r = journal_file_move_to_object(f, -1, p, &o);
1862 if (r < 0)
1863 goto fail;
7560fffc 1864
0284adc6 1865 switch (o->object.type) {
d98cc1f2 1866
0284adc6
LP
1867 case OBJECT_UNUSED:
1868 printf("Type: OBJECT_UNUSED\n");
1869 break;
d98cc1f2 1870
0284adc6
LP
1871 case OBJECT_DATA:
1872 printf("Type: OBJECT_DATA\n");
1873 break;
7560fffc 1874
0284adc6 1875 case OBJECT_ENTRY:
f7fab8a5 1876 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
0284adc6
LP
1877 (unsigned long long) le64toh(o->entry.seqnum),
1878 (unsigned long long) le64toh(o->entry.monotonic),
1879 (unsigned long long) le64toh(o->entry.realtime));
1880 break;
7560fffc 1881
0284adc6
LP
1882 case OBJECT_FIELD_HASH_TABLE:
1883 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1884 break;
7560fffc 1885
0284adc6
LP
1886 case OBJECT_DATA_HASH_TABLE:
1887 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1888 break;
7560fffc 1889
0284adc6
LP
1890 case OBJECT_ENTRY_ARRAY:
1891 printf("Type: OBJECT_ENTRY_ARRAY\n");
1892 break;
7560fffc 1893
0284adc6 1894 case OBJECT_TAG:
f7fab8a5
LP
1895 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1896 (unsigned long long) le64toh(o->tag.seqnum),
1897 (unsigned long long) le64toh(o->tag.epoch));
0284adc6
LP
1898 break;
1899 }
7560fffc 1900
0284adc6
LP
1901 if (o->object.flags & OBJECT_COMPRESSED)
1902 printf("Flags: COMPRESSED\n");
7560fffc 1903
0284adc6
LP
1904 if (p == le64toh(f->header->tail_object_offset))
1905 p = 0;
1906 else
1907 p = p + ALIGN64(le64toh(o->object.size));
1908 }
7560fffc 1909
0284adc6
LP
1910 return;
1911fail:
1912 log_error("File corrupt");
7560fffc
LP
1913}
1914
0284adc6
LP
1915void journal_file_print_header(JournalFile *f) {
1916 char a[33], b[33], c[33];
1917 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
a1a03e30
LP
1918 struct stat st;
1919 char bytes[FORMAT_BYTES_MAX];
7560fffc
LP
1920
1921 assert(f);
7560fffc 1922
0284adc6
LP
1923 printf("File Path: %s\n"
1924 "File ID: %s\n"
1925 "Machine ID: %s\n"
1926 "Boot ID: %s\n"
1927 "Sequential Number ID: %s\n"
1928 "State: %s\n"
1929 "Compatible Flags:%s%s\n"
1930 "Incompatible Flags:%s%s\n"
1931 "Header size: %llu\n"
1932 "Arena size: %llu\n"
1933 "Data Hash Table Size: %llu\n"
1934 "Field Hash Table Size: %llu\n"
0284adc6
LP
1935 "Rotate Suggested: %s\n"
1936 "Head Sequential Number: %llu\n"
1937 "Tail Sequential Number: %llu\n"
1938 "Head Realtime Timestamp: %s\n"
3223f44f
LP
1939 "Tail Realtime Timestamp: %s\n"
1940 "Objects: %llu\n"
1941 "Entry Objects: %llu\n",
0284adc6
LP
1942 f->path,
1943 sd_id128_to_string(f->header->file_id, a),
1944 sd_id128_to_string(f->header->machine_id, b),
1945 sd_id128_to_string(f->header->boot_id, c),
1946 sd_id128_to_string(f->header->seqnum_id, c),
3223f44f
LP
1947 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1948 f->header->state == STATE_ONLINE ? "ONLINE" :
1949 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
8088cbd3
LP
1950 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1951 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1952 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1953 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
0284adc6
LP
1954 (unsigned long long) le64toh(f->header->header_size),
1955 (unsigned long long) le64toh(f->header->arena_size),
1956 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1957 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
0284adc6
LP
1958 yes_no(journal_file_rotate_suggested(f)),
1959 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1960 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1961 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
3223f44f
LP
1962 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1963 (unsigned long long) le64toh(f->header->n_objects),
1964 (unsigned long long) le64toh(f->header->n_entries));
7560fffc 1965
0284adc6
LP
1966 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1967 printf("Data Objects: %llu\n"
1968 "Data Hash Table Fill: %.1f%%\n",
1969 (unsigned long long) le64toh(f->header->n_data),
1970 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 1971
0284adc6
LP
1972 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1973 printf("Field Objects: %llu\n"
1974 "Field Hash Table Fill: %.1f%%\n",
1975 (unsigned long long) le64toh(f->header->n_fields),
1976 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
1977
1978 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1979 printf("Tag Objects: %llu\n",
1980 (unsigned long long) le64toh(f->header->n_tags));
1981 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1982 printf("Entry Array Objects: %llu\n",
1983 (unsigned long long) le64toh(f->header->n_entry_arrays));
a1a03e30
LP
1984
1985 if (fstat(f->fd, &st) >= 0)
1986 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
7560fffc
LP
1987}
1988
0284adc6
LP
1989int journal_file_open(
1990 const char *fname,
1991 int flags,
1992 mode_t mode,
1993 bool compress,
baed47c3 1994 bool seal,
0284adc6
LP
1995 JournalMetrics *metrics,
1996 MMapCache *mmap_cache,
1997 JournalFile *template,
1998 JournalFile **ret) {
7560fffc 1999
0284adc6
LP
2000 JournalFile *f;
2001 int r;
2002 bool newly_created = false;
7560fffc 2003
0284adc6 2004 assert(fname);
7560fffc 2005
0284adc6
LP
2006 if ((flags & O_ACCMODE) != O_RDONLY &&
2007 (flags & O_ACCMODE) != O_RDWR)
2008 return -EINVAL;
7560fffc 2009
a0108012
LP
2010 if (!endswith(fname, ".journal") &&
2011 !endswith(fname, ".journal~"))
0284adc6 2012 return -EINVAL;
7560fffc 2013
0284adc6
LP
2014 f = new0(JournalFile, 1);
2015 if (!f)
2016 return -ENOMEM;
7560fffc 2017
0284adc6
LP
2018 f->fd = -1;
2019 f->mode = mode;
7560fffc 2020
0284adc6
LP
2021 f->flags = flags;
2022 f->prot = prot_from_flags(flags);
2023 f->writable = (flags & O_ACCMODE) != O_RDONLY;
48b61739 2024#ifdef HAVE_XZ
0284adc6 2025 f->compress = compress;
48b61739 2026#endif
49a32d43 2027#ifdef HAVE_GCRYPT
baed47c3 2028 f->seal = seal;
49a32d43 2029#endif
7560fffc 2030
0284adc6
LP
2031 if (mmap_cache)
2032 f->mmap = mmap_cache_ref(mmap_cache);
2033 else {
84168d80 2034 f->mmap = mmap_cache_new();
0284adc6
LP
2035 if (!f->mmap) {
2036 r = -ENOMEM;
2037 goto fail;
2038 }
2039 }
7560fffc 2040
0284adc6
LP
2041 f->path = strdup(fname);
2042 if (!f->path) {
2043 r = -ENOMEM;
2044 goto fail;
2045 }
7560fffc 2046
0284adc6
LP
2047 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2048 if (f->fd < 0) {
2049 r = -errno;
2050 goto fail;
7560fffc 2051 }
7560fffc 2052
0284adc6
LP
2053 if (fstat(f->fd, &f->last_stat) < 0) {
2054 r = -errno;
2055 goto fail;
2056 }
7560fffc 2057
0284adc6
LP
2058 if (f->last_stat.st_size == 0 && f->writable) {
2059 newly_created = true;
7560fffc 2060
feb12d3e 2061#ifdef HAVE_GCRYPT
0284adc6 2062 /* Try to load the FSPRG state, and if we can't, then
baed47c3 2063 * just don't do sealing */
49a32d43
LP
2064 if (f->seal) {
2065 r = journal_file_fss_load(f);
2066 if (r < 0)
2067 f->seal = false;
2068 }
feb12d3e 2069#endif
7560fffc 2070
0284adc6
LP
2071 r = journal_file_init_header(f, template);
2072 if (r < 0)
2073 goto fail;
7560fffc 2074
0284adc6
LP
2075 if (fstat(f->fd, &f->last_stat) < 0) {
2076 r = -errno;
2077 goto fail;
2078 }
2079 }
7560fffc 2080
0284adc6
LP
2081 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2082 r = -EIO;
2083 goto fail;
2084 }
7560fffc 2085
0284adc6
LP
2086 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2087 if (f->header == MAP_FAILED) {
2088 f->header = NULL;
2089 r = -errno;
2090 goto fail;
2091 }
7560fffc 2092
0284adc6
LP
2093 if (!newly_created) {
2094 r = journal_file_verify_header(f);
2095 if (r < 0)
2096 goto fail;
2097 }
7560fffc 2098
feb12d3e 2099#ifdef HAVE_GCRYPT
0284adc6 2100 if (!newly_created && f->writable) {
baed47c3 2101 r = journal_file_fss_load(f);
0284adc6
LP
2102 if (r < 0)
2103 goto fail;
2104 }
feb12d3e 2105#endif
cec736d2
LP
2106
2107 if (f->writable) {
4a92baf3
LP
2108 if (metrics) {
2109 journal_default_metrics(metrics, f->fd);
2110 f->metrics = *metrics;
2111 } else if (template)
2112 f->metrics = template->metrics;
2113
cec736d2
LP
2114 r = journal_file_refresh_header(f);
2115 if (r < 0)
2116 goto fail;
2117 }
2118
feb12d3e 2119#ifdef HAVE_GCRYPT
baed47c3 2120 r = journal_file_hmac_setup(f);
14d10188
LP
2121 if (r < 0)
2122 goto fail;
feb12d3e 2123#endif
14d10188 2124
cec736d2 2125 if (newly_created) {
de190aef 2126 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2127 if (r < 0)
2128 goto fail;
2129
de190aef 2130 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2131 if (r < 0)
2132 goto fail;
7560fffc 2133
feb12d3e 2134#ifdef HAVE_GCRYPT
7560fffc
LP
2135 r = journal_file_append_first_tag(f);
2136 if (r < 0)
2137 goto fail;
feb12d3e 2138#endif
cec736d2
LP
2139 }
2140
de190aef 2141 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2142 if (r < 0)
2143 goto fail;
2144
de190aef 2145 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2146 if (r < 0)
2147 goto fail;
2148
2149 if (ret)
2150 *ret = f;
2151
2152 return 0;
2153
2154fail:
2155 journal_file_close(f);
2156
2157 return r;
2158}
0ac38b70 2159
baed47c3 2160int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
0ac38b70
LP
2161 char *p;
2162 size_t l;
2163 JournalFile *old_file, *new_file = NULL;
2164 int r;
2165
2166 assert(f);
2167 assert(*f);
2168
2169 old_file = *f;
2170
2171 if (!old_file->writable)
2172 return -EINVAL;
2173
2174 if (!endswith(old_file->path, ".journal"))
2175 return -EINVAL;
2176
2177 l = strlen(old_file->path);
2178
9447a7f1 2179 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2180 if (!p)
2181 return -ENOMEM;
2182
2183 memcpy(p, old_file->path, l - 8);
2184 p[l-8] = '@';
2185 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2186 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2187 "-%016llx-%016llx.journal",
beec0085 2188 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
0ac38b70
LP
2189 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2190
2191 r = rename(old_file->path, p);
2192 free(p);
2193
2194 if (r < 0)
2195 return -errno;
2196
ccdbaf91 2197 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2198
baed47c3 2199 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2200 journal_file_close(old_file);
2201
2202 *f = new_file;
2203 return r;
2204}
2205
9447a7f1
LP
2206int journal_file_open_reliably(
2207 const char *fname,
2208 int flags,
2209 mode_t mode,
7560fffc 2210 bool compress,
baed47c3 2211 bool seal,
4a92baf3 2212 JournalMetrics *metrics,
27370278 2213 MMapCache *mmap_cache,
9447a7f1
LP
2214 JournalFile *template,
2215 JournalFile **ret) {
2216
2217 int r;
2218 size_t l;
2219 char *p;
2220
baed47c3 2221 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2222 metrics, mmap_cache, template, ret);
0071d9f1
LP
2223 if (r != -EBADMSG && /* corrupted */
2224 r != -ENODATA && /* truncated */
2225 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2226 r != -EPROTONOSUPPORT && /* incompatible feature */
2227 r != -EBUSY && /* unclean shutdown */
2228 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2229 return r;
2230
2231 if ((flags & O_ACCMODE) == O_RDONLY)
2232 return r;
2233
2234 if (!(flags & O_CREAT))
2235 return r;
2236
7560fffc
LP
2237 if (!endswith(fname, ".journal"))
2238 return r;
2239
5c70eab4
LP
2240 /* The file is corrupted. Rotate it away and try it again (but only once) */
2241
9447a7f1
LP
2242 l = strlen(fname);
2243 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2244 (int) (l-8), fname,
2245 (unsigned long long) now(CLOCK_REALTIME),
2246 random_ull()) < 0)
2247 return -ENOMEM;
2248
2249 r = rename(fname, p);
2250 free(p);
2251 if (r < 0)
2252 return -errno;
2253
a1a1898f 2254 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2255
baed47c3 2256 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2257 metrics, mmap_cache, template, ret);
9447a7f1
LP
2258}
2259
cf244689
LP
2260
2261int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2262 uint64_t i, n;
2263 uint64_t q, xor_hash = 0;
2264 int r;
2265 EntryItem *items;
2266 dual_timestamp ts;
2267
2268 assert(from);
2269 assert(to);
2270 assert(o);
2271 assert(p);
2272
2273 if (!to->writable)
2274 return -EPERM;
2275
2276 ts.monotonic = le64toh(o->entry.monotonic);
2277 ts.realtime = le64toh(o->entry.realtime);
2278
2279 if (to->tail_entry_monotonic_valid &&
2280 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2281 return -EINVAL;
2282
cf244689
LP
2283 n = journal_file_entry_n_items(o);
2284 items = alloca(sizeof(EntryItem) * n);
2285
2286 for (i = 0; i < n; i++) {
4fd052ae
FC
2287 uint64_t l, h;
2288 le64_t le_hash;
cf244689
LP
2289 size_t t;
2290 void *data;
2291 Object *u;
2292
2293 q = le64toh(o->entry.items[i].object_offset);
2294 le_hash = o->entry.items[i].hash;
2295
2296 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2297 if (r < 0)
2298 return r;
2299
2300 if (le_hash != o->data.hash)
2301 return -EBADMSG;
2302
2303 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2304 t = (size_t) l;
2305
2306 /* We hit the limit on 32bit machines */
2307 if ((uint64_t) t != l)
2308 return -E2BIG;
2309
2310 if (o->object.flags & OBJECT_COMPRESSED) {
2311#ifdef HAVE_XZ
2312 uint64_t rsize;
2313
2314 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2315 return -EBADMSG;
2316
2317 data = from->compress_buffer;
2318 l = rsize;
2319#else
2320 return -EPROTONOSUPPORT;
2321#endif
2322 } else
2323 data = o->data.payload;
2324
2325 r = journal_file_append_data(to, data, l, &u, &h);
2326 if (r < 0)
2327 return r;
2328
2329 xor_hash ^= le64toh(u->data.hash);
2330 items[i].object_offset = htole64(h);
2331 items[i].hash = u->data.hash;
2332
2333 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2334 if (r < 0)
2335 return r;
2336 }
2337
2338 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2339}
babfc091
LP
2340
2341void journal_default_metrics(JournalMetrics *m, int fd) {
2342 uint64_t fs_size = 0;
2343 struct statvfs ss;
a7bc2c2a 2344 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2345
2346 assert(m);
2347 assert(fd >= 0);
2348
2349 if (fstatvfs(fd, &ss) >= 0)
2350 fs_size = ss.f_frsize * ss.f_blocks;
2351
2352 if (m->max_use == (uint64_t) -1) {
2353
2354 if (fs_size > 0) {
2355 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2356
2357 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2358 m->max_use = DEFAULT_MAX_USE_UPPER;
2359
2360 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2361 m->max_use = DEFAULT_MAX_USE_LOWER;
2362 } else
2363 m->max_use = DEFAULT_MAX_USE_LOWER;
2364 } else {
2365 m->max_use = PAGE_ALIGN(m->max_use);
2366
2367 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2368 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2369 }
2370
2371 if (m->max_size == (uint64_t) -1) {
2372 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2373
2374 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2375 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2376 } else
2377 m->max_size = PAGE_ALIGN(m->max_size);
2378
2379 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2380 m->max_size = JOURNAL_FILE_SIZE_MIN;
2381
2382 if (m->max_size*2 > m->max_use)
2383 m->max_use = m->max_size*2;
2384
2385 if (m->min_size == (uint64_t) -1)
2386 m->min_size = JOURNAL_FILE_SIZE_MIN;
2387 else {
2388 m->min_size = PAGE_ALIGN(m->min_size);
2389
2390 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2391 m->min_size = JOURNAL_FILE_SIZE_MIN;
2392
2393 if (m->min_size > m->max_size)
2394 m->max_size = m->min_size;
2395 }
2396
2397 if (m->keep_free == (uint64_t) -1) {
2398
2399 if (fs_size > 0) {
2400 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2401
2402 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2403 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2404
2405 } else
2406 m->keep_free = DEFAULT_KEEP_FREE;
2407 }
2408
2b43f939
LP
2409 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2410 format_bytes(a, sizeof(a), m->max_use),
2411 format_bytes(b, sizeof(b), m->max_size),
2412 format_bytes(c, sizeof(c), m->min_size),
2413 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2414}
08984293
LP
2415
2416int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2417 assert(f);
2418 assert(from || to);
2419
2420 if (from) {
162566a4
LP
2421 if (f->header->head_entry_realtime == 0)
2422 return -ENOENT;
08984293 2423
162566a4 2424 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2425 }
2426
2427 if (to) {
162566a4
LP
2428 if (f->header->tail_entry_realtime == 0)
2429 return -ENOENT;
08984293 2430
162566a4 2431 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2432 }
2433
2434 return 1;
2435}
2436
2437int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2438 char t[9+32+1] = "_BOOT_ID=";
2439 Object *o;
2440 uint64_t p;
2441 int r;
2442
2443 assert(f);
2444 assert(from || to);
2445
2446 sd_id128_to_string(boot_id, t + 9);
2447
2448 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2449 if (r <= 0)
2450 return r;
2451
2452 if (le64toh(o->data.n_entries) <= 0)
2453 return 0;
2454
2455 if (from) {
2456 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2457 if (r < 0)
2458 return r;
2459
2460 *from = le64toh(o->entry.monotonic);
2461 }
2462
2463 if (to) {
2464 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2465 if (r < 0)
2466 return r;
2467
2468 r = generic_array_get_plus_one(f,
2469 le64toh(o->data.entry_offset),
2470 le64toh(o->data.entry_array_offset),
2471 le64toh(o->data.n_entries)-1,
2472 &o, NULL);
2473 if (r <= 0)
2474 return r;
2475
2476 *to = le64toh(o->entry.monotonic);
2477 }
2478
2479 return 1;
2480}
dca6219e
LP
2481
2482bool journal_file_rotate_suggested(JournalFile *f) {
2483 assert(f);
2484
2485 /* If we gained new header fields we gained new features,
2486 * hence suggest a rotation */
361f9cbc
LP
2487 if (le64toh(f->header->header_size) < sizeof(Header)) {
2488 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2489 return true;
361f9cbc 2490 }
dca6219e
LP
2491
2492 /* Let's check if the hash tables grew over a certain fill
2493 * level (75%, borrowing this value from Java's hash table
2494 * implementation), and if so suggest a rotation. To calculate
2495 * the fill level we need the n_data field, which only exists
2496 * in newer versions. */
2497
2498 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
2499 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2500 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2501 f->path,
2502 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2503 (unsigned long long) le64toh(f->header->n_data),
2504 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2505 (unsigned long long) (f->last_stat.st_size),
2506 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 2507 return true;
361f9cbc 2508 }
dca6219e
LP
2509
2510 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
2511 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2512 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2513 f->path,
2514 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2515 (unsigned long long) le64toh(f->header->n_fields),
2516 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 2517 return true;
361f9cbc 2518 }
dca6219e
LP
2519
2520 return false;
2521}