]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journal: don't try to compress without XZ
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
0284adc6 32#include "journal-authenticate.h"
cec736d2 33#include "lookup3.h"
807e17f0 34#include "compress.h"
7560fffc 35#include "fsprg.h"
cec736d2 36
4a92baf3
LP
/* Sizes, in bytes, used for the data and the field hash table. For the
 * data hash table this value acts as a lower bound. */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL * sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL * sizeof(HashItem))

/* Data payloads of at least this many bytes are candidates for compression */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* Smallest size a journal file may have */
#define JOURNAL_FILE_SIZE_MIN (64ULL * 1024ULL)                       /* 64 KiB */

/* Lower and upper bound applied when max_use is derived from the size
 * of the file system */
#define DEFAULT_MAX_USE_LOWER (1ULL * 1024ULL * 1024ULL)              /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)    /* 4 GiB */

/* Upper bound applied when max_size is derived from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL * 1024ULL * 1024ULL)           /* 128 MiB */

/* Upper bound applied when keep_free is derived from the size of the
 * file system */
#define DEFAULT_KEEP_FREE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)  /* 4 GiB */

/* keep_free value used when the file system size cannot be
 * determined */
#define DEFAULT_KEEP_FREE (1024ULL * 1024ULL)                         /* 1 MiB */

/* n_data was the first field added to the header after the initial file
 * format design, hence everything in front of it must always be there */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 63
cec736d2 64void journal_file_close(JournalFile *f) {
de190aef 65 assert(f);
cec736d2 66
feb12d3e 67#ifdef HAVE_GCRYPT
b0af6f41 68 /* Write the final tag */
c586dbf1 69 if (f->seal && f->writable)
b0af6f41 70 journal_file_append_tag(f);
feb12d3e 71#endif
b0af6f41 72
7560fffc 73 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
74 if (f->mmap && f->fd >= 0)
75 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc
LP
76
77 if (f->writable && f->fd >= 0)
78 fdatasync(f->fd);
79
d384c7a8 80 if (f->header) {
cd96b3b8
LP
81 /* Mark the file offline. Don't override the archived state if it already is set */
82 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 83 f->header->state = STATE_OFFLINE;
cec736d2 84
d384c7a8
MS
85 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
86 }
cec736d2 87
0ac38b70
LP
88 if (f->fd >= 0)
89 close_nointr_nofail(f->fd);
90
cec736d2 91 free(f->path);
807e17f0 92
16e9f408
LP
93 if (f->mmap)
94 mmap_cache_unref(f->mmap);
95
807e17f0
LP
96#ifdef HAVE_XZ
97 free(f->compress_buffer);
98#endif
99
7560fffc 100#ifdef HAVE_GCRYPT
baed47c3
LP
101 if (f->fss_file)
102 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
103 else if (f->fsprg_state)
104 free(f->fsprg_state);
105
106 free(f->fsprg_seed);
7560fffc
LP
107
108 if (f->hmac)
109 gcry_md_close(f->hmac);
110#endif
111
cec736d2
LP
112 free(f);
113}
114
0ac38b70 115static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
116 Header h;
117 ssize_t k;
118 int r;
119
120 assert(f);
121
122 zero(h);
7560fffc 123 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 124 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 125
7560fffc
LP
126 h.incompatible_flags =
127 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
128
129 h.compatible_flags =
baed47c3 130 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 131
cec736d2
LP
132 r = sd_id128_randomize(&h.file_id);
133 if (r < 0)
134 return r;
135
0ac38b70
LP
136 if (template) {
137 h.seqnum_id = template->header->seqnum_id;
beec0085 138 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
139 } else
140 h.seqnum_id = h.file_id;
cec736d2
LP
141
142 k = pwrite(f->fd, &h, sizeof(h), 0);
143 if (k < 0)
144 return -errno;
145
146 if (k != sizeof(h))
147 return -EIO;
148
149 return 0;
150}
151
152static int journal_file_refresh_header(JournalFile *f) {
153 int r;
de190aef 154 sd_id128_t boot_id;
cec736d2
LP
155
156 assert(f);
157
158 r = sd_id128_get_machine(&f->header->machine_id);
159 if (r < 0)
160 return r;
161
de190aef 162 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
163 if (r < 0)
164 return r;
165
de190aef
LP
166 if (sd_id128_equal(boot_id, f->header->boot_id))
167 f->tail_entry_monotonic_valid = true;
168
169 f->header->boot_id = boot_id;
170
171 f->header->state = STATE_ONLINE;
b788cc23 172
7560fffc
LP
173 /* Sync the online state to disk */
174 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
175 fdatasync(f->fd);
b788cc23 176
cec736d2
LP
177 return 0;
178}
179
180static int journal_file_verify_header(JournalFile *f) {
181 assert(f);
182
7560fffc 183 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
184 return -EBADMSG;
185
7560fffc
LP
186 /* In both read and write mode we refuse to open files with
187 * incompatible flags we don't know */
807e17f0 188#ifdef HAVE_XZ
7560fffc 189 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
190 return -EPROTONOSUPPORT;
191#else
cec736d2
LP
192 if (f->header->incompatible_flags != 0)
193 return -EPROTONOSUPPORT;
807e17f0 194#endif
cec736d2 195
7560fffc
LP
196 /* When open for writing we refuse to open files with
197 * compatible flags, too */
198 if (f->writable) {
199#ifdef HAVE_GCRYPT
baed47c3 200 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
201 return -EPROTONOSUPPORT;
202#else
203 if (f->header->compatible_flags != 0)
204 return -EPROTONOSUPPORT;
205#endif
206 }
207
db11ac1a
LP
208 if (f->header->state >= _STATE_MAX)
209 return -EBADMSG;
210
dca6219e
LP
211 /* The first addition was n_data, so check that we are at least this large */
212 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
213 return -EBADMSG;
214
8088cbd3 215 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
216 return -EBADMSG;
217
db11ac1a
LP
218 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
219 return -ENODATA;
220
221 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
222 return -ENODATA;
223
7762e02b
LP
224 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
225 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
226 !VALID64(le64toh(f->header->tail_object_offset)) ||
227 !VALID64(le64toh(f->header->entry_array_offset)))
228 return -ENODATA;
229
230 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
231 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
232 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
233 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
cec736d2
LP
234 return -ENODATA;
235
236 if (f->writable) {
ccdbaf91 237 uint8_t state;
cec736d2
LP
238 sd_id128_t machine_id;
239 int r;
240
241 r = sd_id128_get_machine(&machine_id);
242 if (r < 0)
243 return r;
244
245 if (!sd_id128_equal(machine_id, f->header->machine_id))
246 return -EHOSTDOWN;
247
de190aef 248 state = f->header->state;
cec736d2 249
71fa6f00
LP
250 if (state == STATE_ONLINE) {
251 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
252 return -EBUSY;
253 } else if (state == STATE_ARCHIVED)
cec736d2 254 return -ESHUTDOWN;
71fa6f00
LP
255 else if (state != STATE_OFFLINE) {
256 log_debug("Journal file %s has unknown state %u.", f->path, state);
257 return -EBUSY;
258 }
cec736d2
LP
259 }
260
8088cbd3 261 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
c586dbf1
LP
262
263 if (f->writable)
8088cbd3 264 f->seal = JOURNAL_HEADER_SEALED(f->header);
7560fffc 265
cec736d2
LP
266 return 0;
267}
268
269static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 270 uint64_t old_size, new_size;
fec2aa2f 271 int r;
cec736d2
LP
272
273 assert(f);
274
cec736d2 275 /* We assume that this file is not sparse, and we know that
38ac38b2 276 * for sure, since we always call posix_fallocate()
cec736d2
LP
277 * ourselves */
278
279 old_size =
23b0b2b2 280 le64toh(f->header->header_size) +
cec736d2
LP
281 le64toh(f->header->arena_size);
282
bc85bfee 283 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
284 if (new_size < le64toh(f->header->header_size))
285 new_size = le64toh(f->header->header_size);
bc85bfee
LP
286
287 if (new_size <= old_size)
cec736d2
LP
288 return 0;
289
bc85bfee
LP
290 if (f->metrics.max_size > 0 &&
291 new_size > f->metrics.max_size)
292 return -E2BIG;
cec736d2 293
bc85bfee
LP
294 if (new_size > f->metrics.min_size &&
295 f->metrics.keep_free > 0) {
cec736d2
LP
296 struct statvfs svfs;
297
298 if (fstatvfs(f->fd, &svfs) >= 0) {
299 uint64_t available;
300
301 available = svfs.f_bfree * svfs.f_bsize;
302
bc85bfee
LP
303 if (available >= f->metrics.keep_free)
304 available -= f->metrics.keep_free;
cec736d2
LP
305 else
306 available = 0;
307
308 if (new_size - old_size > available)
309 return -E2BIG;
310 }
311 }
312
bc85bfee
LP
313 /* Note that the glibc fallocate() fallback is very
314 inefficient, hence we try to minimize the allocation area
315 as we can. */
fec2aa2f
GV
316 r = posix_fallocate(f->fd, old_size, new_size - old_size);
317 if (r != 0)
318 return -r;
cec736d2
LP
319
320 if (fstat(f->fd, &f->last_stat) < 0)
321 return -errno;
322
23b0b2b2 323 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
324
325 return 0;
326}
327
fcde2389 328static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
cec736d2 329 assert(f);
cec736d2
LP
330 assert(ret);
331
7762e02b
LP
332 if (size <= 0)
333 return -EINVAL;
334
2a59ea54 335 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
336 if (offset + size > (uint64_t) f->last_stat.st_size) {
337 /* Hmm, out of range? Let's refresh the fstat() data
338 * first, before we trust that check. */
339
340 if (fstat(f->fd, &f->last_stat) < 0 ||
341 offset + size > (uint64_t) f->last_stat.st_size)
342 return -EADDRNOTAVAIL;
343 }
344
fcde2389 345 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
cec736d2
LP
346}
347
16e9f408
LP
348static uint64_t minimum_header_size(Object *o) {
349
350 static uint64_t table[] = {
351 [OBJECT_DATA] = sizeof(DataObject),
352 [OBJECT_FIELD] = sizeof(FieldObject),
353 [OBJECT_ENTRY] = sizeof(EntryObject),
354 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
355 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
356 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
357 [OBJECT_TAG] = sizeof(TagObject),
358 };
359
360 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
361 return sizeof(ObjectHeader);
362
363 return table[o->object.type];
364}
365
de190aef 366int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
367 int r;
368 void *t;
369 Object *o;
370 uint64_t s;
16e9f408 371 unsigned context;
cec736d2
LP
372
373 assert(f);
374 assert(ret);
375
db11ac1a
LP
376 /* Objects may only be located at multiple of 64 bit */
377 if (!VALID64(offset))
378 return -EFAULT;
379
16e9f408
LP
380 /* One context for each type, plus one catch-all for the rest */
381 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
382
fcde2389 383 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
384 if (r < 0)
385 return r;
386
387 o = (Object*) t;
388 s = le64toh(o->object.size);
389
390 if (s < sizeof(ObjectHeader))
391 return -EBADMSG;
392
16e9f408
LP
393 if (o->object.type <= OBJECT_UNUSED)
394 return -EBADMSG;
395
396 if (s < minimum_header_size(o))
397 return -EBADMSG;
398
de190aef 399 if (type >= 0 && o->object.type != type)
cec736d2
LP
400 return -EBADMSG;
401
402 if (s > sizeof(ObjectHeader)) {
fcde2389 403 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
cec736d2
LP
404 if (r < 0)
405 return r;
406
407 o = (Object*) t;
408 }
409
cec736d2
LP
410 *ret = o;
411 return 0;
412}
413
d98cc1f2 414static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
415 uint64_t r;
416
417 assert(f);
418
beec0085 419 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
420
421 if (seqnum) {
de190aef 422 /* If an external seqnum counter was passed, we update
c2373f84
LP
423 * both the local and the external one, and set it to
424 * the maximum of both */
425
426 if (*seqnum + 1 > r)
427 r = *seqnum + 1;
428
429 *seqnum = r;
430 }
431
beec0085 432 f->header->tail_entry_seqnum = htole64(r);
cec736d2 433
beec0085
LP
434 if (f->header->head_entry_seqnum == 0)
435 f->header->head_entry_seqnum = htole64(r);
de190aef 436
cec736d2
LP
437 return r;
438}
439
0284adc6 440int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
441 int r;
442 uint64_t p;
443 Object *tail, *o;
444 void *t;
445
446 assert(f);
16e9f408 447 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
448 assert(size >= sizeof(ObjectHeader));
449 assert(offset);
450 assert(ret);
451
452 p = le64toh(f->header->tail_object_offset);
cec736d2 453 if (p == 0)
23b0b2b2 454 p = le64toh(f->header->header_size);
cec736d2 455 else {
de190aef 456 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
457 if (r < 0)
458 return r;
459
460 p += ALIGN64(le64toh(tail->object.size));
461 }
462
463 r = journal_file_allocate(f, p, size);
464 if (r < 0)
465 return r;
466
fcde2389 467 r = journal_file_move_to(f, type, false, p, size, &t);
cec736d2
LP
468 if (r < 0)
469 return r;
470
471 o = (Object*) t;
472
473 zero(o->object);
de190aef 474 o->object.type = type;
cec736d2
LP
475 o->object.size = htole64(size);
476
477 f->header->tail_object_offset = htole64(p);
cec736d2
LP
478 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
479
480 *ret = o;
481 *offset = p;
482
483 return 0;
484}
485
de190aef 486static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
487 uint64_t s, p;
488 Object *o;
489 int r;
490
491 assert(f);
492
dfabe643 493 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
494 journal file and we want to make sure we never get beyond
495 75% fill level. Calculate the hash table size for the
496 maximum file size based on these metrics. */
497
dfabe643 498 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
499 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
500 s = DEFAULT_DATA_HASH_TABLE_SIZE;
501
2b43f939 502 log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 503
de190aef
LP
504 r = journal_file_append_object(f,
505 OBJECT_DATA_HASH_TABLE,
506 offsetof(Object, hash_table.items) + s,
507 &o, &p);
cec736d2
LP
508 if (r < 0)
509 return r;
510
de190aef 511 memset(o->hash_table.items, 0, s);
cec736d2 512
de190aef
LP
513 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
514 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
515
516 return 0;
517}
518
de190aef 519static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
520 uint64_t s, p;
521 Object *o;
522 int r;
523
524 assert(f);
525
de190aef
LP
526 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
527 r = journal_file_append_object(f,
528 OBJECT_FIELD_HASH_TABLE,
529 offsetof(Object, hash_table.items) + s,
530 &o, &p);
cec736d2
LP
531 if (r < 0)
532 return r;
533
de190aef 534 memset(o->hash_table.items, 0, s);
cec736d2 535
de190aef
LP
536 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
537 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
538
539 return 0;
540}
541
de190aef 542static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
543 uint64_t s, p;
544 void *t;
545 int r;
546
547 assert(f);
548
de190aef
LP
549 p = le64toh(f->header->data_hash_table_offset);
550 s = le64toh(f->header->data_hash_table_size);
cec736d2 551
de190aef 552 r = journal_file_move_to(f,
16e9f408 553 OBJECT_DATA_HASH_TABLE,
fcde2389 554 true,
de190aef
LP
555 p, s,
556 &t);
cec736d2
LP
557 if (r < 0)
558 return r;
559
de190aef 560 f->data_hash_table = t;
cec736d2
LP
561 return 0;
562}
563
de190aef 564static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
565 uint64_t s, p;
566 void *t;
567 int r;
568
569 assert(f);
570
de190aef
LP
571 p = le64toh(f->header->field_hash_table_offset);
572 s = le64toh(f->header->field_hash_table_size);
cec736d2 573
de190aef 574 r = journal_file_move_to(f,
16e9f408 575 OBJECT_FIELD_HASH_TABLE,
fcde2389 576 true,
de190aef
LP
577 p, s,
578 &t);
cec736d2
LP
579 if (r < 0)
580 return r;
581
de190aef 582 f->field_hash_table = t;
cec736d2
LP
583 return 0;
584}
585
de190aef
LP
586static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
587 uint64_t p, h;
cec736d2
LP
588 int r;
589
590 assert(f);
591 assert(o);
592 assert(offset > 0);
de190aef 593 assert(o->object.type == OBJECT_DATA);
cec736d2 594
48496df6
LP
595 /* This might alter the window we are looking at */
596
de190aef
LP
597 o->data.next_hash_offset = o->data.next_field_offset = 0;
598 o->data.entry_offset = o->data.entry_array_offset = 0;
599 o->data.n_entries = 0;
cec736d2 600
de190aef 601 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 602 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
603 if (p == 0) {
604 /* Only entry in the hash table is easy */
de190aef 605 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 606 } else {
48496df6
LP
607 /* Move back to the previous data object, to patch in
608 * pointer */
cec736d2 609
de190aef 610 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
611 if (r < 0)
612 return r;
613
de190aef 614 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
615 }
616
de190aef 617 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 618
dca6219e
LP
619 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
620 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
621
cec736d2
LP
622 return 0;
623}
624
de190aef
LP
625int journal_file_find_data_object_with_hash(
626 JournalFile *f,
627 const void *data, uint64_t size, uint64_t hash,
628 Object **ret, uint64_t *offset) {
48496df6 629
de190aef 630 uint64_t p, osize, h;
cec736d2
LP
631 int r;
632
633 assert(f);
634 assert(data || size == 0);
635
636 osize = offsetof(Object, data.payload) + size;
637
bc85bfee
LP
638 if (f->header->data_hash_table_size == 0)
639 return -EBADMSG;
640
de190aef
LP
641 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
642 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 643
de190aef
LP
644 while (p > 0) {
645 Object *o;
cec736d2 646
de190aef 647 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
648 if (r < 0)
649 return r;
650
807e17f0 651 if (le64toh(o->data.hash) != hash)
85a131e8 652 goto next;
807e17f0
LP
653
654 if (o->object.flags & OBJECT_COMPRESSED) {
655#ifdef HAVE_XZ
b785c858 656 uint64_t l, rsize;
cec736d2 657
807e17f0
LP
658 l = le64toh(o->object.size);
659 if (l <= offsetof(Object, data.payload))
cec736d2
LP
660 return -EBADMSG;
661
807e17f0
LP
662 l -= offsetof(Object, data.payload);
663
664 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
665 return -EBADMSG;
666
b785c858 667 if (rsize == size &&
807e17f0
LP
668 memcmp(f->compress_buffer, data, size) == 0) {
669
670 if (ret)
671 *ret = o;
672
673 if (offset)
674 *offset = p;
675
676 return 1;
677 }
678#else
679 return -EPROTONOSUPPORT;
680#endif
681
682 } else if (le64toh(o->object.size) == osize &&
683 memcmp(o->data.payload, data, size) == 0) {
684
cec736d2
LP
685 if (ret)
686 *ret = o;
687
688 if (offset)
689 *offset = p;
690
de190aef 691 return 1;
cec736d2
LP
692 }
693
85a131e8 694 next:
cec736d2
LP
695 p = le64toh(o->data.next_hash_offset);
696 }
697
de190aef
LP
698 return 0;
699}
700
701int journal_file_find_data_object(
702 JournalFile *f,
703 const void *data, uint64_t size,
704 Object **ret, uint64_t *offset) {
705
706 uint64_t hash;
707
708 assert(f);
709 assert(data || size == 0);
710
711 hash = hash64(data, size);
712
713 return journal_file_find_data_object_with_hash(f,
714 data, size, hash,
715 ret, offset);
716}
717
48496df6
LP
718static int journal_file_append_data(
719 JournalFile *f,
720 const void *data, uint64_t size,
721 Object **ret, uint64_t *offset) {
722
de190aef
LP
723 uint64_t hash, p;
724 uint64_t osize;
725 Object *o;
726 int r;
807e17f0 727 bool compressed = false;
de190aef
LP
728
729 assert(f);
730 assert(data || size == 0);
731
732 hash = hash64(data, size);
733
734 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
735 if (r < 0)
736 return r;
737 else if (r > 0) {
738
739 if (ret)
740 *ret = o;
741
742 if (offset)
743 *offset = p;
744
745 return 0;
746 }
747
748 osize = offsetof(Object, data.payload) + size;
749 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
750 if (r < 0)
751 return r;
752
cec736d2 753 o->data.hash = htole64(hash);
807e17f0
LP
754
755#ifdef HAVE_XZ
756 if (f->compress &&
757 size >= COMPRESSION_SIZE_THRESHOLD) {
758 uint64_t rsize;
759
760 compressed = compress_blob(data, size, o->data.payload, &rsize);
761
762 if (compressed) {
763 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
764 o->object.flags |= OBJECT_COMPRESSED;
765
807e17f0
LP
766 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
767 }
768 }
769#endif
770
64825d3c 771 if (!compressed && size > 0)
807e17f0 772 memcpy(o->data.payload, data, size);
cec736d2 773
de190aef 774 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
775 if (r < 0)
776 return r;
777
feb12d3e 778#ifdef HAVE_GCRYPT
b0af6f41
LP
779 r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
780 if (r < 0)
781 return r;
feb12d3e 782#endif
b0af6f41 783
48496df6
LP
784 /* The linking might have altered the window, so let's
785 * refresh our pointer */
786 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
787 if (r < 0)
788 return r;
789
cec736d2
LP
790 if (ret)
791 *ret = o;
792
793 if (offset)
de190aef 794 *offset = p;
cec736d2
LP
795
796 return 0;
797}
798
799uint64_t journal_file_entry_n_items(Object *o) {
800 assert(o);
7be3aa17 801 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
802
803 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
804}
805
0284adc6 806uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 807 assert(o);
7be3aa17 808 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
809
810 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
811}
812
fb9a24b6
LP
813uint64_t journal_file_hash_table_n_items(Object *o) {
814 assert(o);
815 assert(o->object.type == OBJECT_DATA_HASH_TABLE ||
816 o->object.type == OBJECT_FIELD_HASH_TABLE);
817
818 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
819}
820
de190aef 821static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
822 le64_t *first,
823 le64_t *idx,
de190aef 824 uint64_t p) {
cec736d2 825 int r;
de190aef
LP
826 uint64_t n = 0, ap = 0, q, i, a, hidx;
827 Object *o;
828
cec736d2 829 assert(f);
de190aef
LP
830 assert(first);
831 assert(idx);
832 assert(p > 0);
cec736d2 833
de190aef
LP
834 a = le64toh(*first);
835 i = hidx = le64toh(*idx);
836 while (a > 0) {
837
838 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
839 if (r < 0)
840 return r;
cec736d2 841
de190aef
LP
842 n = journal_file_entry_array_n_items(o);
843 if (i < n) {
844 o->entry_array.items[i] = htole64(p);
845 *idx = htole64(hidx + 1);
846 return 0;
847 }
cec736d2 848
de190aef
LP
849 i -= n;
850 ap = a;
851 a = le64toh(o->entry_array.next_entry_array_offset);
852 }
853
854 if (hidx > n)
855 n = (hidx+1) * 2;
856 else
857 n = n * 2;
858
859 if (n < 4)
860 n = 4;
861
862 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
863 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
864 &o, &q);
cec736d2
LP
865 if (r < 0)
866 return r;
867
feb12d3e 868#ifdef HAVE_GCRYPT
b0af6f41
LP
869 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
870 if (r < 0)
871 return r;
feb12d3e 872#endif
b0af6f41 873
de190aef 874 o->entry_array.items[i] = htole64(p);
cec736d2 875
de190aef 876 if (ap == 0)
7be3aa17 877 *first = htole64(q);
cec736d2 878 else {
de190aef 879 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
880 if (r < 0)
881 return r;
882
de190aef
LP
883 o->entry_array.next_entry_array_offset = htole64(q);
884 }
cec736d2 885
2dee23eb
LP
886 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
887 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
888
de190aef
LP
889 *idx = htole64(hidx + 1);
890
891 return 0;
892}
cec736d2 893
de190aef 894static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
895 le64_t *extra,
896 le64_t *first,
897 le64_t *idx,
de190aef
LP
898 uint64_t p) {
899
900 int r;
901
902 assert(f);
903 assert(extra);
904 assert(first);
905 assert(idx);
906 assert(p > 0);
907
908 if (*idx == 0)
909 *extra = htole64(p);
910 else {
4fd052ae 911 le64_t i;
de190aef 912
7be3aa17 913 i = htole64(le64toh(*idx) - 1);
de190aef
LP
914 r = link_entry_into_array(f, first, &i, p);
915 if (r < 0)
916 return r;
cec736d2
LP
917 }
918
de190aef
LP
919 *idx = htole64(le64toh(*idx) + 1);
920 return 0;
921}
922
923static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
924 uint64_t p;
925 int r;
926 assert(f);
927 assert(o);
928 assert(offset > 0);
929
930 p = le64toh(o->entry.items[i].object_offset);
931 if (p == 0)
932 return -EINVAL;
933
934 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
935 if (r < 0)
936 return r;
937
de190aef
LP
938 return link_entry_into_array_plus_one(f,
939 &o->data.entry_offset,
940 &o->data.entry_array_offset,
941 &o->data.n_entries,
942 offset);
cec736d2
LP
943}
944
945static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 946 uint64_t n, i;
cec736d2
LP
947 int r;
948
949 assert(f);
950 assert(o);
951 assert(offset > 0);
de190aef 952 assert(o->object.type == OBJECT_ENTRY);
cec736d2 953
b788cc23
LP
954 __sync_synchronize();
955
cec736d2 956 /* Link up the entry itself */
de190aef
LP
957 r = link_entry_into_array(f,
958 &f->header->entry_array_offset,
959 &f->header->n_entries,
960 offset);
961 if (r < 0)
962 return r;
cec736d2 963
aaf53376 964 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 965
de190aef 966 if (f->header->head_entry_realtime == 0)
0ac38b70 967 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 968
0ac38b70 969 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
970 f->header->tail_entry_monotonic = o->entry.monotonic;
971
972 f->tail_entry_monotonic_valid = true;
cec736d2
LP
973
974 /* Link up the items */
975 n = journal_file_entry_n_items(o);
976 for (i = 0; i < n; i++) {
977 r = journal_file_link_entry_item(f, o, offset, i);
978 if (r < 0)
979 return r;
980 }
981
cec736d2
LP
982 return 0;
983}
984
985static int journal_file_append_entry_internal(
986 JournalFile *f,
987 const dual_timestamp *ts,
988 uint64_t xor_hash,
989 const EntryItem items[], unsigned n_items,
de190aef 990 uint64_t *seqnum,
cec736d2
LP
991 Object **ret, uint64_t *offset) {
992 uint64_t np;
993 uint64_t osize;
994 Object *o;
995 int r;
996
997 assert(f);
998 assert(items || n_items == 0);
de190aef 999 assert(ts);
cec736d2
LP
1000
1001 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1002
de190aef 1003 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
1004 if (r < 0)
1005 return r;
1006
d98cc1f2 1007 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 1008 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
1009 o->entry.realtime = htole64(ts->realtime);
1010 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
1011 o->entry.xor_hash = htole64(xor_hash);
1012 o->entry.boot_id = f->header->boot_id;
1013
feb12d3e 1014#ifdef HAVE_GCRYPT
b0af6f41
LP
1015 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
1016 if (r < 0)
1017 return r;
feb12d3e 1018#endif
b0af6f41 1019
cec736d2
LP
1020 r = journal_file_link_entry(f, o, np);
1021 if (r < 0)
1022 return r;
1023
1024 if (ret)
1025 *ret = o;
1026
1027 if (offset)
1028 *offset = np;
1029
1030 return 0;
1031}
1032
cf244689 1033void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1034 assert(f);
1035
1036 /* inotify() does not receive IN_MODIFY events from file
1037 * accesses done via mmap(). After each access we hence
1038 * trigger IN_MODIFY by truncating the journal file to its
1039 * current size which triggers IN_MODIFY. */
1040
bc85bfee
LP
1041 __sync_synchronize();
1042
50f20cfd
LP
1043 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1044 log_error("Failed to to truncate file to its own size: %m");
1045}
1046
de190aef 1047int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1048 unsigned i;
1049 EntryItem *items;
1050 int r;
1051 uint64_t xor_hash = 0;
de190aef 1052 struct dual_timestamp _ts;
cec736d2
LP
1053
1054 assert(f);
1055 assert(iovec || n_iovec == 0);
1056
de190aef
LP
1057 if (!f->writable)
1058 return -EPERM;
1059
1060 if (!ts) {
1061 dual_timestamp_get(&_ts);
1062 ts = &_ts;
1063 }
1064
1065 if (f->tail_entry_monotonic_valid &&
1066 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1067 return -EINVAL;
1068
feb12d3e 1069#ifdef HAVE_GCRYPT
7560fffc
LP
1070 r = journal_file_maybe_append_tag(f, ts->realtime);
1071 if (r < 0)
1072 return r;
feb12d3e 1073#endif
7560fffc 1074
64825d3c
LP
1075 /* alloca() can't take 0, hence let's allocate at least one */
1076 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1077
1078 for (i = 0; i < n_iovec; i++) {
1079 uint64_t p;
1080 Object *o;
1081
1082 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1083 if (r < 0)
cf244689 1084 return r;
cec736d2
LP
1085
1086 xor_hash ^= le64toh(o->data.hash);
1087 items[i].object_offset = htole64(p);
de7b95cd 1088 items[i].hash = o->data.hash;
cec736d2
LP
1089 }
1090
de190aef 1091 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1092
50f20cfd
LP
1093 journal_file_post_change(f);
1094
cec736d2
LP
1095 return r;
1096}
1097
de190aef
LP
1098static int generic_array_get(JournalFile *f,
1099 uint64_t first,
1100 uint64_t i,
1101 Object **ret, uint64_t *offset) {
1102
cec736d2 1103 Object *o;
6c8a39b8 1104 uint64_t p = 0, a;
cec736d2
LP
1105 int r;
1106
1107 assert(f);
1108
de190aef
LP
1109 a = first;
1110 while (a > 0) {
1111 uint64_t n;
cec736d2 1112
de190aef
LP
1113 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1114 if (r < 0)
1115 return r;
cec736d2 1116
de190aef
LP
1117 n = journal_file_entry_array_n_items(o);
1118 if (i < n) {
1119 p = le64toh(o->entry_array.items[i]);
1120 break;
cec736d2
LP
1121 }
1122
de190aef
LP
1123 i -= n;
1124 a = le64toh(o->entry_array.next_entry_array_offset);
1125 }
1126
1127 if (a <= 0 || p <= 0)
1128 return 0;
1129
1130 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1131 if (r < 0)
1132 return r;
1133
1134 if (ret)
1135 *ret = o;
1136
1137 if (offset)
1138 *offset = p;
1139
1140 return 1;
1141}
1142
1143static int generic_array_get_plus_one(JournalFile *f,
1144 uint64_t extra,
1145 uint64_t first,
1146 uint64_t i,
1147 Object **ret, uint64_t *offset) {
1148
1149 Object *o;
1150
1151 assert(f);
1152
1153 if (i == 0) {
1154 int r;
1155
1156 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1157 if (r < 0)
1158 return r;
1159
de190aef
LP
1160 if (ret)
1161 *ret = o;
cec736d2 1162
de190aef
LP
1163 if (offset)
1164 *offset = extra;
cec736d2 1165
de190aef 1166 return 1;
cec736d2
LP
1167 }
1168
de190aef
LP
1169 return generic_array_get(f, first, i-1, ret, offset);
1170}
cec736d2 1171
de190aef
LP
/* Results of the test_object callbacks used by the bisection helpers */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object compares lower than the needle */
        TEST_RIGHT = 2  /* the tested object compares higher than the needle */
};
cec736d2 1177
de190aef
LP
1178static int generic_array_bisect(JournalFile *f,
1179 uint64_t first,
1180 uint64_t n,
1181 uint64_t needle,
1182 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1183 direction_t direction,
1184 Object **ret,
1185 uint64_t *offset,
1186 uint64_t *idx) {
1187
1188 uint64_t a, p, t = 0, i = 0, last_p = 0;
1189 bool subtract_one = false;
1190 Object *o, *array = NULL;
1191 int r;
cec736d2 1192
de190aef
LP
1193 assert(f);
1194 assert(test_object);
cec736d2 1195
de190aef
LP
1196 a = first;
1197 while (a > 0) {
1198 uint64_t left, right, k, lp;
1199
1200 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1201 if (r < 0)
1202 return r;
1203
de190aef
LP
1204 k = journal_file_entry_array_n_items(array);
1205 right = MIN(k, n);
1206 if (right <= 0)
1207 return 0;
cec736d2 1208
de190aef
LP
1209 i = right - 1;
1210 lp = p = le64toh(array->entry_array.items[i]);
1211 if (p <= 0)
1212 return -EBADMSG;
cec736d2 1213
de190aef
LP
1214 r = test_object(f, p, needle);
1215 if (r < 0)
1216 return r;
cec736d2 1217
de190aef
LP
1218 if (r == TEST_FOUND)
1219 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1220
1221 if (r == TEST_RIGHT) {
1222 left = 0;
1223 right -= 1;
1224 for (;;) {
1225 if (left == right) {
1226 if (direction == DIRECTION_UP)
1227 subtract_one = true;
1228
1229 i = left;
1230 goto found;
1231 }
1232
1233 assert(left < right);
1234
1235 i = (left + right) / 2;
1236 p = le64toh(array->entry_array.items[i]);
1237 if (p <= 0)
1238 return -EBADMSG;
1239
1240 r = test_object(f, p, needle);
1241 if (r < 0)
1242 return r;
cec736d2 1243
de190aef
LP
1244 if (r == TEST_FOUND)
1245 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1246
1247 if (r == TEST_RIGHT)
1248 right = i;
1249 else
1250 left = i + 1;
1251 }
1252 }
1253
cbdca852
LP
1254 if (k > n) {
1255 if (direction == DIRECTION_UP) {
1256 i = n;
1257 subtract_one = true;
1258 goto found;
1259 }
1260
cec736d2 1261 return 0;
cbdca852 1262 }
cec736d2 1263
de190aef
LP
1264 last_p = lp;
1265
1266 n -= k;
1267 t += k;
1268 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1269 }
1270
1271 return 0;
de190aef
LP
1272
1273found:
1274 if (subtract_one && t == 0 && i == 0)
1275 return 0;
1276
1277 if (subtract_one && i == 0)
1278 p = last_p;
1279 else if (subtract_one)
1280 p = le64toh(array->entry_array.items[i-1]);
1281 else
1282 p = le64toh(array->entry_array.items[i]);
1283
1284 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1285 if (r < 0)
1286 return r;
1287
1288 if (ret)
1289 *ret = o;
1290
1291 if (offset)
1292 *offset = p;
1293
1294 if (idx)
cbdca852 1295 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1296
1297 return 1;
cec736d2
LP
1298}
1299
de190aef
LP
1300static int generic_array_bisect_plus_one(JournalFile *f,
1301 uint64_t extra,
1302 uint64_t first,
1303 uint64_t n,
1304 uint64_t needle,
1305 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1306 direction_t direction,
1307 Object **ret,
1308 uint64_t *offset,
1309 uint64_t *idx) {
1310
cec736d2 1311 int r;
cbdca852
LP
1312 bool step_back = false;
1313 Object *o;
cec736d2
LP
1314
1315 assert(f);
de190aef 1316 assert(test_object);
cec736d2 1317
de190aef
LP
1318 if (n <= 0)
1319 return 0;
cec736d2 1320
de190aef
LP
1321 /* This bisects the array in object 'first', but first checks
1322 * an extra */
de190aef
LP
1323 r = test_object(f, extra, needle);
1324 if (r < 0)
1325 return r;
a536e261
LP
1326
1327 if (r == TEST_FOUND)
1328 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1329
cbdca852
LP
1330 /* if we are looking with DIRECTION_UP then we need to first
1331 see if in the actual array there is a matching entry, and
1332 return the last one of that. But if there isn't any we need
1333 to return this one. Hence remember this, and return it
1334 below. */
1335 if (r == TEST_LEFT)
1336 step_back = direction == DIRECTION_UP;
de190aef 1337
cbdca852
LP
1338 if (r == TEST_RIGHT) {
1339 if (direction == DIRECTION_DOWN)
1340 goto found;
1341 else
1342 return 0;
a536e261 1343 }
cec736d2 1344
de190aef
LP
1345 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1346
cbdca852
LP
1347 if (r == 0 && step_back)
1348 goto found;
1349
ecf68b1d 1350 if (r > 0 && idx)
de190aef
LP
1351 (*idx) ++;
1352
1353 return r;
cbdca852
LP
1354
1355found:
1356 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1357 if (r < 0)
1358 return r;
1359
1360 if (ret)
1361 *ret = o;
1362
1363 if (offset)
1364 *offset = extra;
1365
1366 if (idx)
1367 *idx = 0;
1368
1369 return 1;
1370}
1371
1372static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1373 assert(f);
1374 assert(p > 0);
1375
1376 if (p == needle)
1377 return TEST_FOUND;
1378 else if (p < needle)
1379 return TEST_LEFT;
1380 else
1381 return TEST_RIGHT;
1382}
1383
1384int journal_file_move_to_entry_by_offset(
1385 JournalFile *f,
1386 uint64_t p,
1387 direction_t direction,
1388 Object **ret,
1389 uint64_t *offset) {
1390
1391 return generic_array_bisect(f,
1392 le64toh(f->header->entry_array_offset),
1393 le64toh(f->header->n_entries),
1394 p,
1395 test_object_offset,
1396 direction,
1397 ret, offset, NULL);
de190aef
LP
1398}
1399
cbdca852 1400
de190aef
LP
1401static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1402 Object *o;
1403 int r;
1404
1405 assert(f);
1406 assert(p > 0);
1407
1408 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1409 if (r < 0)
1410 return r;
1411
de190aef
LP
1412 if (le64toh(o->entry.seqnum) == needle)
1413 return TEST_FOUND;
1414 else if (le64toh(o->entry.seqnum) < needle)
1415 return TEST_LEFT;
1416 else
1417 return TEST_RIGHT;
1418}
cec736d2 1419
de190aef
LP
1420int journal_file_move_to_entry_by_seqnum(
1421 JournalFile *f,
1422 uint64_t seqnum,
1423 direction_t direction,
1424 Object **ret,
1425 uint64_t *offset) {
1426
1427 return generic_array_bisect(f,
1428 le64toh(f->header->entry_array_offset),
1429 le64toh(f->header->n_entries),
1430 seqnum,
1431 test_object_seqnum,
1432 direction,
1433 ret, offset, NULL);
1434}
cec736d2 1435
de190aef
LP
1436static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1437 Object *o;
1438 int r;
1439
1440 assert(f);
1441 assert(p > 0);
1442
1443 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1444 if (r < 0)
1445 return r;
1446
1447 if (le64toh(o->entry.realtime) == needle)
1448 return TEST_FOUND;
1449 else if (le64toh(o->entry.realtime) < needle)
1450 return TEST_LEFT;
1451 else
1452 return TEST_RIGHT;
cec736d2
LP
1453}
1454
de190aef
LP
1455int journal_file_move_to_entry_by_realtime(
1456 JournalFile *f,
1457 uint64_t realtime,
1458 direction_t direction,
1459 Object **ret,
1460 uint64_t *offset) {
1461
1462 return generic_array_bisect(f,
1463 le64toh(f->header->entry_array_offset),
1464 le64toh(f->header->n_entries),
1465 realtime,
1466 test_object_realtime,
1467 direction,
1468 ret, offset, NULL);
1469}
1470
1471static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1472 Object *o;
1473 int r;
1474
1475 assert(f);
1476 assert(p > 0);
1477
1478 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1479 if (r < 0)
1480 return r;
1481
1482 if (le64toh(o->entry.monotonic) == needle)
1483 return TEST_FOUND;
1484 else if (le64toh(o->entry.monotonic) < needle)
1485 return TEST_LEFT;
1486 else
1487 return TEST_RIGHT;
1488}
1489
1490int journal_file_move_to_entry_by_monotonic(
1491 JournalFile *f,
1492 sd_id128_t boot_id,
1493 uint64_t monotonic,
1494 direction_t direction,
1495 Object **ret,
1496 uint64_t *offset) {
1497
10b6f904 1498 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1499 Object *o;
1500 int r;
1501
cbdca852 1502 assert(f);
de190aef 1503
cbdca852 1504 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1505 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1506 if (r < 0)
1507 return r;
cbdca852 1508 if (r == 0)
de190aef
LP
1509 return -ENOENT;
1510
1511 return generic_array_bisect_plus_one(f,
1512 le64toh(o->data.entry_offset),
1513 le64toh(o->data.entry_array_offset),
1514 le64toh(o->data.n_entries),
1515 monotonic,
1516 test_object_monotonic,
1517 direction,
1518 ret, offset, NULL);
1519}
1520
de190aef
LP
1521int journal_file_next_entry(
1522 JournalFile *f,
1523 Object *o, uint64_t p,
1524 direction_t direction,
1525 Object **ret, uint64_t *offset) {
1526
1527 uint64_t i, n;
cec736d2
LP
1528 int r;
1529
1530 assert(f);
de190aef
LP
1531 assert(p > 0 || !o);
1532
1533 n = le64toh(f->header->n_entries);
1534 if (n <= 0)
1535 return 0;
cec736d2
LP
1536
1537 if (!o)
de190aef 1538 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1539 else {
de190aef 1540 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1541 return -EINVAL;
1542
de190aef
LP
1543 r = generic_array_bisect(f,
1544 le64toh(f->header->entry_array_offset),
1545 le64toh(f->header->n_entries),
1546 p,
1547 test_object_offset,
1548 DIRECTION_DOWN,
1549 NULL, NULL,
1550 &i);
1551 if (r <= 0)
1552 return r;
1553
1554 if (direction == DIRECTION_DOWN) {
1555 if (i >= n - 1)
1556 return 0;
1557
1558 i++;
1559 } else {
1560 if (i <= 0)
1561 return 0;
1562
1563 i--;
1564 }
cec736d2
LP
1565 }
1566
de190aef
LP
1567 /* And jump to it */
1568 return generic_array_get(f,
1569 le64toh(f->header->entry_array_offset),
1570 i,
1571 ret, offset);
1572}
cec736d2 1573
de190aef
LP
1574int journal_file_skip_entry(
1575 JournalFile *f,
1576 Object *o, uint64_t p,
1577 int64_t skip,
1578 Object **ret, uint64_t *offset) {
1579
1580 uint64_t i, n;
1581 int r;
1582
1583 assert(f);
1584 assert(o);
1585 assert(p > 0);
1586
1587 if (o->object.type != OBJECT_ENTRY)
1588 return -EINVAL;
1589
1590 r = generic_array_bisect(f,
1591 le64toh(f->header->entry_array_offset),
1592 le64toh(f->header->n_entries),
1593 p,
1594 test_object_offset,
1595 DIRECTION_DOWN,
1596 NULL, NULL,
1597 &i);
1598 if (r <= 0)
cec736d2
LP
1599 return r;
1600
de190aef
LP
1601 /* Calculate new index */
1602 if (skip < 0) {
1603 if ((uint64_t) -skip >= i)
1604 i = 0;
1605 else
1606 i = i - (uint64_t) -skip;
1607 } else
1608 i += (uint64_t) skip;
cec736d2 1609
de190aef
LP
1610 n = le64toh(f->header->n_entries);
1611 if (n <= 0)
1612 return -EBADMSG;
cec736d2 1613
de190aef
LP
1614 if (i >= n)
1615 i = n-1;
1616
1617 return generic_array_get(f,
1618 le64toh(f->header->entry_array_offset),
1619 i,
1620 ret, offset);
cec736d2
LP
1621}
1622
de190aef
LP
1623int journal_file_next_entry_for_data(
1624 JournalFile *f,
1625 Object *o, uint64_t p,
1626 uint64_t data_offset,
1627 direction_t direction,
1628 Object **ret, uint64_t *offset) {
1629
1630 uint64_t n, i;
cec736d2 1631 int r;
de190aef 1632 Object *d;
cec736d2
LP
1633
1634 assert(f);
de190aef 1635 assert(p > 0 || !o);
cec736d2 1636
de190aef 1637 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1638 if (r < 0)
de190aef 1639 return r;
cec736d2 1640
de190aef
LP
1641 n = le64toh(d->data.n_entries);
1642 if (n <= 0)
1643 return n;
cec736d2 1644
de190aef
LP
1645 if (!o)
1646 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1647 else {
1648 if (o->object.type != OBJECT_ENTRY)
1649 return -EINVAL;
cec736d2 1650
de190aef
LP
1651 r = generic_array_bisect_plus_one(f,
1652 le64toh(d->data.entry_offset),
1653 le64toh(d->data.entry_array_offset),
1654 le64toh(d->data.n_entries),
1655 p,
1656 test_object_offset,
1657 DIRECTION_DOWN,
1658 NULL, NULL,
1659 &i);
1660
1661 if (r <= 0)
cec736d2
LP
1662 return r;
1663
de190aef
LP
1664 if (direction == DIRECTION_DOWN) {
1665 if (i >= n - 1)
1666 return 0;
cec736d2 1667
de190aef
LP
1668 i++;
1669 } else {
1670 if (i <= 0)
1671 return 0;
cec736d2 1672
de190aef
LP
1673 i--;
1674 }
cec736d2 1675
de190aef 1676 }
cec736d2 1677
de190aef
LP
1678 return generic_array_get_plus_one(f,
1679 le64toh(d->data.entry_offset),
1680 le64toh(d->data.entry_array_offset),
1681 i,
1682 ret, offset);
1683}
cec736d2 1684
cbdca852
LP
1685int journal_file_move_to_entry_by_offset_for_data(
1686 JournalFile *f,
1687 uint64_t data_offset,
1688 uint64_t p,
1689 direction_t direction,
1690 Object **ret, uint64_t *offset) {
1691
1692 int r;
1693 Object *d;
1694
1695 assert(f);
1696
1697 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1698 if (r < 0)
1699 return r;
1700
1701 return generic_array_bisect_plus_one(f,
1702 le64toh(d->data.entry_offset),
1703 le64toh(d->data.entry_array_offset),
1704 le64toh(d->data.n_entries),
1705 p,
1706 test_object_offset,
1707 direction,
1708 ret, offset, NULL);
1709}
1710
1711int journal_file_move_to_entry_by_monotonic_for_data(
1712 JournalFile *f,
1713 uint64_t data_offset,
1714 sd_id128_t boot_id,
1715 uint64_t monotonic,
1716 direction_t direction,
1717 Object **ret, uint64_t *offset) {
1718
1719 char t[9+32+1] = "_BOOT_ID=";
1720 Object *o, *d;
1721 int r;
1722 uint64_t b, z;
1723
1724 assert(f);
1725
1726 /* First, seek by time */
1727 sd_id128_to_string(boot_id, t + 9);
1728 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1729 if (r < 0)
1730 return r;
1731 if (r == 0)
1732 return -ENOENT;
1733
1734 r = generic_array_bisect_plus_one(f,
1735 le64toh(o->data.entry_offset),
1736 le64toh(o->data.entry_array_offset),
1737 le64toh(o->data.n_entries),
1738 monotonic,
1739 test_object_monotonic,
1740 direction,
1741 NULL, &z, NULL);
1742 if (r <= 0)
1743 return r;
1744
1745 /* And now, continue seeking until we find an entry that
1746 * exists in both bisection arrays */
1747
1748 for (;;) {
1749 Object *qo;
1750 uint64_t p, q;
1751
1752 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1753 if (r < 0)
1754 return r;
1755
1756 r = generic_array_bisect_plus_one(f,
1757 le64toh(d->data.entry_offset),
1758 le64toh(d->data.entry_array_offset),
1759 le64toh(d->data.n_entries),
1760 z,
1761 test_object_offset,
1762 direction,
1763 NULL, &p, NULL);
1764 if (r <= 0)
1765 return r;
1766
1767 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1768 if (r < 0)
1769 return r;
1770
1771 r = generic_array_bisect_plus_one(f,
1772 le64toh(o->data.entry_offset),
1773 le64toh(o->data.entry_array_offset),
1774 le64toh(o->data.n_entries),
1775 p,
1776 test_object_offset,
1777 direction,
1778 &qo, &q, NULL);
1779
1780 if (r <= 0)
1781 return r;
1782
1783 if (p == q) {
1784 if (ret)
1785 *ret = qo;
1786 if (offset)
1787 *offset = q;
1788
1789 return 1;
1790 }
1791
1792 z = q;
1793 }
1794
1795 return 0;
1796}
1797
de190aef
LP
1798int journal_file_move_to_entry_by_seqnum_for_data(
1799 JournalFile *f,
1800 uint64_t data_offset,
1801 uint64_t seqnum,
1802 direction_t direction,
1803 Object **ret, uint64_t *offset) {
cec736d2 1804
de190aef
LP
1805 Object *d;
1806 int r;
cec736d2 1807
91a31dde
LP
1808 assert(f);
1809
de190aef 1810 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1811 if (r < 0)
de190aef 1812 return r;
cec736d2 1813
de190aef
LP
1814 return generic_array_bisect_plus_one(f,
1815 le64toh(d->data.entry_offset),
1816 le64toh(d->data.entry_array_offset),
1817 le64toh(d->data.n_entries),
1818 seqnum,
1819 test_object_seqnum,
1820 direction,
1821 ret, offset, NULL);
1822}
cec736d2 1823
de190aef
LP
1824int journal_file_move_to_entry_by_realtime_for_data(
1825 JournalFile *f,
1826 uint64_t data_offset,
1827 uint64_t realtime,
1828 direction_t direction,
1829 Object **ret, uint64_t *offset) {
1830
1831 Object *d;
1832 int r;
1833
91a31dde
LP
1834 assert(f);
1835
de190aef 1836 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1837 if (r < 0)
de190aef
LP
1838 return r;
1839
1840 return generic_array_bisect_plus_one(f,
1841 le64toh(d->data.entry_offset),
1842 le64toh(d->data.entry_array_offset),
1843 le64toh(d->data.n_entries),
1844 realtime,
1845 test_object_realtime,
1846 direction,
1847 ret, offset, NULL);
cec736d2
LP
1848}
1849
0284adc6 1850void journal_file_dump(JournalFile *f) {
7560fffc 1851 Object *o;
7560fffc 1852 int r;
0284adc6 1853 uint64_t p;
7560fffc
LP
1854
1855 assert(f);
1856
0284adc6 1857 journal_file_print_header(f);
7560fffc 1858
0284adc6
LP
1859 p = le64toh(f->header->header_size);
1860 while (p != 0) {
1861 r = journal_file_move_to_object(f, -1, p, &o);
1862 if (r < 0)
1863 goto fail;
7560fffc 1864
0284adc6 1865 switch (o->object.type) {
d98cc1f2 1866
0284adc6
LP
1867 case OBJECT_UNUSED:
1868 printf("Type: OBJECT_UNUSED\n");
1869 break;
d98cc1f2 1870
0284adc6
LP
1871 case OBJECT_DATA:
1872 printf("Type: OBJECT_DATA\n");
1873 break;
7560fffc 1874
0284adc6 1875 case OBJECT_ENTRY:
f7fab8a5 1876 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
0284adc6
LP
1877 (unsigned long long) le64toh(o->entry.seqnum),
1878 (unsigned long long) le64toh(o->entry.monotonic),
1879 (unsigned long long) le64toh(o->entry.realtime));
1880 break;
7560fffc 1881
0284adc6
LP
1882 case OBJECT_FIELD_HASH_TABLE:
1883 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1884 break;
7560fffc 1885
0284adc6
LP
1886 case OBJECT_DATA_HASH_TABLE:
1887 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1888 break;
7560fffc 1889
0284adc6
LP
1890 case OBJECT_ENTRY_ARRAY:
1891 printf("Type: OBJECT_ENTRY_ARRAY\n");
1892 break;
7560fffc 1893
0284adc6 1894 case OBJECT_TAG:
f7fab8a5
LP
1895 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1896 (unsigned long long) le64toh(o->tag.seqnum),
1897 (unsigned long long) le64toh(o->tag.epoch));
0284adc6
LP
1898 break;
1899 }
7560fffc 1900
0284adc6
LP
1901 if (o->object.flags & OBJECT_COMPRESSED)
1902 printf("Flags: COMPRESSED\n");
7560fffc 1903
0284adc6
LP
1904 if (p == le64toh(f->header->tail_object_offset))
1905 p = 0;
1906 else
1907 p = p + ALIGN64(le64toh(o->object.size));
1908 }
7560fffc 1909
0284adc6
LP
1910 return;
1911fail:
1912 log_error("File corrupt");
7560fffc
LP
1913}
1914
0284adc6
LP
1915void journal_file_print_header(JournalFile *f) {
1916 char a[33], b[33], c[33];
1917 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
a1a03e30
LP
1918 struct stat st;
1919 char bytes[FORMAT_BYTES_MAX];
7560fffc
LP
1920
1921 assert(f);
7560fffc 1922
0284adc6
LP
1923 printf("File Path: %s\n"
1924 "File ID: %s\n"
1925 "Machine ID: %s\n"
1926 "Boot ID: %s\n"
1927 "Sequential Number ID: %s\n"
1928 "State: %s\n"
1929 "Compatible Flags:%s%s\n"
1930 "Incompatible Flags:%s%s\n"
1931 "Header size: %llu\n"
1932 "Arena size: %llu\n"
1933 "Data Hash Table Size: %llu\n"
1934 "Field Hash Table Size: %llu\n"
0284adc6
LP
1935 "Rotate Suggested: %s\n"
1936 "Head Sequential Number: %llu\n"
1937 "Tail Sequential Number: %llu\n"
1938 "Head Realtime Timestamp: %s\n"
3223f44f
LP
1939 "Tail Realtime Timestamp: %s\n"
1940 "Objects: %llu\n"
1941 "Entry Objects: %llu\n",
0284adc6
LP
1942 f->path,
1943 sd_id128_to_string(f->header->file_id, a),
1944 sd_id128_to_string(f->header->machine_id, b),
1945 sd_id128_to_string(f->header->boot_id, c),
1946 sd_id128_to_string(f->header->seqnum_id, c),
3223f44f
LP
1947 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1948 f->header->state == STATE_ONLINE ? "ONLINE" :
1949 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
8088cbd3
LP
1950 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1951 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1952 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1953 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
0284adc6
LP
1954 (unsigned long long) le64toh(f->header->header_size),
1955 (unsigned long long) le64toh(f->header->arena_size),
1956 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1957 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
0284adc6
LP
1958 yes_no(journal_file_rotate_suggested(f)),
1959 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1960 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1961 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
3223f44f
LP
1962 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1963 (unsigned long long) le64toh(f->header->n_objects),
1964 (unsigned long long) le64toh(f->header->n_entries));
7560fffc 1965
0284adc6
LP
1966 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1967 printf("Data Objects: %llu\n"
1968 "Data Hash Table Fill: %.1f%%\n",
1969 (unsigned long long) le64toh(f->header->n_data),
1970 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 1971
0284adc6
LP
1972 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1973 printf("Field Objects: %llu\n"
1974 "Field Hash Table Fill: %.1f%%\n",
1975 (unsigned long long) le64toh(f->header->n_fields),
1976 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
1977
1978 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1979 printf("Tag Objects: %llu\n",
1980 (unsigned long long) le64toh(f->header->n_tags));
1981 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1982 printf("Entry Array Objects: %llu\n",
1983 (unsigned long long) le64toh(f->header->n_entry_arrays));
a1a03e30
LP
1984
1985 if (fstat(f->fd, &st) >= 0)
1986 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
7560fffc
LP
1987}
1988
0284adc6
LP
1989int journal_file_open(
1990 const char *fname,
1991 int flags,
1992 mode_t mode,
1993 bool compress,
baed47c3 1994 bool seal,
0284adc6
LP
1995 JournalMetrics *metrics,
1996 MMapCache *mmap_cache,
1997 JournalFile *template,
1998 JournalFile **ret) {
7560fffc 1999
0284adc6
LP
2000 JournalFile *f;
2001 int r;
2002 bool newly_created = false;
7560fffc 2003
0284adc6 2004 assert(fname);
7560fffc 2005
0284adc6
LP
2006 if ((flags & O_ACCMODE) != O_RDONLY &&
2007 (flags & O_ACCMODE) != O_RDWR)
2008 return -EINVAL;
7560fffc 2009
a0108012
LP
2010 if (!endswith(fname, ".journal") &&
2011 !endswith(fname, ".journal~"))
0284adc6 2012 return -EINVAL;
7560fffc 2013
0284adc6
LP
2014 f = new0(JournalFile, 1);
2015 if (!f)
2016 return -ENOMEM;
7560fffc 2017
0284adc6
LP
2018 f->fd = -1;
2019 f->mode = mode;
7560fffc 2020
0284adc6
LP
2021 f->flags = flags;
2022 f->prot = prot_from_flags(flags);
2023 f->writable = (flags & O_ACCMODE) != O_RDONLY;
48b61739 2024#ifdef HAVE_XZ
0284adc6 2025 f->compress = compress;
48b61739 2026#endif
baed47c3 2027 f->seal = seal;
7560fffc 2028
0284adc6
LP
2029 if (mmap_cache)
2030 f->mmap = mmap_cache_ref(mmap_cache);
2031 else {
84168d80 2032 f->mmap = mmap_cache_new();
0284adc6
LP
2033 if (!f->mmap) {
2034 r = -ENOMEM;
2035 goto fail;
2036 }
2037 }
7560fffc 2038
0284adc6
LP
2039 f->path = strdup(fname);
2040 if (!f->path) {
2041 r = -ENOMEM;
2042 goto fail;
2043 }
7560fffc 2044
0284adc6
LP
2045 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2046 if (f->fd < 0) {
2047 r = -errno;
2048 goto fail;
7560fffc 2049 }
7560fffc 2050
0284adc6
LP
2051 if (fstat(f->fd, &f->last_stat) < 0) {
2052 r = -errno;
2053 goto fail;
2054 }
7560fffc 2055
0284adc6
LP
2056 if (f->last_stat.st_size == 0 && f->writable) {
2057 newly_created = true;
7560fffc 2058
feb12d3e 2059#ifdef HAVE_GCRYPT
0284adc6 2060 /* Try to load the FSPRG state, and if we can't, then
baed47c3
LP
2061 * just don't do sealing */
2062 r = journal_file_fss_load(f);
0284adc6 2063 if (r < 0)
baed47c3 2064 f->seal = false;
feb12d3e 2065#endif
7560fffc 2066
0284adc6
LP
2067 r = journal_file_init_header(f, template);
2068 if (r < 0)
2069 goto fail;
7560fffc 2070
0284adc6
LP
2071 if (fstat(f->fd, &f->last_stat) < 0) {
2072 r = -errno;
2073 goto fail;
2074 }
2075 }
7560fffc 2076
0284adc6
LP
2077 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2078 r = -EIO;
2079 goto fail;
2080 }
7560fffc 2081
0284adc6
LP
2082 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2083 if (f->header == MAP_FAILED) {
2084 f->header = NULL;
2085 r = -errno;
2086 goto fail;
2087 }
7560fffc 2088
0284adc6
LP
2089 if (!newly_created) {
2090 r = journal_file_verify_header(f);
2091 if (r < 0)
2092 goto fail;
2093 }
7560fffc 2094
feb12d3e 2095#ifdef HAVE_GCRYPT
0284adc6 2096 if (!newly_created && f->writable) {
baed47c3 2097 r = journal_file_fss_load(f);
0284adc6
LP
2098 if (r < 0)
2099 goto fail;
2100 }
feb12d3e 2101#endif
cec736d2
LP
2102
2103 if (f->writable) {
4a92baf3
LP
2104 if (metrics) {
2105 journal_default_metrics(metrics, f->fd);
2106 f->metrics = *metrics;
2107 } else if (template)
2108 f->metrics = template->metrics;
2109
cec736d2
LP
2110 r = journal_file_refresh_header(f);
2111 if (r < 0)
2112 goto fail;
2113 }
2114
feb12d3e 2115#ifdef HAVE_GCRYPT
baed47c3 2116 r = journal_file_hmac_setup(f);
14d10188
LP
2117 if (r < 0)
2118 goto fail;
feb12d3e 2119#endif
14d10188 2120
cec736d2 2121 if (newly_created) {
de190aef 2122 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2123 if (r < 0)
2124 goto fail;
2125
de190aef 2126 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2127 if (r < 0)
2128 goto fail;
7560fffc 2129
feb12d3e 2130#ifdef HAVE_GCRYPT
7560fffc
LP
2131 r = journal_file_append_first_tag(f);
2132 if (r < 0)
2133 goto fail;
feb12d3e 2134#endif
cec736d2
LP
2135 }
2136
de190aef 2137 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2138 if (r < 0)
2139 goto fail;
2140
de190aef 2141 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2142 if (r < 0)
2143 goto fail;
2144
2145 if (ret)
2146 *ret = f;
2147
2148 return 0;
2149
2150fail:
2151 journal_file_close(f);
2152
2153 return r;
2154}
0ac38b70 2155
baed47c3 2156int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
0ac38b70
LP
2157 char *p;
2158 size_t l;
2159 JournalFile *old_file, *new_file = NULL;
2160 int r;
2161
2162 assert(f);
2163 assert(*f);
2164
2165 old_file = *f;
2166
2167 if (!old_file->writable)
2168 return -EINVAL;
2169
2170 if (!endswith(old_file->path, ".journal"))
2171 return -EINVAL;
2172
2173 l = strlen(old_file->path);
2174
9447a7f1 2175 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2176 if (!p)
2177 return -ENOMEM;
2178
2179 memcpy(p, old_file->path, l - 8);
2180 p[l-8] = '@';
2181 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2182 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2183 "-%016llx-%016llx.journal",
beec0085 2184 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
0ac38b70
LP
2185 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2186
2187 r = rename(old_file->path, p);
2188 free(p);
2189
2190 if (r < 0)
2191 return -errno;
2192
ccdbaf91 2193 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2194
baed47c3 2195 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2196 journal_file_close(old_file);
2197
2198 *f = new_file;
2199 return r;
2200}
2201
9447a7f1
LP
2202int journal_file_open_reliably(
2203 const char *fname,
2204 int flags,
2205 mode_t mode,
7560fffc 2206 bool compress,
baed47c3 2207 bool seal,
4a92baf3 2208 JournalMetrics *metrics,
27370278 2209 MMapCache *mmap_cache,
9447a7f1
LP
2210 JournalFile *template,
2211 JournalFile **ret) {
2212
2213 int r;
2214 size_t l;
2215 char *p;
2216
baed47c3 2217 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2218 metrics, mmap_cache, template, ret);
0071d9f1
LP
2219 if (r != -EBADMSG && /* corrupted */
2220 r != -ENODATA && /* truncated */
2221 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2222 r != -EPROTONOSUPPORT && /* incompatible feature */
2223 r != -EBUSY && /* unclean shutdown */
2224 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2225 return r;
2226
2227 if ((flags & O_ACCMODE) == O_RDONLY)
2228 return r;
2229
2230 if (!(flags & O_CREAT))
2231 return r;
2232
7560fffc
LP
2233 if (!endswith(fname, ".journal"))
2234 return r;
2235
5c70eab4
LP
2236 /* The file is corrupted. Rotate it away and try it again (but only once) */
2237
9447a7f1
LP
2238 l = strlen(fname);
2239 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2240 (int) (l-8), fname,
2241 (unsigned long long) now(CLOCK_REALTIME),
2242 random_ull()) < 0)
2243 return -ENOMEM;
2244
2245 r = rename(fname, p);
2246 free(p);
2247 if (r < 0)
2248 return -errno;
2249
a1a1898f 2250 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2251
baed47c3 2252 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2253 metrics, mmap_cache, template, ret);
9447a7f1
LP
2254}
2255
cf244689
LP
2256
2257int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2258 uint64_t i, n;
2259 uint64_t q, xor_hash = 0;
2260 int r;
2261 EntryItem *items;
2262 dual_timestamp ts;
2263
2264 assert(from);
2265 assert(to);
2266 assert(o);
2267 assert(p);
2268
2269 if (!to->writable)
2270 return -EPERM;
2271
2272 ts.monotonic = le64toh(o->entry.monotonic);
2273 ts.realtime = le64toh(o->entry.realtime);
2274
2275 if (to->tail_entry_monotonic_valid &&
2276 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2277 return -EINVAL;
2278
cf244689
LP
2279 n = journal_file_entry_n_items(o);
2280 items = alloca(sizeof(EntryItem) * n);
2281
2282 for (i = 0; i < n; i++) {
4fd052ae
FC
2283 uint64_t l, h;
2284 le64_t le_hash;
cf244689
LP
2285 size_t t;
2286 void *data;
2287 Object *u;
2288
2289 q = le64toh(o->entry.items[i].object_offset);
2290 le_hash = o->entry.items[i].hash;
2291
2292 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2293 if (r < 0)
2294 return r;
2295
2296 if (le_hash != o->data.hash)
2297 return -EBADMSG;
2298
2299 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2300 t = (size_t) l;
2301
2302 /* We hit the limit on 32bit machines */
2303 if ((uint64_t) t != l)
2304 return -E2BIG;
2305
2306 if (o->object.flags & OBJECT_COMPRESSED) {
2307#ifdef HAVE_XZ
2308 uint64_t rsize;
2309
2310 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2311 return -EBADMSG;
2312
2313 data = from->compress_buffer;
2314 l = rsize;
2315#else
2316 return -EPROTONOSUPPORT;
2317#endif
2318 } else
2319 data = o->data.payload;
2320
2321 r = journal_file_append_data(to, data, l, &u, &h);
2322 if (r < 0)
2323 return r;
2324
2325 xor_hash ^= le64toh(u->data.hash);
2326 items[i].object_offset = htole64(h);
2327 items[i].hash = u->data.hash;
2328
2329 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2330 if (r < 0)
2331 return r;
2332 }
2333
2334 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2335}
babfc091
LP
2336
2337void journal_default_metrics(JournalMetrics *m, int fd) {
2338 uint64_t fs_size = 0;
2339 struct statvfs ss;
a7bc2c2a 2340 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2341
2342 assert(m);
2343 assert(fd >= 0);
2344
2345 if (fstatvfs(fd, &ss) >= 0)
2346 fs_size = ss.f_frsize * ss.f_blocks;
2347
2348 if (m->max_use == (uint64_t) -1) {
2349
2350 if (fs_size > 0) {
2351 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2352
2353 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2354 m->max_use = DEFAULT_MAX_USE_UPPER;
2355
2356 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2357 m->max_use = DEFAULT_MAX_USE_LOWER;
2358 } else
2359 m->max_use = DEFAULT_MAX_USE_LOWER;
2360 } else {
2361 m->max_use = PAGE_ALIGN(m->max_use);
2362
2363 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2364 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2365 }
2366
2367 if (m->max_size == (uint64_t) -1) {
2368 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2369
2370 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2371 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2372 } else
2373 m->max_size = PAGE_ALIGN(m->max_size);
2374
2375 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2376 m->max_size = JOURNAL_FILE_SIZE_MIN;
2377
2378 if (m->max_size*2 > m->max_use)
2379 m->max_use = m->max_size*2;
2380
2381 if (m->min_size == (uint64_t) -1)
2382 m->min_size = JOURNAL_FILE_SIZE_MIN;
2383 else {
2384 m->min_size = PAGE_ALIGN(m->min_size);
2385
2386 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2387 m->min_size = JOURNAL_FILE_SIZE_MIN;
2388
2389 if (m->min_size > m->max_size)
2390 m->max_size = m->min_size;
2391 }
2392
2393 if (m->keep_free == (uint64_t) -1) {
2394
2395 if (fs_size > 0) {
2396 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2397
2398 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2399 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2400
2401 } else
2402 m->keep_free = DEFAULT_KEEP_FREE;
2403 }
2404
2b43f939
LP
2405 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2406 format_bytes(a, sizeof(a), m->max_use),
2407 format_bytes(b, sizeof(b), m->max_size),
2408 format_bytes(c, sizeof(c), m->min_size),
2409 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2410}
08984293
LP
2411
2412int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2413 assert(f);
2414 assert(from || to);
2415
2416 if (from) {
162566a4
LP
2417 if (f->header->head_entry_realtime == 0)
2418 return -ENOENT;
08984293 2419
162566a4 2420 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2421 }
2422
2423 if (to) {
162566a4
LP
2424 if (f->header->tail_entry_realtime == 0)
2425 return -ENOENT;
08984293 2426
162566a4 2427 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2428 }
2429
2430 return 1;
2431}
2432
2433int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2434 char t[9+32+1] = "_BOOT_ID=";
2435 Object *o;
2436 uint64_t p;
2437 int r;
2438
2439 assert(f);
2440 assert(from || to);
2441
2442 sd_id128_to_string(boot_id, t + 9);
2443
2444 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2445 if (r <= 0)
2446 return r;
2447
2448 if (le64toh(o->data.n_entries) <= 0)
2449 return 0;
2450
2451 if (from) {
2452 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2453 if (r < 0)
2454 return r;
2455
2456 *from = le64toh(o->entry.monotonic);
2457 }
2458
2459 if (to) {
2460 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2461 if (r < 0)
2462 return r;
2463
2464 r = generic_array_get_plus_one(f,
2465 le64toh(o->data.entry_offset),
2466 le64toh(o->data.entry_array_offset),
2467 le64toh(o->data.n_entries)-1,
2468 &o, NULL);
2469 if (r <= 0)
2470 return r;
2471
2472 *to = le64toh(o->entry.monotonic);
2473 }
2474
2475 return 1;
2476}
dca6219e
LP
2477
2478bool journal_file_rotate_suggested(JournalFile *f) {
2479 assert(f);
2480
2481 /* If we gained new header fields we gained new features,
2482 * hence suggest a rotation */
361f9cbc
LP
2483 if (le64toh(f->header->header_size) < sizeof(Header)) {
2484 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2485 return true;
361f9cbc 2486 }
dca6219e
LP
2487
2488 /* Let's check if the hash tables grew over a certain fill
2489 * level (75%, borrowing this value from Java's hash table
2490 * implementation), and if so suggest a rotation. To calculate
2491 * the fill level we need the n_data field, which only exists
2492 * in newer versions. */
2493
2494 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
2495 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2496 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2497 f->path,
2498 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2499 (unsigned long long) le64toh(f->header->n_data),
2500 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2501 (unsigned long long) (f->last_stat.st_size),
2502 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 2503 return true;
361f9cbc 2504 }
dca6219e
LP
2505
2506 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
2507 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2508 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2509 f->path,
2510 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2511 (unsigned long long) le64toh(f->header->n_fields),
2512 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 2513 return true;
361f9cbc 2514 }
dca6219e
LP
2515
2516 return false;
2517}