]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
mount: don't try to initialize extra deps for mount units before initializing their...
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
0284adc6 32#include "journal-authenticate.h"
cec736d2 33#include "lookup3.h"
807e17f0 34#include "compress.h"
7560fffc 35#include "fsprg.h"
cec736d2 36
4a92baf3
LP
/* Default sizes, in bytes, of the data and the field hash tables */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads of at least this many bytes are candidates for compression */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL << 10)                /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL << 20)                 /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL << 30)                 /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL << 20)              /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL << 30)               /* 4 GiB */

/* This is the keep_free value when we can't determine the system
 * size */
#define DEFAULT_KEEP_FREE (1ULL << 20)                     /* 1 MiB */

/* n_data was the first entry we added after the initial file format design */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 63
cec736d2 64void journal_file_close(JournalFile *f) {
de190aef 65 assert(f);
cec736d2 66
feb12d3e 67#ifdef HAVE_GCRYPT
b0af6f41 68 /* Write the final tag */
c586dbf1 69 if (f->seal && f->writable)
b0af6f41 70 journal_file_append_tag(f);
feb12d3e 71#endif
b0af6f41 72
7560fffc 73 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
74 if (f->mmap && f->fd >= 0)
75 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc
LP
76
77 if (f->writable && f->fd >= 0)
78 fdatasync(f->fd);
79
d384c7a8 80 if (f->header) {
cd96b3b8
LP
81 /* Mark the file offline. Don't override the archived state if it already is set */
82 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 83 f->header->state = STATE_OFFLINE;
cec736d2 84
d384c7a8
MS
85 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
86 }
cec736d2 87
0ac38b70
LP
88 if (f->fd >= 0)
89 close_nointr_nofail(f->fd);
90
cec736d2 91 free(f->path);
807e17f0 92
16e9f408
LP
93 if (f->mmap)
94 mmap_cache_unref(f->mmap);
95
807e17f0
LP
96#ifdef HAVE_XZ
97 free(f->compress_buffer);
98#endif
99
7560fffc 100#ifdef HAVE_GCRYPT
baed47c3
LP
101 if (f->fss_file)
102 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
103 else if (f->fsprg_state)
104 free(f->fsprg_state);
105
106 free(f->fsprg_seed);
7560fffc
LP
107
108 if (f->hmac)
109 gcry_md_close(f->hmac);
110#endif
111
cec736d2
LP
112 free(f);
113}
114
0ac38b70 115static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
116 Header h;
117 ssize_t k;
118 int r;
119
120 assert(f);
121
122 zero(h);
7560fffc 123 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 124 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 125
7560fffc
LP
126 h.incompatible_flags =
127 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
128
129 h.compatible_flags =
baed47c3 130 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 131
cec736d2
LP
132 r = sd_id128_randomize(&h.file_id);
133 if (r < 0)
134 return r;
135
0ac38b70
LP
136 if (template) {
137 h.seqnum_id = template->header->seqnum_id;
beec0085 138 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
139 } else
140 h.seqnum_id = h.file_id;
cec736d2
LP
141
142 k = pwrite(f->fd, &h, sizeof(h), 0);
143 if (k < 0)
144 return -errno;
145
146 if (k != sizeof(h))
147 return -EIO;
148
149 return 0;
150}
151
152static int journal_file_refresh_header(JournalFile *f) {
153 int r;
de190aef 154 sd_id128_t boot_id;
cec736d2
LP
155
156 assert(f);
157
158 r = sd_id128_get_machine(&f->header->machine_id);
159 if (r < 0)
160 return r;
161
de190aef 162 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
163 if (r < 0)
164 return r;
165
de190aef
LP
166 if (sd_id128_equal(boot_id, f->header->boot_id))
167 f->tail_entry_monotonic_valid = true;
168
169 f->header->boot_id = boot_id;
170
171 f->header->state = STATE_ONLINE;
b788cc23 172
7560fffc
LP
173 /* Sync the online state to disk */
174 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
175 fdatasync(f->fd);
b788cc23 176
cec736d2
LP
177 return 0;
178}
179
180static int journal_file_verify_header(JournalFile *f) {
181 assert(f);
182
7560fffc 183 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
184 return -EBADMSG;
185
7560fffc
LP
186 /* In both read and write mode we refuse to open files with
187 * incompatible flags we don't know */
807e17f0 188#ifdef HAVE_XZ
7560fffc 189 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
190 return -EPROTONOSUPPORT;
191#else
cec736d2
LP
192 if (f->header->incompatible_flags != 0)
193 return -EPROTONOSUPPORT;
807e17f0 194#endif
cec736d2 195
7560fffc
LP
196 /* When open for writing we refuse to open files with
197 * compatible flags, too */
198 if (f->writable) {
199#ifdef HAVE_GCRYPT
baed47c3 200 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
201 return -EPROTONOSUPPORT;
202#else
203 if (f->header->compatible_flags != 0)
204 return -EPROTONOSUPPORT;
205#endif
206 }
207
db11ac1a
LP
208 if (f->header->state >= _STATE_MAX)
209 return -EBADMSG;
210
dca6219e
LP
211 /* The first addition was n_data, so check that we are at least this large */
212 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
213 return -EBADMSG;
214
8088cbd3 215 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
216 return -EBADMSG;
217
db11ac1a
LP
218 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
219 return -ENODATA;
220
221 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
222 return -ENODATA;
223
7762e02b
LP
224 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
225 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
226 !VALID64(le64toh(f->header->tail_object_offset)) ||
227 !VALID64(le64toh(f->header->entry_array_offset)))
228 return -ENODATA;
229
230 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
231 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
232 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
233 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
cec736d2
LP
234 return -ENODATA;
235
236 if (f->writable) {
ccdbaf91 237 uint8_t state;
cec736d2
LP
238 sd_id128_t machine_id;
239 int r;
240
241 r = sd_id128_get_machine(&machine_id);
242 if (r < 0)
243 return r;
244
245 if (!sd_id128_equal(machine_id, f->header->machine_id))
246 return -EHOSTDOWN;
247
de190aef 248 state = f->header->state;
cec736d2 249
71fa6f00
LP
250 if (state == STATE_ONLINE) {
251 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
252 return -EBUSY;
253 } else if (state == STATE_ARCHIVED)
cec736d2 254 return -ESHUTDOWN;
71fa6f00
LP
255 else if (state != STATE_OFFLINE) {
256 log_debug("Journal file %s has unknown state %u.", f->path, state);
257 return -EBUSY;
258 }
cec736d2
LP
259 }
260
8088cbd3 261 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
c586dbf1 262
f1889c91 263 f->seal = JOURNAL_HEADER_SEALED(f->header);
7560fffc 264
cec736d2
LP
265 return 0;
266}
267
268static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 269 uint64_t old_size, new_size;
fec2aa2f 270 int r;
cec736d2
LP
271
272 assert(f);
273
cec736d2 274 /* We assume that this file is not sparse, and we know that
38ac38b2 275 * for sure, since we always call posix_fallocate()
cec736d2
LP
276 * ourselves */
277
278 old_size =
23b0b2b2 279 le64toh(f->header->header_size) +
cec736d2
LP
280 le64toh(f->header->arena_size);
281
bc85bfee 282 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
283 if (new_size < le64toh(f->header->header_size))
284 new_size = le64toh(f->header->header_size);
bc85bfee
LP
285
286 if (new_size <= old_size)
cec736d2
LP
287 return 0;
288
bc85bfee
LP
289 if (f->metrics.max_size > 0 &&
290 new_size > f->metrics.max_size)
291 return -E2BIG;
cec736d2 292
bc85bfee
LP
293 if (new_size > f->metrics.min_size &&
294 f->metrics.keep_free > 0) {
cec736d2
LP
295 struct statvfs svfs;
296
297 if (fstatvfs(f->fd, &svfs) >= 0) {
298 uint64_t available;
299
300 available = svfs.f_bfree * svfs.f_bsize;
301
bc85bfee
LP
302 if (available >= f->metrics.keep_free)
303 available -= f->metrics.keep_free;
cec736d2
LP
304 else
305 available = 0;
306
307 if (new_size - old_size > available)
308 return -E2BIG;
309 }
310 }
311
bc85bfee
LP
312 /* Note that the glibc fallocate() fallback is very
313 inefficient, hence we try to minimize the allocation area
314 as we can. */
fec2aa2f
GV
315 r = posix_fallocate(f->fd, old_size, new_size - old_size);
316 if (r != 0)
317 return -r;
cec736d2
LP
318
319 if (fstat(f->fd, &f->last_stat) < 0)
320 return -errno;
321
23b0b2b2 322 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
323
324 return 0;
325}
326
fcde2389 327static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
cec736d2 328 assert(f);
cec736d2
LP
329 assert(ret);
330
7762e02b
LP
331 if (size <= 0)
332 return -EINVAL;
333
2a59ea54 334 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
335 if (offset + size > (uint64_t) f->last_stat.st_size) {
336 /* Hmm, out of range? Let's refresh the fstat() data
337 * first, before we trust that check. */
338
339 if (fstat(f->fd, &f->last_stat) < 0 ||
340 offset + size > (uint64_t) f->last_stat.st_size)
341 return -EADDRNOTAVAIL;
342 }
343
fcde2389 344 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
cec736d2
LP
345}
346
16e9f408
LP
347static uint64_t minimum_header_size(Object *o) {
348
349 static uint64_t table[] = {
350 [OBJECT_DATA] = sizeof(DataObject),
351 [OBJECT_FIELD] = sizeof(FieldObject),
352 [OBJECT_ENTRY] = sizeof(EntryObject),
353 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
354 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
355 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
356 [OBJECT_TAG] = sizeof(TagObject),
357 };
358
359 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
360 return sizeof(ObjectHeader);
361
362 return table[o->object.type];
363}
364
de190aef 365int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
366 int r;
367 void *t;
368 Object *o;
369 uint64_t s;
16e9f408 370 unsigned context;
cec736d2
LP
371
372 assert(f);
373 assert(ret);
374
db11ac1a
LP
375 /* Objects may only be located at multiple of 64 bit */
376 if (!VALID64(offset))
377 return -EFAULT;
378
16e9f408
LP
379 /* One context for each type, plus one catch-all for the rest */
380 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
381
fcde2389 382 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
383 if (r < 0)
384 return r;
385
386 o = (Object*) t;
387 s = le64toh(o->object.size);
388
389 if (s < sizeof(ObjectHeader))
390 return -EBADMSG;
391
16e9f408
LP
392 if (o->object.type <= OBJECT_UNUSED)
393 return -EBADMSG;
394
395 if (s < minimum_header_size(o))
396 return -EBADMSG;
397
de190aef 398 if (type >= 0 && o->object.type != type)
cec736d2
LP
399 return -EBADMSG;
400
401 if (s > sizeof(ObjectHeader)) {
fcde2389 402 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
cec736d2
LP
403 if (r < 0)
404 return r;
405
406 o = (Object*) t;
407 }
408
cec736d2
LP
409 *ret = o;
410 return 0;
411}
412
d98cc1f2 413static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
414 uint64_t r;
415
416 assert(f);
417
beec0085 418 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
419
420 if (seqnum) {
de190aef 421 /* If an external seqnum counter was passed, we update
c2373f84
LP
422 * both the local and the external one, and set it to
423 * the maximum of both */
424
425 if (*seqnum + 1 > r)
426 r = *seqnum + 1;
427
428 *seqnum = r;
429 }
430
beec0085 431 f->header->tail_entry_seqnum = htole64(r);
cec736d2 432
beec0085
LP
433 if (f->header->head_entry_seqnum == 0)
434 f->header->head_entry_seqnum = htole64(r);
de190aef 435
cec736d2
LP
436 return r;
437}
438
0284adc6 439int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
440 int r;
441 uint64_t p;
442 Object *tail, *o;
443 void *t;
444
445 assert(f);
16e9f408 446 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
447 assert(size >= sizeof(ObjectHeader));
448 assert(offset);
449 assert(ret);
450
451 p = le64toh(f->header->tail_object_offset);
cec736d2 452 if (p == 0)
23b0b2b2 453 p = le64toh(f->header->header_size);
cec736d2 454 else {
de190aef 455 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
456 if (r < 0)
457 return r;
458
459 p += ALIGN64(le64toh(tail->object.size));
460 }
461
462 r = journal_file_allocate(f, p, size);
463 if (r < 0)
464 return r;
465
fcde2389 466 r = journal_file_move_to(f, type, false, p, size, &t);
cec736d2
LP
467 if (r < 0)
468 return r;
469
470 o = (Object*) t;
471
472 zero(o->object);
de190aef 473 o->object.type = type;
cec736d2
LP
474 o->object.size = htole64(size);
475
476 f->header->tail_object_offset = htole64(p);
cec736d2
LP
477 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
478
479 *ret = o;
480 *offset = p;
481
482 return 0;
483}
484
de190aef 485static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
486 uint64_t s, p;
487 Object *o;
488 int r;
489
490 assert(f);
491
dfabe643 492 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
493 journal file and we want to make sure we never get beyond
494 75% fill level. Calculate the hash table size for the
495 maximum file size based on these metrics. */
496
dfabe643 497 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
498 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
499 s = DEFAULT_DATA_HASH_TABLE_SIZE;
500
2b43f939 501 log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 502
de190aef
LP
503 r = journal_file_append_object(f,
504 OBJECT_DATA_HASH_TABLE,
505 offsetof(Object, hash_table.items) + s,
506 &o, &p);
cec736d2
LP
507 if (r < 0)
508 return r;
509
de190aef 510 memset(o->hash_table.items, 0, s);
cec736d2 511
de190aef
LP
512 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
513 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
514
515 return 0;
516}
517
de190aef 518static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
519 uint64_t s, p;
520 Object *o;
521 int r;
522
523 assert(f);
524
de190aef
LP
525 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
526 r = journal_file_append_object(f,
527 OBJECT_FIELD_HASH_TABLE,
528 offsetof(Object, hash_table.items) + s,
529 &o, &p);
cec736d2
LP
530 if (r < 0)
531 return r;
532
de190aef 533 memset(o->hash_table.items, 0, s);
cec736d2 534
de190aef
LP
535 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
536 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
537
538 return 0;
539}
540
de190aef 541static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
542 uint64_t s, p;
543 void *t;
544 int r;
545
546 assert(f);
547
de190aef
LP
548 p = le64toh(f->header->data_hash_table_offset);
549 s = le64toh(f->header->data_hash_table_size);
cec736d2 550
de190aef 551 r = journal_file_move_to(f,
16e9f408 552 OBJECT_DATA_HASH_TABLE,
fcde2389 553 true,
de190aef
LP
554 p, s,
555 &t);
cec736d2
LP
556 if (r < 0)
557 return r;
558
de190aef 559 f->data_hash_table = t;
cec736d2
LP
560 return 0;
561}
562
de190aef 563static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
564 uint64_t s, p;
565 void *t;
566 int r;
567
568 assert(f);
569
de190aef
LP
570 p = le64toh(f->header->field_hash_table_offset);
571 s = le64toh(f->header->field_hash_table_size);
cec736d2 572
de190aef 573 r = journal_file_move_to(f,
16e9f408 574 OBJECT_FIELD_HASH_TABLE,
fcde2389 575 true,
de190aef
LP
576 p, s,
577 &t);
cec736d2
LP
578 if (r < 0)
579 return r;
580
de190aef 581 f->field_hash_table = t;
cec736d2
LP
582 return 0;
583}
584
de190aef
LP
585static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
586 uint64_t p, h;
cec736d2
LP
587 int r;
588
589 assert(f);
590 assert(o);
591 assert(offset > 0);
b588975f
LP
592
593 if (o->object.type != OBJECT_DATA)
594 return -EINVAL;
cec736d2 595
48496df6
LP
596 /* This might alter the window we are looking at */
597
de190aef
LP
598 o->data.next_hash_offset = o->data.next_field_offset = 0;
599 o->data.entry_offset = o->data.entry_array_offset = 0;
600 o->data.n_entries = 0;
cec736d2 601
de190aef 602 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 603 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
604 if (p == 0) {
605 /* Only entry in the hash table is easy */
de190aef 606 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 607 } else {
48496df6
LP
608 /* Move back to the previous data object, to patch in
609 * pointer */
cec736d2 610
de190aef 611 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
612 if (r < 0)
613 return r;
614
de190aef 615 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
616 }
617
de190aef 618 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 619
dca6219e
LP
620 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
621 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
622
cec736d2
LP
623 return 0;
624}
625
de190aef
LP
626int journal_file_find_data_object_with_hash(
627 JournalFile *f,
628 const void *data, uint64_t size, uint64_t hash,
629 Object **ret, uint64_t *offset) {
48496df6 630
de190aef 631 uint64_t p, osize, h;
cec736d2
LP
632 int r;
633
634 assert(f);
635 assert(data || size == 0);
636
637 osize = offsetof(Object, data.payload) + size;
638
bc85bfee
LP
639 if (f->header->data_hash_table_size == 0)
640 return -EBADMSG;
641
de190aef
LP
642 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
643 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 644
de190aef
LP
645 while (p > 0) {
646 Object *o;
cec736d2 647
de190aef 648 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
649 if (r < 0)
650 return r;
651
807e17f0 652 if (le64toh(o->data.hash) != hash)
85a131e8 653 goto next;
807e17f0
LP
654
655 if (o->object.flags & OBJECT_COMPRESSED) {
656#ifdef HAVE_XZ
b785c858 657 uint64_t l, rsize;
cec736d2 658
807e17f0
LP
659 l = le64toh(o->object.size);
660 if (l <= offsetof(Object, data.payload))
cec736d2
LP
661 return -EBADMSG;
662
807e17f0
LP
663 l -= offsetof(Object, data.payload);
664
665 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
666 return -EBADMSG;
667
b785c858 668 if (rsize == size &&
807e17f0
LP
669 memcmp(f->compress_buffer, data, size) == 0) {
670
671 if (ret)
672 *ret = o;
673
674 if (offset)
675 *offset = p;
676
677 return 1;
678 }
679#else
680 return -EPROTONOSUPPORT;
681#endif
682
683 } else if (le64toh(o->object.size) == osize &&
684 memcmp(o->data.payload, data, size) == 0) {
685
cec736d2
LP
686 if (ret)
687 *ret = o;
688
689 if (offset)
690 *offset = p;
691
de190aef 692 return 1;
cec736d2
LP
693 }
694
85a131e8 695 next:
cec736d2
LP
696 p = le64toh(o->data.next_hash_offset);
697 }
698
de190aef
LP
699 return 0;
700}
701
702int journal_file_find_data_object(
703 JournalFile *f,
704 const void *data, uint64_t size,
705 Object **ret, uint64_t *offset) {
706
707 uint64_t hash;
708
709 assert(f);
710 assert(data || size == 0);
711
712 hash = hash64(data, size);
713
714 return journal_file_find_data_object_with_hash(f,
715 data, size, hash,
716 ret, offset);
717}
718
48496df6
LP
719static int journal_file_append_data(
720 JournalFile *f,
721 const void *data, uint64_t size,
722 Object **ret, uint64_t *offset) {
723
de190aef
LP
724 uint64_t hash, p;
725 uint64_t osize;
726 Object *o;
727 int r;
807e17f0 728 bool compressed = false;
de190aef
LP
729
730 assert(f);
731 assert(data || size == 0);
732
733 hash = hash64(data, size);
734
735 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
736 if (r < 0)
737 return r;
738 else if (r > 0) {
739
740 if (ret)
741 *ret = o;
742
743 if (offset)
744 *offset = p;
745
746 return 0;
747 }
748
749 osize = offsetof(Object, data.payload) + size;
750 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
751 if (r < 0)
752 return r;
753
cec736d2 754 o->data.hash = htole64(hash);
807e17f0
LP
755
756#ifdef HAVE_XZ
757 if (f->compress &&
758 size >= COMPRESSION_SIZE_THRESHOLD) {
759 uint64_t rsize;
760
761 compressed = compress_blob(data, size, o->data.payload, &rsize);
762
763 if (compressed) {
764 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
765 o->object.flags |= OBJECT_COMPRESSED;
766
807e17f0
LP
767 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
768 }
769 }
770#endif
771
64825d3c 772 if (!compressed && size > 0)
807e17f0 773 memcpy(o->data.payload, data, size);
cec736d2 774
de190aef 775 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
776 if (r < 0)
777 return r;
778
48496df6
LP
779 /* The linking might have altered the window, so let's
780 * refresh our pointer */
781 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
782 if (r < 0)
783 return r;
784
5996c7c2
LP
785#ifdef HAVE_GCRYPT
786 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
787 if (r < 0)
788 return r;
789#endif
790
cec736d2
LP
791 if (ret)
792 *ret = o;
793
794 if (offset)
de190aef 795 *offset = p;
cec736d2
LP
796
797 return 0;
798}
799
800uint64_t journal_file_entry_n_items(Object *o) {
801 assert(o);
b588975f
LP
802
803 if (o->object.type != OBJECT_ENTRY)
804 return 0;
cec736d2
LP
805
806 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
807}
808
0284adc6 809uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 810 assert(o);
b588975f
LP
811
812 if (o->object.type != OBJECT_ENTRY_ARRAY)
813 return 0;
de190aef
LP
814
815 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
816}
817
fb9a24b6
LP
818uint64_t journal_file_hash_table_n_items(Object *o) {
819 assert(o);
b588975f
LP
820
821 if (o->object.type != OBJECT_DATA_HASH_TABLE &&
822 o->object.type != OBJECT_FIELD_HASH_TABLE)
823 return 0;
fb9a24b6
LP
824
825 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
826}
827
de190aef 828static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
829 le64_t *first,
830 le64_t *idx,
de190aef 831 uint64_t p) {
cec736d2 832 int r;
de190aef
LP
833 uint64_t n = 0, ap = 0, q, i, a, hidx;
834 Object *o;
835
cec736d2 836 assert(f);
de190aef
LP
837 assert(first);
838 assert(idx);
839 assert(p > 0);
cec736d2 840
de190aef
LP
841 a = le64toh(*first);
842 i = hidx = le64toh(*idx);
843 while (a > 0) {
844
845 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
846 if (r < 0)
847 return r;
cec736d2 848
de190aef
LP
849 n = journal_file_entry_array_n_items(o);
850 if (i < n) {
851 o->entry_array.items[i] = htole64(p);
852 *idx = htole64(hidx + 1);
853 return 0;
854 }
cec736d2 855
de190aef
LP
856 i -= n;
857 ap = a;
858 a = le64toh(o->entry_array.next_entry_array_offset);
859 }
860
861 if (hidx > n)
862 n = (hidx+1) * 2;
863 else
864 n = n * 2;
865
866 if (n < 4)
867 n = 4;
868
869 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
870 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
871 &o, &q);
cec736d2
LP
872 if (r < 0)
873 return r;
874
feb12d3e 875#ifdef HAVE_GCRYPT
5996c7c2 876 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
b0af6f41
LP
877 if (r < 0)
878 return r;
feb12d3e 879#endif
b0af6f41 880
de190aef 881 o->entry_array.items[i] = htole64(p);
cec736d2 882
de190aef 883 if (ap == 0)
7be3aa17 884 *first = htole64(q);
cec736d2 885 else {
de190aef 886 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
887 if (r < 0)
888 return r;
889
de190aef
LP
890 o->entry_array.next_entry_array_offset = htole64(q);
891 }
cec736d2 892
2dee23eb
LP
893 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
894 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
895
de190aef
LP
896 *idx = htole64(hidx + 1);
897
898 return 0;
899}
cec736d2 900
de190aef 901static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
902 le64_t *extra,
903 le64_t *first,
904 le64_t *idx,
de190aef
LP
905 uint64_t p) {
906
907 int r;
908
909 assert(f);
910 assert(extra);
911 assert(first);
912 assert(idx);
913 assert(p > 0);
914
915 if (*idx == 0)
916 *extra = htole64(p);
917 else {
4fd052ae 918 le64_t i;
de190aef 919
7be3aa17 920 i = htole64(le64toh(*idx) - 1);
de190aef
LP
921 r = link_entry_into_array(f, first, &i, p);
922 if (r < 0)
923 return r;
cec736d2
LP
924 }
925
de190aef
LP
926 *idx = htole64(le64toh(*idx) + 1);
927 return 0;
928}
929
930static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
931 uint64_t p;
932 int r;
933 assert(f);
934 assert(o);
935 assert(offset > 0);
936
937 p = le64toh(o->entry.items[i].object_offset);
938 if (p == 0)
939 return -EINVAL;
940
941 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
942 if (r < 0)
943 return r;
944
de190aef
LP
945 return link_entry_into_array_plus_one(f,
946 &o->data.entry_offset,
947 &o->data.entry_array_offset,
948 &o->data.n_entries,
949 offset);
cec736d2
LP
950}
951
952static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 953 uint64_t n, i;
cec736d2
LP
954 int r;
955
956 assert(f);
957 assert(o);
958 assert(offset > 0);
b588975f
LP
959
960 if (o->object.type != OBJECT_ENTRY)
961 return -EINVAL;
cec736d2 962
b788cc23
LP
963 __sync_synchronize();
964
cec736d2 965 /* Link up the entry itself */
de190aef
LP
966 r = link_entry_into_array(f,
967 &f->header->entry_array_offset,
968 &f->header->n_entries,
969 offset);
970 if (r < 0)
971 return r;
cec736d2 972
aaf53376 973 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 974
de190aef 975 if (f->header->head_entry_realtime == 0)
0ac38b70 976 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 977
0ac38b70 978 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
979 f->header->tail_entry_monotonic = o->entry.monotonic;
980
981 f->tail_entry_monotonic_valid = true;
cec736d2
LP
982
983 /* Link up the items */
984 n = journal_file_entry_n_items(o);
985 for (i = 0; i < n; i++) {
986 r = journal_file_link_entry_item(f, o, offset, i);
987 if (r < 0)
988 return r;
989 }
990
cec736d2
LP
991 return 0;
992}
993
994static int journal_file_append_entry_internal(
995 JournalFile *f,
996 const dual_timestamp *ts,
997 uint64_t xor_hash,
998 const EntryItem items[], unsigned n_items,
de190aef 999 uint64_t *seqnum,
cec736d2
LP
1000 Object **ret, uint64_t *offset) {
1001 uint64_t np;
1002 uint64_t osize;
1003 Object *o;
1004 int r;
1005
1006 assert(f);
1007 assert(items || n_items == 0);
de190aef 1008 assert(ts);
cec736d2
LP
1009
1010 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1011
de190aef 1012 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
1013 if (r < 0)
1014 return r;
1015
d98cc1f2 1016 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 1017 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
1018 o->entry.realtime = htole64(ts->realtime);
1019 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
1020 o->entry.xor_hash = htole64(xor_hash);
1021 o->entry.boot_id = f->header->boot_id;
1022
feb12d3e 1023#ifdef HAVE_GCRYPT
5996c7c2 1024 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
b0af6f41
LP
1025 if (r < 0)
1026 return r;
feb12d3e 1027#endif
b0af6f41 1028
cec736d2
LP
1029 r = journal_file_link_entry(f, o, np);
1030 if (r < 0)
1031 return r;
1032
1033 if (ret)
1034 *ret = o;
1035
1036 if (offset)
1037 *offset = np;
1038
1039 return 0;
1040}
1041
cf244689 1042void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1043 assert(f);
1044
1045 /* inotify() does not receive IN_MODIFY events from file
1046 * accesses done via mmap(). After each access we hence
1047 * trigger IN_MODIFY by truncating the journal file to its
1048 * current size which triggers IN_MODIFY. */
1049
bc85bfee
LP
1050 __sync_synchronize();
1051
50f20cfd 1052 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
c5315881 1053 log_error("Failed to truncate file to its own size: %m");
50f20cfd
LP
1054}
1055
de190aef 1056int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1057 unsigned i;
1058 EntryItem *items;
1059 int r;
1060 uint64_t xor_hash = 0;
de190aef 1061 struct dual_timestamp _ts;
cec736d2
LP
1062
1063 assert(f);
1064 assert(iovec || n_iovec == 0);
1065
de190aef
LP
1066 if (!f->writable)
1067 return -EPERM;
1068
1069 if (!ts) {
1070 dual_timestamp_get(&_ts);
1071 ts = &_ts;
1072 }
1073
1074 if (f->tail_entry_monotonic_valid &&
1075 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1076 return -EINVAL;
1077
feb12d3e 1078#ifdef HAVE_GCRYPT
7560fffc
LP
1079 r = journal_file_maybe_append_tag(f, ts->realtime);
1080 if (r < 0)
1081 return r;
feb12d3e 1082#endif
7560fffc 1083
64825d3c
LP
1084 /* alloca() can't take 0, hence let's allocate at least one */
1085 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1086
1087 for (i = 0; i < n_iovec; i++) {
1088 uint64_t p;
1089 Object *o;
1090
1091 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1092 if (r < 0)
cf244689 1093 return r;
cec736d2
LP
1094
1095 xor_hash ^= le64toh(o->data.hash);
1096 items[i].object_offset = htole64(p);
de7b95cd 1097 items[i].hash = o->data.hash;
cec736d2
LP
1098 }
1099
de190aef 1100 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1101
50f20cfd
LP
1102 journal_file_post_change(f);
1103
cec736d2
LP
1104 return r;
1105}
1106
de190aef
LP
1107static int generic_array_get(JournalFile *f,
1108 uint64_t first,
1109 uint64_t i,
1110 Object **ret, uint64_t *offset) {
1111
cec736d2 1112 Object *o;
6c8a39b8 1113 uint64_t p = 0, a;
cec736d2
LP
1114 int r;
1115
1116 assert(f);
1117
de190aef
LP
1118 a = first;
1119 while (a > 0) {
1120 uint64_t n;
cec736d2 1121
de190aef
LP
1122 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1123 if (r < 0)
1124 return r;
cec736d2 1125
de190aef
LP
1126 n = journal_file_entry_array_n_items(o);
1127 if (i < n) {
1128 p = le64toh(o->entry_array.items[i]);
1129 break;
cec736d2
LP
1130 }
1131
de190aef
LP
1132 i -= n;
1133 a = le64toh(o->entry_array.next_entry_array_offset);
1134 }
1135
1136 if (a <= 0 || p <= 0)
1137 return 0;
1138
1139 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1140 if (r < 0)
1141 return r;
1142
1143 if (ret)
1144 *ret = o;
1145
1146 if (offset)
1147 *offset = p;
1148
1149 return 1;
1150}
1151
1152static int generic_array_get_plus_one(JournalFile *f,
1153 uint64_t extra,
1154 uint64_t first,
1155 uint64_t i,
1156 Object **ret, uint64_t *offset) {
1157
1158 Object *o;
1159
1160 assert(f);
1161
1162 if (i == 0) {
1163 int r;
1164
1165 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1166 if (r < 0)
1167 return r;
1168
de190aef
LP
1169 if (ret)
1170 *ret = o;
cec736d2 1171
de190aef
LP
1172 if (offset)
1173 *offset = extra;
cec736d2 1174
de190aef 1175 return 1;
cec736d2
LP
1176 }
1177
de190aef
LP
1178 return generic_array_get(f, first, i-1, ret, offset);
1179}
cec736d2 1180
de190aef
LP
/* Results returned by the test_object callbacks used for bisection */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object sorts before the needle */
        TEST_RIGHT = 2  /* the tested object sorts after the needle */
};
cec736d2 1186
de190aef
LP
1187static int generic_array_bisect(JournalFile *f,
1188 uint64_t first,
1189 uint64_t n,
1190 uint64_t needle,
1191 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1192 direction_t direction,
1193 Object **ret,
1194 uint64_t *offset,
1195 uint64_t *idx) {
1196
1197 uint64_t a, p, t = 0, i = 0, last_p = 0;
1198 bool subtract_one = false;
1199 Object *o, *array = NULL;
1200 int r;
cec736d2 1201
de190aef
LP
1202 assert(f);
1203 assert(test_object);
cec736d2 1204
de190aef
LP
1205 a = first;
1206 while (a > 0) {
1207 uint64_t left, right, k, lp;
1208
1209 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1210 if (r < 0)
1211 return r;
1212
de190aef
LP
1213 k = journal_file_entry_array_n_items(array);
1214 right = MIN(k, n);
1215 if (right <= 0)
1216 return 0;
cec736d2 1217
de190aef
LP
1218 i = right - 1;
1219 lp = p = le64toh(array->entry_array.items[i]);
1220 if (p <= 0)
1221 return -EBADMSG;
cec736d2 1222
de190aef
LP
1223 r = test_object(f, p, needle);
1224 if (r < 0)
1225 return r;
cec736d2 1226
de190aef
LP
1227 if (r == TEST_FOUND)
1228 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1229
1230 if (r == TEST_RIGHT) {
1231 left = 0;
1232 right -= 1;
1233 for (;;) {
1234 if (left == right) {
1235 if (direction == DIRECTION_UP)
1236 subtract_one = true;
1237
1238 i = left;
1239 goto found;
1240 }
1241
1242 assert(left < right);
1243
1244 i = (left + right) / 2;
1245 p = le64toh(array->entry_array.items[i]);
1246 if (p <= 0)
1247 return -EBADMSG;
1248
1249 r = test_object(f, p, needle);
1250 if (r < 0)
1251 return r;
cec736d2 1252
de190aef
LP
1253 if (r == TEST_FOUND)
1254 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1255
1256 if (r == TEST_RIGHT)
1257 right = i;
1258 else
1259 left = i + 1;
1260 }
1261 }
1262
cbdca852
LP
1263 if (k > n) {
1264 if (direction == DIRECTION_UP) {
1265 i = n;
1266 subtract_one = true;
1267 goto found;
1268 }
1269
cec736d2 1270 return 0;
cbdca852 1271 }
cec736d2 1272
de190aef
LP
1273 last_p = lp;
1274
1275 n -= k;
1276 t += k;
1277 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1278 }
1279
1280 return 0;
de190aef
LP
1281
1282found:
1283 if (subtract_one && t == 0 && i == 0)
1284 return 0;
1285
1286 if (subtract_one && i == 0)
1287 p = last_p;
1288 else if (subtract_one)
1289 p = le64toh(array->entry_array.items[i-1]);
1290 else
1291 p = le64toh(array->entry_array.items[i]);
1292
1293 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1294 if (r < 0)
1295 return r;
1296
1297 if (ret)
1298 *ret = o;
1299
1300 if (offset)
1301 *offset = p;
1302
1303 if (idx)
cbdca852 1304 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1305
1306 return 1;
cec736d2
LP
1307}
1308
de190aef
LP
1309static int generic_array_bisect_plus_one(JournalFile *f,
1310 uint64_t extra,
1311 uint64_t first,
1312 uint64_t n,
1313 uint64_t needle,
1314 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1315 direction_t direction,
1316 Object **ret,
1317 uint64_t *offset,
1318 uint64_t *idx) {
1319
cec736d2 1320 int r;
cbdca852
LP
1321 bool step_back = false;
1322 Object *o;
cec736d2
LP
1323
1324 assert(f);
de190aef 1325 assert(test_object);
cec736d2 1326
de190aef
LP
1327 if (n <= 0)
1328 return 0;
cec736d2 1329
de190aef
LP
1330 /* This bisects the array in object 'first', but first checks
1331 * an extra */
de190aef
LP
1332 r = test_object(f, extra, needle);
1333 if (r < 0)
1334 return r;
a536e261
LP
1335
1336 if (r == TEST_FOUND)
1337 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1338
cbdca852
LP
1339 /* if we are looking with DIRECTION_UP then we need to first
1340 see if in the actual array there is a matching entry, and
1341 return the last one of that. But if there isn't any we need
1342 to return this one. Hence remember this, and return it
1343 below. */
1344 if (r == TEST_LEFT)
1345 step_back = direction == DIRECTION_UP;
de190aef 1346
cbdca852
LP
1347 if (r == TEST_RIGHT) {
1348 if (direction == DIRECTION_DOWN)
1349 goto found;
1350 else
1351 return 0;
a536e261 1352 }
cec736d2 1353
de190aef
LP
1354 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1355
cbdca852
LP
1356 if (r == 0 && step_back)
1357 goto found;
1358
ecf68b1d 1359 if (r > 0 && idx)
de190aef
LP
1360 (*idx) ++;
1361
1362 return r;
cbdca852
LP
1363
1364found:
1365 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1366 if (r < 0)
1367 return r;
1368
1369 if (ret)
1370 *ret = o;
1371
1372 if (offset)
1373 *offset = extra;
1374
1375 if (idx)
1376 *idx = 0;
1377
1378 return 1;
1379}
1380
1381static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1382 assert(f);
1383 assert(p > 0);
1384
1385 if (p == needle)
1386 return TEST_FOUND;
1387 else if (p < needle)
1388 return TEST_LEFT;
1389 else
1390 return TEST_RIGHT;
1391}
1392
1393int journal_file_move_to_entry_by_offset(
1394 JournalFile *f,
1395 uint64_t p,
1396 direction_t direction,
1397 Object **ret,
1398 uint64_t *offset) {
1399
1400 return generic_array_bisect(f,
1401 le64toh(f->header->entry_array_offset),
1402 le64toh(f->header->n_entries),
1403 p,
1404 test_object_offset,
1405 direction,
1406 ret, offset, NULL);
de190aef
LP
1407}
1408
cbdca852 1409
de190aef
LP
1410static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1411 Object *o;
1412 int r;
1413
1414 assert(f);
1415 assert(p > 0);
1416
1417 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1418 if (r < 0)
1419 return r;
1420
de190aef
LP
1421 if (le64toh(o->entry.seqnum) == needle)
1422 return TEST_FOUND;
1423 else if (le64toh(o->entry.seqnum) < needle)
1424 return TEST_LEFT;
1425 else
1426 return TEST_RIGHT;
1427}
cec736d2 1428
de190aef
LP
1429int journal_file_move_to_entry_by_seqnum(
1430 JournalFile *f,
1431 uint64_t seqnum,
1432 direction_t direction,
1433 Object **ret,
1434 uint64_t *offset) {
1435
1436 return generic_array_bisect(f,
1437 le64toh(f->header->entry_array_offset),
1438 le64toh(f->header->n_entries),
1439 seqnum,
1440 test_object_seqnum,
1441 direction,
1442 ret, offset, NULL);
1443}
cec736d2 1444
de190aef
LP
1445static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1446 Object *o;
1447 int r;
1448
1449 assert(f);
1450 assert(p > 0);
1451
1452 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1453 if (r < 0)
1454 return r;
1455
1456 if (le64toh(o->entry.realtime) == needle)
1457 return TEST_FOUND;
1458 else if (le64toh(o->entry.realtime) < needle)
1459 return TEST_LEFT;
1460 else
1461 return TEST_RIGHT;
cec736d2
LP
1462}
1463
de190aef
LP
1464int journal_file_move_to_entry_by_realtime(
1465 JournalFile *f,
1466 uint64_t realtime,
1467 direction_t direction,
1468 Object **ret,
1469 uint64_t *offset) {
1470
1471 return generic_array_bisect(f,
1472 le64toh(f->header->entry_array_offset),
1473 le64toh(f->header->n_entries),
1474 realtime,
1475 test_object_realtime,
1476 direction,
1477 ret, offset, NULL);
1478}
1479
1480static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1481 Object *o;
1482 int r;
1483
1484 assert(f);
1485 assert(p > 0);
1486
1487 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1488 if (r < 0)
1489 return r;
1490
1491 if (le64toh(o->entry.monotonic) == needle)
1492 return TEST_FOUND;
1493 else if (le64toh(o->entry.monotonic) < needle)
1494 return TEST_LEFT;
1495 else
1496 return TEST_RIGHT;
1497}
1498
1499int journal_file_move_to_entry_by_monotonic(
1500 JournalFile *f,
1501 sd_id128_t boot_id,
1502 uint64_t monotonic,
1503 direction_t direction,
1504 Object **ret,
1505 uint64_t *offset) {
1506
10b6f904 1507 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1508 Object *o;
1509 int r;
1510
cbdca852 1511 assert(f);
de190aef 1512
cbdca852 1513 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1514 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1515 if (r < 0)
1516 return r;
cbdca852 1517 if (r == 0)
de190aef
LP
1518 return -ENOENT;
1519
1520 return generic_array_bisect_plus_one(f,
1521 le64toh(o->data.entry_offset),
1522 le64toh(o->data.entry_array_offset),
1523 le64toh(o->data.n_entries),
1524 monotonic,
1525 test_object_monotonic,
1526 direction,
1527 ret, offset, NULL);
1528}
1529
de190aef
LP
1530int journal_file_next_entry(
1531 JournalFile *f,
1532 Object *o, uint64_t p,
1533 direction_t direction,
1534 Object **ret, uint64_t *offset) {
1535
1536 uint64_t i, n;
cec736d2
LP
1537 int r;
1538
1539 assert(f);
de190aef
LP
1540 assert(p > 0 || !o);
1541
1542 n = le64toh(f->header->n_entries);
1543 if (n <= 0)
1544 return 0;
cec736d2
LP
1545
1546 if (!o)
de190aef 1547 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1548 else {
de190aef 1549 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1550 return -EINVAL;
1551
de190aef
LP
1552 r = generic_array_bisect(f,
1553 le64toh(f->header->entry_array_offset),
1554 le64toh(f->header->n_entries),
1555 p,
1556 test_object_offset,
1557 DIRECTION_DOWN,
1558 NULL, NULL,
1559 &i);
1560 if (r <= 0)
1561 return r;
1562
1563 if (direction == DIRECTION_DOWN) {
1564 if (i >= n - 1)
1565 return 0;
1566
1567 i++;
1568 } else {
1569 if (i <= 0)
1570 return 0;
1571
1572 i--;
1573 }
cec736d2
LP
1574 }
1575
de190aef
LP
1576 /* And jump to it */
1577 return generic_array_get(f,
1578 le64toh(f->header->entry_array_offset),
1579 i,
1580 ret, offset);
1581}
cec736d2 1582
de190aef
LP
1583int journal_file_skip_entry(
1584 JournalFile *f,
1585 Object *o, uint64_t p,
1586 int64_t skip,
1587 Object **ret, uint64_t *offset) {
1588
1589 uint64_t i, n;
1590 int r;
1591
1592 assert(f);
1593 assert(o);
1594 assert(p > 0);
1595
1596 if (o->object.type != OBJECT_ENTRY)
1597 return -EINVAL;
1598
1599 r = generic_array_bisect(f,
1600 le64toh(f->header->entry_array_offset),
1601 le64toh(f->header->n_entries),
1602 p,
1603 test_object_offset,
1604 DIRECTION_DOWN,
1605 NULL, NULL,
1606 &i);
1607 if (r <= 0)
cec736d2
LP
1608 return r;
1609
de190aef
LP
1610 /* Calculate new index */
1611 if (skip < 0) {
1612 if ((uint64_t) -skip >= i)
1613 i = 0;
1614 else
1615 i = i - (uint64_t) -skip;
1616 } else
1617 i += (uint64_t) skip;
cec736d2 1618
de190aef
LP
1619 n = le64toh(f->header->n_entries);
1620 if (n <= 0)
1621 return -EBADMSG;
cec736d2 1622
de190aef
LP
1623 if (i >= n)
1624 i = n-1;
1625
1626 return generic_array_get(f,
1627 le64toh(f->header->entry_array_offset),
1628 i,
1629 ret, offset);
cec736d2
LP
1630}
1631
de190aef
LP
1632int journal_file_next_entry_for_data(
1633 JournalFile *f,
1634 Object *o, uint64_t p,
1635 uint64_t data_offset,
1636 direction_t direction,
1637 Object **ret, uint64_t *offset) {
1638
1639 uint64_t n, i;
cec736d2 1640 int r;
de190aef 1641 Object *d;
cec736d2
LP
1642
1643 assert(f);
de190aef 1644 assert(p > 0 || !o);
cec736d2 1645
de190aef 1646 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1647 if (r < 0)
de190aef 1648 return r;
cec736d2 1649
de190aef
LP
1650 n = le64toh(d->data.n_entries);
1651 if (n <= 0)
1652 return n;
cec736d2 1653
de190aef
LP
1654 if (!o)
1655 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1656 else {
1657 if (o->object.type != OBJECT_ENTRY)
1658 return -EINVAL;
cec736d2 1659
de190aef
LP
1660 r = generic_array_bisect_plus_one(f,
1661 le64toh(d->data.entry_offset),
1662 le64toh(d->data.entry_array_offset),
1663 le64toh(d->data.n_entries),
1664 p,
1665 test_object_offset,
1666 DIRECTION_DOWN,
1667 NULL, NULL,
1668 &i);
1669
1670 if (r <= 0)
cec736d2
LP
1671 return r;
1672
de190aef
LP
1673 if (direction == DIRECTION_DOWN) {
1674 if (i >= n - 1)
1675 return 0;
cec736d2 1676
de190aef
LP
1677 i++;
1678 } else {
1679 if (i <= 0)
1680 return 0;
cec736d2 1681
de190aef
LP
1682 i--;
1683 }
cec736d2 1684
de190aef 1685 }
cec736d2 1686
de190aef
LP
1687 return generic_array_get_plus_one(f,
1688 le64toh(d->data.entry_offset),
1689 le64toh(d->data.entry_array_offset),
1690 i,
1691 ret, offset);
1692}
cec736d2 1693
cbdca852
LP
1694int journal_file_move_to_entry_by_offset_for_data(
1695 JournalFile *f,
1696 uint64_t data_offset,
1697 uint64_t p,
1698 direction_t direction,
1699 Object **ret, uint64_t *offset) {
1700
1701 int r;
1702 Object *d;
1703
1704 assert(f);
1705
1706 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1707 if (r < 0)
1708 return r;
1709
1710 return generic_array_bisect_plus_one(f,
1711 le64toh(d->data.entry_offset),
1712 le64toh(d->data.entry_array_offset),
1713 le64toh(d->data.n_entries),
1714 p,
1715 test_object_offset,
1716 direction,
1717 ret, offset, NULL);
1718}
1719
1720int journal_file_move_to_entry_by_monotonic_for_data(
1721 JournalFile *f,
1722 uint64_t data_offset,
1723 sd_id128_t boot_id,
1724 uint64_t monotonic,
1725 direction_t direction,
1726 Object **ret, uint64_t *offset) {
1727
1728 char t[9+32+1] = "_BOOT_ID=";
1729 Object *o, *d;
1730 int r;
1731 uint64_t b, z;
1732
1733 assert(f);
1734
1735 /* First, seek by time */
1736 sd_id128_to_string(boot_id, t + 9);
1737 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1738 if (r < 0)
1739 return r;
1740 if (r == 0)
1741 return -ENOENT;
1742
1743 r = generic_array_bisect_plus_one(f,
1744 le64toh(o->data.entry_offset),
1745 le64toh(o->data.entry_array_offset),
1746 le64toh(o->data.n_entries),
1747 monotonic,
1748 test_object_monotonic,
1749 direction,
1750 NULL, &z, NULL);
1751 if (r <= 0)
1752 return r;
1753
1754 /* And now, continue seeking until we find an entry that
1755 * exists in both bisection arrays */
1756
1757 for (;;) {
1758 Object *qo;
1759 uint64_t p, q;
1760
1761 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1762 if (r < 0)
1763 return r;
1764
1765 r = generic_array_bisect_plus_one(f,
1766 le64toh(d->data.entry_offset),
1767 le64toh(d->data.entry_array_offset),
1768 le64toh(d->data.n_entries),
1769 z,
1770 test_object_offset,
1771 direction,
1772 NULL, &p, NULL);
1773 if (r <= 0)
1774 return r;
1775
1776 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1777 if (r < 0)
1778 return r;
1779
1780 r = generic_array_bisect_plus_one(f,
1781 le64toh(o->data.entry_offset),
1782 le64toh(o->data.entry_array_offset),
1783 le64toh(o->data.n_entries),
1784 p,
1785 test_object_offset,
1786 direction,
1787 &qo, &q, NULL);
1788
1789 if (r <= 0)
1790 return r;
1791
1792 if (p == q) {
1793 if (ret)
1794 *ret = qo;
1795 if (offset)
1796 *offset = q;
1797
1798 return 1;
1799 }
1800
1801 z = q;
1802 }
1803
1804 return 0;
1805}
1806
de190aef
LP
1807int journal_file_move_to_entry_by_seqnum_for_data(
1808 JournalFile *f,
1809 uint64_t data_offset,
1810 uint64_t seqnum,
1811 direction_t direction,
1812 Object **ret, uint64_t *offset) {
cec736d2 1813
de190aef
LP
1814 Object *d;
1815 int r;
cec736d2 1816
91a31dde
LP
1817 assert(f);
1818
de190aef 1819 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1820 if (r < 0)
de190aef 1821 return r;
cec736d2 1822
de190aef
LP
1823 return generic_array_bisect_plus_one(f,
1824 le64toh(d->data.entry_offset),
1825 le64toh(d->data.entry_array_offset),
1826 le64toh(d->data.n_entries),
1827 seqnum,
1828 test_object_seqnum,
1829 direction,
1830 ret, offset, NULL);
1831}
cec736d2 1832
de190aef
LP
1833int journal_file_move_to_entry_by_realtime_for_data(
1834 JournalFile *f,
1835 uint64_t data_offset,
1836 uint64_t realtime,
1837 direction_t direction,
1838 Object **ret, uint64_t *offset) {
1839
1840 Object *d;
1841 int r;
1842
91a31dde
LP
1843 assert(f);
1844
de190aef 1845 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1846 if (r < 0)
de190aef
LP
1847 return r;
1848
1849 return generic_array_bisect_plus_one(f,
1850 le64toh(d->data.entry_offset),
1851 le64toh(d->data.entry_array_offset),
1852 le64toh(d->data.n_entries),
1853 realtime,
1854 test_object_realtime,
1855 direction,
1856 ret, offset, NULL);
cec736d2
LP
1857}
1858
0284adc6 1859void journal_file_dump(JournalFile *f) {
7560fffc 1860 Object *o;
7560fffc 1861 int r;
0284adc6 1862 uint64_t p;
7560fffc
LP
1863
1864 assert(f);
1865
0284adc6 1866 journal_file_print_header(f);
7560fffc 1867
0284adc6
LP
1868 p = le64toh(f->header->header_size);
1869 while (p != 0) {
1870 r = journal_file_move_to_object(f, -1, p, &o);
1871 if (r < 0)
1872 goto fail;
7560fffc 1873
0284adc6 1874 switch (o->object.type) {
d98cc1f2 1875
0284adc6
LP
1876 case OBJECT_UNUSED:
1877 printf("Type: OBJECT_UNUSED\n");
1878 break;
d98cc1f2 1879
0284adc6
LP
1880 case OBJECT_DATA:
1881 printf("Type: OBJECT_DATA\n");
1882 break;
7560fffc 1883
0284adc6 1884 case OBJECT_ENTRY:
f7fab8a5 1885 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
0284adc6
LP
1886 (unsigned long long) le64toh(o->entry.seqnum),
1887 (unsigned long long) le64toh(o->entry.monotonic),
1888 (unsigned long long) le64toh(o->entry.realtime));
1889 break;
7560fffc 1890
0284adc6
LP
1891 case OBJECT_FIELD_HASH_TABLE:
1892 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1893 break;
7560fffc 1894
0284adc6
LP
1895 case OBJECT_DATA_HASH_TABLE:
1896 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1897 break;
7560fffc 1898
0284adc6
LP
1899 case OBJECT_ENTRY_ARRAY:
1900 printf("Type: OBJECT_ENTRY_ARRAY\n");
1901 break;
7560fffc 1902
0284adc6 1903 case OBJECT_TAG:
f7fab8a5
LP
1904 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1905 (unsigned long long) le64toh(o->tag.seqnum),
1906 (unsigned long long) le64toh(o->tag.epoch));
0284adc6
LP
1907 break;
1908 }
7560fffc 1909
0284adc6
LP
1910 if (o->object.flags & OBJECT_COMPRESSED)
1911 printf("Flags: COMPRESSED\n");
7560fffc 1912
0284adc6
LP
1913 if (p == le64toh(f->header->tail_object_offset))
1914 p = 0;
1915 else
1916 p = p + ALIGN64(le64toh(o->object.size));
1917 }
7560fffc 1918
0284adc6
LP
1919 return;
1920fail:
1921 log_error("File corrupt");
7560fffc
LP
1922}
1923
0284adc6
LP
1924void journal_file_print_header(JournalFile *f) {
1925 char a[33], b[33], c[33];
1926 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
a1a03e30
LP
1927 struct stat st;
1928 char bytes[FORMAT_BYTES_MAX];
7560fffc
LP
1929
1930 assert(f);
7560fffc 1931
0284adc6
LP
1932 printf("File Path: %s\n"
1933 "File ID: %s\n"
1934 "Machine ID: %s\n"
1935 "Boot ID: %s\n"
1936 "Sequential Number ID: %s\n"
1937 "State: %s\n"
1938 "Compatible Flags:%s%s\n"
1939 "Incompatible Flags:%s%s\n"
1940 "Header size: %llu\n"
1941 "Arena size: %llu\n"
1942 "Data Hash Table Size: %llu\n"
1943 "Field Hash Table Size: %llu\n"
0284adc6
LP
1944 "Rotate Suggested: %s\n"
1945 "Head Sequential Number: %llu\n"
1946 "Tail Sequential Number: %llu\n"
1947 "Head Realtime Timestamp: %s\n"
3223f44f
LP
1948 "Tail Realtime Timestamp: %s\n"
1949 "Objects: %llu\n"
1950 "Entry Objects: %llu\n",
0284adc6
LP
1951 f->path,
1952 sd_id128_to_string(f->header->file_id, a),
1953 sd_id128_to_string(f->header->machine_id, b),
1954 sd_id128_to_string(f->header->boot_id, c),
1955 sd_id128_to_string(f->header->seqnum_id, c),
3223f44f
LP
1956 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1957 f->header->state == STATE_ONLINE ? "ONLINE" :
1958 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
8088cbd3
LP
1959 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1960 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1961 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1962 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
0284adc6
LP
1963 (unsigned long long) le64toh(f->header->header_size),
1964 (unsigned long long) le64toh(f->header->arena_size),
1965 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1966 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
0284adc6
LP
1967 yes_no(journal_file_rotate_suggested(f)),
1968 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1969 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1970 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
3223f44f
LP
1971 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1972 (unsigned long long) le64toh(f->header->n_objects),
1973 (unsigned long long) le64toh(f->header->n_entries));
7560fffc 1974
0284adc6
LP
1975 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1976 printf("Data Objects: %llu\n"
1977 "Data Hash Table Fill: %.1f%%\n",
1978 (unsigned long long) le64toh(f->header->n_data),
1979 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 1980
0284adc6
LP
1981 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1982 printf("Field Objects: %llu\n"
1983 "Field Hash Table Fill: %.1f%%\n",
1984 (unsigned long long) le64toh(f->header->n_fields),
1985 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
1986
1987 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1988 printf("Tag Objects: %llu\n",
1989 (unsigned long long) le64toh(f->header->n_tags));
1990 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1991 printf("Entry Array Objects: %llu\n",
1992 (unsigned long long) le64toh(f->header->n_entry_arrays));
a1a03e30
LP
1993
1994 if (fstat(f->fd, &st) >= 0)
1995 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
7560fffc
LP
1996}
1997
0284adc6
LP
1998int journal_file_open(
1999 const char *fname,
2000 int flags,
2001 mode_t mode,
2002 bool compress,
baed47c3 2003 bool seal,
0284adc6
LP
2004 JournalMetrics *metrics,
2005 MMapCache *mmap_cache,
2006 JournalFile *template,
2007 JournalFile **ret) {
7560fffc 2008
0284adc6
LP
2009 JournalFile *f;
2010 int r;
2011 bool newly_created = false;
7560fffc 2012
0284adc6 2013 assert(fname);
0559d3a5 2014 assert(ret);
7560fffc 2015
0284adc6
LP
2016 if ((flags & O_ACCMODE) != O_RDONLY &&
2017 (flags & O_ACCMODE) != O_RDWR)
2018 return -EINVAL;
7560fffc 2019
a0108012
LP
2020 if (!endswith(fname, ".journal") &&
2021 !endswith(fname, ".journal~"))
0284adc6 2022 return -EINVAL;
7560fffc 2023
0284adc6
LP
2024 f = new0(JournalFile, 1);
2025 if (!f)
2026 return -ENOMEM;
7560fffc 2027
0284adc6
LP
2028 f->fd = -1;
2029 f->mode = mode;
7560fffc 2030
0284adc6
LP
2031 f->flags = flags;
2032 f->prot = prot_from_flags(flags);
2033 f->writable = (flags & O_ACCMODE) != O_RDONLY;
48b61739 2034#ifdef HAVE_XZ
0284adc6 2035 f->compress = compress;
48b61739 2036#endif
49a32d43 2037#ifdef HAVE_GCRYPT
baed47c3 2038 f->seal = seal;
49a32d43 2039#endif
7560fffc 2040
0284adc6
LP
2041 if (mmap_cache)
2042 f->mmap = mmap_cache_ref(mmap_cache);
2043 else {
84168d80 2044 f->mmap = mmap_cache_new();
0284adc6
LP
2045 if (!f->mmap) {
2046 r = -ENOMEM;
2047 goto fail;
2048 }
2049 }
7560fffc 2050
0284adc6
LP
2051 f->path = strdup(fname);
2052 if (!f->path) {
2053 r = -ENOMEM;
2054 goto fail;
2055 }
7560fffc 2056
0284adc6
LP
2057 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2058 if (f->fd < 0) {
2059 r = -errno;
2060 goto fail;
7560fffc 2061 }
7560fffc 2062
0284adc6
LP
2063 if (fstat(f->fd, &f->last_stat) < 0) {
2064 r = -errno;
2065 goto fail;
2066 }
7560fffc 2067
0284adc6
LP
2068 if (f->last_stat.st_size == 0 && f->writable) {
2069 newly_created = true;
7560fffc 2070
feb12d3e 2071#ifdef HAVE_GCRYPT
0284adc6 2072 /* Try to load the FSPRG state, and if we can't, then
baed47c3 2073 * just don't do sealing */
49a32d43
LP
2074 if (f->seal) {
2075 r = journal_file_fss_load(f);
2076 if (r < 0)
2077 f->seal = false;
2078 }
feb12d3e 2079#endif
7560fffc 2080
0284adc6
LP
2081 r = journal_file_init_header(f, template);
2082 if (r < 0)
2083 goto fail;
7560fffc 2084
0284adc6
LP
2085 if (fstat(f->fd, &f->last_stat) < 0) {
2086 r = -errno;
2087 goto fail;
2088 }
2089 }
7560fffc 2090
0284adc6
LP
2091 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2092 r = -EIO;
2093 goto fail;
2094 }
7560fffc 2095
0284adc6
LP
2096 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2097 if (f->header == MAP_FAILED) {
2098 f->header = NULL;
2099 r = -errno;
2100 goto fail;
2101 }
7560fffc 2102
0284adc6
LP
2103 if (!newly_created) {
2104 r = journal_file_verify_header(f);
2105 if (r < 0)
2106 goto fail;
2107 }
7560fffc 2108
feb12d3e 2109#ifdef HAVE_GCRYPT
0284adc6 2110 if (!newly_created && f->writable) {
baed47c3 2111 r = journal_file_fss_load(f);
0284adc6
LP
2112 if (r < 0)
2113 goto fail;
2114 }
feb12d3e 2115#endif
cec736d2
LP
2116
2117 if (f->writable) {
4a92baf3
LP
2118 if (metrics) {
2119 journal_default_metrics(metrics, f->fd);
2120 f->metrics = *metrics;
2121 } else if (template)
2122 f->metrics = template->metrics;
2123
cec736d2
LP
2124 r = journal_file_refresh_header(f);
2125 if (r < 0)
2126 goto fail;
2127 }
2128
feb12d3e 2129#ifdef HAVE_GCRYPT
baed47c3 2130 r = journal_file_hmac_setup(f);
14d10188
LP
2131 if (r < 0)
2132 goto fail;
feb12d3e 2133#endif
14d10188 2134
cec736d2 2135 if (newly_created) {
de190aef 2136 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2137 if (r < 0)
2138 goto fail;
2139
de190aef 2140 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2141 if (r < 0)
2142 goto fail;
7560fffc 2143
feb12d3e 2144#ifdef HAVE_GCRYPT
7560fffc
LP
2145 r = journal_file_append_first_tag(f);
2146 if (r < 0)
2147 goto fail;
feb12d3e 2148#endif
cec736d2
LP
2149 }
2150
de190aef 2151 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2152 if (r < 0)
2153 goto fail;
2154
de190aef 2155 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2156 if (r < 0)
2157 goto fail;
2158
0559d3a5 2159 *ret = f;
cec736d2
LP
2160 return 0;
2161
2162fail:
2163 journal_file_close(f);
2164
2165 return r;
2166}
0ac38b70 2167
baed47c3 2168int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
0ac38b70
LP
2169 char *p;
2170 size_t l;
2171 JournalFile *old_file, *new_file = NULL;
2172 int r;
2173
2174 assert(f);
2175 assert(*f);
2176
2177 old_file = *f;
2178
2179 if (!old_file->writable)
2180 return -EINVAL;
2181
2182 if (!endswith(old_file->path, ".journal"))
2183 return -EINVAL;
2184
2185 l = strlen(old_file->path);
2186
9447a7f1 2187 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2188 if (!p)
2189 return -ENOMEM;
2190
2191 memcpy(p, old_file->path, l - 8);
2192 p[l-8] = '@';
2193 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2194 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2195 "-%016llx-%016llx.journal",
beec0085 2196 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
0ac38b70
LP
2197 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2198
2199 r = rename(old_file->path, p);
2200 free(p);
2201
2202 if (r < 0)
2203 return -errno;
2204
ccdbaf91 2205 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2206
baed47c3 2207 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2208 journal_file_close(old_file);
2209
2210 *f = new_file;
2211 return r;
2212}
2213
9447a7f1
LP
2214int journal_file_open_reliably(
2215 const char *fname,
2216 int flags,
2217 mode_t mode,
7560fffc 2218 bool compress,
baed47c3 2219 bool seal,
4a92baf3 2220 JournalMetrics *metrics,
27370278 2221 MMapCache *mmap_cache,
9447a7f1
LP
2222 JournalFile *template,
2223 JournalFile **ret) {
2224
2225 int r;
2226 size_t l;
2227 char *p;
2228
baed47c3 2229 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2230 metrics, mmap_cache, template, ret);
0071d9f1
LP
2231 if (r != -EBADMSG && /* corrupted */
2232 r != -ENODATA && /* truncated */
2233 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2234 r != -EPROTONOSUPPORT && /* incompatible feature */
2235 r != -EBUSY && /* unclean shutdown */
2236 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2237 return r;
2238
2239 if ((flags & O_ACCMODE) == O_RDONLY)
2240 return r;
2241
2242 if (!(flags & O_CREAT))
2243 return r;
2244
7560fffc
LP
2245 if (!endswith(fname, ".journal"))
2246 return r;
2247
5c70eab4
LP
2248 /* The file is corrupted. Rotate it away and try it again (but only once) */
2249
9447a7f1
LP
2250 l = strlen(fname);
2251 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2252 (int) (l-8), fname,
2253 (unsigned long long) now(CLOCK_REALTIME),
2254 random_ull()) < 0)
2255 return -ENOMEM;
2256
2257 r = rename(fname, p);
2258 free(p);
2259 if (r < 0)
2260 return -errno;
2261
a1a1898f 2262 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2263
baed47c3 2264 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2265 metrics, mmap_cache, template, ret);
9447a7f1
LP
2266}
2267
cf244689
LP
2268
2269int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2270 uint64_t i, n;
2271 uint64_t q, xor_hash = 0;
2272 int r;
2273 EntryItem *items;
2274 dual_timestamp ts;
2275
2276 assert(from);
2277 assert(to);
2278 assert(o);
2279 assert(p);
2280
2281 if (!to->writable)
2282 return -EPERM;
2283
2284 ts.monotonic = le64toh(o->entry.monotonic);
2285 ts.realtime = le64toh(o->entry.realtime);
2286
2287 if (to->tail_entry_monotonic_valid &&
2288 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2289 return -EINVAL;
2290
cf244689
LP
2291 n = journal_file_entry_n_items(o);
2292 items = alloca(sizeof(EntryItem) * n);
2293
2294 for (i = 0; i < n; i++) {
4fd052ae
FC
2295 uint64_t l, h;
2296 le64_t le_hash;
cf244689
LP
2297 size_t t;
2298 void *data;
2299 Object *u;
2300
2301 q = le64toh(o->entry.items[i].object_offset);
2302 le_hash = o->entry.items[i].hash;
2303
2304 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2305 if (r < 0)
2306 return r;
2307
2308 if (le_hash != o->data.hash)
2309 return -EBADMSG;
2310
2311 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2312 t = (size_t) l;
2313
2314 /* We hit the limit on 32bit machines */
2315 if ((uint64_t) t != l)
2316 return -E2BIG;
2317
2318 if (o->object.flags & OBJECT_COMPRESSED) {
2319#ifdef HAVE_XZ
2320 uint64_t rsize;
2321
2322 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2323 return -EBADMSG;
2324
2325 data = from->compress_buffer;
2326 l = rsize;
2327#else
2328 return -EPROTONOSUPPORT;
2329#endif
2330 } else
2331 data = o->data.payload;
2332
2333 r = journal_file_append_data(to, data, l, &u, &h);
2334 if (r < 0)
2335 return r;
2336
2337 xor_hash ^= le64toh(u->data.hash);
2338 items[i].object_offset = htole64(h);
2339 items[i].hash = u->data.hash;
2340
2341 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2342 if (r < 0)
2343 return r;
2344 }
2345
2346 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2347}
babfc091
LP
2348
2349void journal_default_metrics(JournalMetrics *m, int fd) {
2350 uint64_t fs_size = 0;
2351 struct statvfs ss;
a7bc2c2a 2352 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2353
2354 assert(m);
2355 assert(fd >= 0);
2356
2357 if (fstatvfs(fd, &ss) >= 0)
2358 fs_size = ss.f_frsize * ss.f_blocks;
2359
2360 if (m->max_use == (uint64_t) -1) {
2361
2362 if (fs_size > 0) {
2363 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2364
2365 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2366 m->max_use = DEFAULT_MAX_USE_UPPER;
2367
2368 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2369 m->max_use = DEFAULT_MAX_USE_LOWER;
2370 } else
2371 m->max_use = DEFAULT_MAX_USE_LOWER;
2372 } else {
2373 m->max_use = PAGE_ALIGN(m->max_use);
2374
2375 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2376 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2377 }
2378
2379 if (m->max_size == (uint64_t) -1) {
2380 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2381
2382 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2383 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2384 } else
2385 m->max_size = PAGE_ALIGN(m->max_size);
2386
2387 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2388 m->max_size = JOURNAL_FILE_SIZE_MIN;
2389
2390 if (m->max_size*2 > m->max_use)
2391 m->max_use = m->max_size*2;
2392
2393 if (m->min_size == (uint64_t) -1)
2394 m->min_size = JOURNAL_FILE_SIZE_MIN;
2395 else {
2396 m->min_size = PAGE_ALIGN(m->min_size);
2397
2398 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2399 m->min_size = JOURNAL_FILE_SIZE_MIN;
2400
2401 if (m->min_size > m->max_size)
2402 m->max_size = m->min_size;
2403 }
2404
2405 if (m->keep_free == (uint64_t) -1) {
2406
2407 if (fs_size > 0) {
2408 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2409
2410 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2411 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2412
2413 } else
2414 m->keep_free = DEFAULT_KEEP_FREE;
2415 }
2416
2b43f939
LP
2417 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2418 format_bytes(a, sizeof(a), m->max_use),
2419 format_bytes(b, sizeof(b), m->max_size),
2420 format_bytes(c, sizeof(c), m->min_size),
2421 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2422}
08984293
LP
2423
2424int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2425 assert(f);
2426 assert(from || to);
2427
2428 if (from) {
162566a4
LP
2429 if (f->header->head_entry_realtime == 0)
2430 return -ENOENT;
08984293 2431
162566a4 2432 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2433 }
2434
2435 if (to) {
162566a4
LP
2436 if (f->header->tail_entry_realtime == 0)
2437 return -ENOENT;
08984293 2438
162566a4 2439 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2440 }
2441
2442 return 1;
2443}
2444
2445int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2446 char t[9+32+1] = "_BOOT_ID=";
2447 Object *o;
2448 uint64_t p;
2449 int r;
2450
2451 assert(f);
2452 assert(from || to);
2453
2454 sd_id128_to_string(boot_id, t + 9);
2455
2456 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2457 if (r <= 0)
2458 return r;
2459
2460 if (le64toh(o->data.n_entries) <= 0)
2461 return 0;
2462
2463 if (from) {
2464 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2465 if (r < 0)
2466 return r;
2467
2468 *from = le64toh(o->entry.monotonic);
2469 }
2470
2471 if (to) {
2472 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2473 if (r < 0)
2474 return r;
2475
2476 r = generic_array_get_plus_one(f,
2477 le64toh(o->data.entry_offset),
2478 le64toh(o->data.entry_array_offset),
2479 le64toh(o->data.n_entries)-1,
2480 &o, NULL);
2481 if (r <= 0)
2482 return r;
2483
2484 *to = le64toh(o->entry.monotonic);
2485 }
2486
2487 return 1;
2488}
dca6219e
LP
2489
2490bool journal_file_rotate_suggested(JournalFile *f) {
2491 assert(f);
2492
2493 /* If we gained new header fields we gained new features,
2494 * hence suggest a rotation */
361f9cbc
LP
2495 if (le64toh(f->header->header_size) < sizeof(Header)) {
2496 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2497 return true;
361f9cbc 2498 }
dca6219e
LP
2499
2500 /* Let's check if the hash tables grew over a certain fill
2501 * level (75%, borrowing this value from Java's hash table
2502 * implementation), and if so suggest a rotation. To calculate
2503 * the fill level we need the n_data field, which only exists
2504 * in newer versions. */
2505
2506 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
2507 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2508 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2509 f->path,
2510 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2511 (unsigned long long) le64toh(f->header->n_data),
2512 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2513 (unsigned long long) (f->last_stat.st_size),
2514 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 2515 return true;
361f9cbc 2516 }
dca6219e
LP
2517
2518 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
2519 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2520 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2521 f->path,
2522 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2523 (unsigned long long) le64toh(f->header->n_fields),
2524 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 2525 return true;
361f9cbc 2526 }
dca6219e
LP
2527
2528 return false;
2529}