]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
units: rework systemd-exit.service to terminate systemd via signal rather than bus
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
fb0951b0
LP
30#ifdef HAVE_XATTR
31#include <attr/xattr.h>
32#endif
33
cec736d2
LP
34#include "journal-def.h"
35#include "journal-file.h"
0284adc6 36#include "journal-authenticate.h"
cec736d2 37#include "lookup3.h"
807e17f0 38#include "compress.h"
7560fffc 39#include "fsprg.h"
cec736d2 40
4a92baf3
LP
41#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
42#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
cec736d2 43
be19b7df 44#define COMPRESSION_SIZE_THRESHOLD (512ULL)
807e17f0 45
babfc091 46/* This is the minimum journal file size */
b47ffcfd 47#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
babfc091
LP
48
49/* These are the lower and upper bounds if we deduce the max_use value
50 * from the file system size */
51#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
52#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
53
54/* This is the upper bound if we deduce max_size from max_use */
71100051 55#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
babfc091
LP
56
57/* This is the upper bound if we deduce the keep_free value from the
58 * file system size */
59#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
60
61/* This is the keep_free value when we can't determine the system
62 * size */
63#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
64
dca6219e
LP
65/* n_data was the first entry we added after the initial file format design */
66#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 67
cec736d2 68void journal_file_close(JournalFile *f) {
de190aef 69 assert(f);
cec736d2 70
feb12d3e 71#ifdef HAVE_GCRYPT
b0af6f41 72 /* Write the final tag */
c586dbf1 73 if (f->seal && f->writable)
b0af6f41 74 journal_file_append_tag(f);
feb12d3e 75#endif
b0af6f41 76
7560fffc 77 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
78 if (f->mmap && f->fd >= 0)
79 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc
LP
80
81 if (f->writable && f->fd >= 0)
82 fdatasync(f->fd);
83
d384c7a8 84 if (f->header) {
cd96b3b8
LP
85 /* Mark the file offline. Don't override the archived state if it already is set */
86 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 87 f->header->state = STATE_OFFLINE;
cec736d2 88
d384c7a8
MS
89 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
90 }
cec736d2 91
0ac38b70
LP
92 if (f->fd >= 0)
93 close_nointr_nofail(f->fd);
94
cec736d2 95 free(f->path);
807e17f0 96
16e9f408
LP
97 if (f->mmap)
98 mmap_cache_unref(f->mmap);
99
807e17f0
LP
100#ifdef HAVE_XZ
101 free(f->compress_buffer);
102#endif
103
7560fffc 104#ifdef HAVE_GCRYPT
baed47c3
LP
105 if (f->fss_file)
106 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
107 else if (f->fsprg_state)
108 free(f->fsprg_state);
109
110 free(f->fsprg_seed);
7560fffc
LP
111
112 if (f->hmac)
113 gcry_md_close(f->hmac);
114#endif
115
cec736d2
LP
116 free(f);
117}
118
0ac38b70 119static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
120 Header h;
121 ssize_t k;
122 int r;
123
124 assert(f);
125
126 zero(h);
7560fffc 127 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 128 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 129
7560fffc
LP
130 h.incompatible_flags =
131 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
132
133 h.compatible_flags =
baed47c3 134 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 135
cec736d2
LP
136 r = sd_id128_randomize(&h.file_id);
137 if (r < 0)
138 return r;
139
0ac38b70
LP
140 if (template) {
141 h.seqnum_id = template->header->seqnum_id;
beec0085 142 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
143 } else
144 h.seqnum_id = h.file_id;
cec736d2
LP
145
146 k = pwrite(f->fd, &h, sizeof(h), 0);
147 if (k < 0)
148 return -errno;
149
150 if (k != sizeof(h))
151 return -EIO;
152
153 return 0;
154}
155
156static int journal_file_refresh_header(JournalFile *f) {
157 int r;
de190aef 158 sd_id128_t boot_id;
cec736d2
LP
159
160 assert(f);
161
162 r = sd_id128_get_machine(&f->header->machine_id);
163 if (r < 0)
164 return r;
165
de190aef 166 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
167 if (r < 0)
168 return r;
169
de190aef
LP
170 if (sd_id128_equal(boot_id, f->header->boot_id))
171 f->tail_entry_monotonic_valid = true;
172
173 f->header->boot_id = boot_id;
174
175 f->header->state = STATE_ONLINE;
b788cc23 176
7560fffc
LP
177 /* Sync the online state to disk */
178 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
179 fdatasync(f->fd);
b788cc23 180
cec736d2
LP
181 return 0;
182}
183
184static int journal_file_verify_header(JournalFile *f) {
185 assert(f);
186
7560fffc 187 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
188 return -EBADMSG;
189
7560fffc
LP
190 /* In both read and write mode we refuse to open files with
191 * incompatible flags we don't know */
807e17f0 192#ifdef HAVE_XZ
7560fffc 193 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
194 return -EPROTONOSUPPORT;
195#else
cec736d2
LP
196 if (f->header->incompatible_flags != 0)
197 return -EPROTONOSUPPORT;
807e17f0 198#endif
cec736d2 199
7560fffc
LP
200 /* When open for writing we refuse to open files with
201 * compatible flags, too */
202 if (f->writable) {
203#ifdef HAVE_GCRYPT
baed47c3 204 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
205 return -EPROTONOSUPPORT;
206#else
207 if (f->header->compatible_flags != 0)
208 return -EPROTONOSUPPORT;
209#endif
210 }
211
db11ac1a
LP
212 if (f->header->state >= _STATE_MAX)
213 return -EBADMSG;
214
dca6219e
LP
215 /* The first addition was n_data, so check that we are at least this large */
216 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
217 return -EBADMSG;
218
8088cbd3 219 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
220 return -EBADMSG;
221
db11ac1a
LP
222 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
223 return -ENODATA;
224
225 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
226 return -ENODATA;
227
7762e02b
LP
228 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
229 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
230 !VALID64(le64toh(f->header->tail_object_offset)) ||
231 !VALID64(le64toh(f->header->entry_array_offset)))
232 return -ENODATA;
233
234 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
235 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
236 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
237 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
cec736d2
LP
238 return -ENODATA;
239
240 if (f->writable) {
ccdbaf91 241 uint8_t state;
cec736d2
LP
242 sd_id128_t machine_id;
243 int r;
244
245 r = sd_id128_get_machine(&machine_id);
246 if (r < 0)
247 return r;
248
249 if (!sd_id128_equal(machine_id, f->header->machine_id))
250 return -EHOSTDOWN;
251
de190aef 252 state = f->header->state;
cec736d2 253
71fa6f00
LP
254 if (state == STATE_ONLINE) {
255 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
256 return -EBUSY;
257 } else if (state == STATE_ARCHIVED)
cec736d2 258 return -ESHUTDOWN;
71fa6f00
LP
259 else if (state != STATE_OFFLINE) {
260 log_debug("Journal file %s has unknown state %u.", f->path, state);
261 return -EBUSY;
262 }
cec736d2
LP
263 }
264
8088cbd3 265 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
c586dbf1 266
f1889c91 267 f->seal = JOURNAL_HEADER_SEALED(f->header);
7560fffc 268
cec736d2
LP
269 return 0;
270}
271
272static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 273 uint64_t old_size, new_size;
fec2aa2f 274 int r;
cec736d2
LP
275
276 assert(f);
277
cec736d2 278 /* We assume that this file is not sparse, and we know that
38ac38b2 279 * for sure, since we always call posix_fallocate()
cec736d2
LP
280 * ourselves */
281
282 old_size =
23b0b2b2 283 le64toh(f->header->header_size) +
cec736d2
LP
284 le64toh(f->header->arena_size);
285
bc85bfee 286 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
287 if (new_size < le64toh(f->header->header_size))
288 new_size = le64toh(f->header->header_size);
bc85bfee
LP
289
290 if (new_size <= old_size)
cec736d2
LP
291 return 0;
292
bc85bfee
LP
293 if (f->metrics.max_size > 0 &&
294 new_size > f->metrics.max_size)
295 return -E2BIG;
cec736d2 296
bc85bfee
LP
297 if (new_size > f->metrics.min_size &&
298 f->metrics.keep_free > 0) {
cec736d2
LP
299 struct statvfs svfs;
300
301 if (fstatvfs(f->fd, &svfs) >= 0) {
302 uint64_t available;
303
304 available = svfs.f_bfree * svfs.f_bsize;
305
bc85bfee
LP
306 if (available >= f->metrics.keep_free)
307 available -= f->metrics.keep_free;
cec736d2
LP
308 else
309 available = 0;
310
311 if (new_size - old_size > available)
312 return -E2BIG;
313 }
314 }
315
bc85bfee
LP
316 /* Note that the glibc fallocate() fallback is very
317 inefficient, hence we try to minimize the allocation area
318 as we can. */
fec2aa2f
GV
319 r = posix_fallocate(f->fd, old_size, new_size - old_size);
320 if (r != 0)
321 return -r;
cec736d2
LP
322
323 if (fstat(f->fd, &f->last_stat) < 0)
324 return -errno;
325
23b0b2b2 326 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
327
328 return 0;
329}
330
fcde2389 331static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
cec736d2 332 assert(f);
cec736d2
LP
333 assert(ret);
334
7762e02b
LP
335 if (size <= 0)
336 return -EINVAL;
337
2a59ea54 338 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
339 if (offset + size > (uint64_t) f->last_stat.st_size) {
340 /* Hmm, out of range? Let's refresh the fstat() data
341 * first, before we trust that check. */
342
343 if (fstat(f->fd, &f->last_stat) < 0 ||
344 offset + size > (uint64_t) f->last_stat.st_size)
345 return -EADDRNOTAVAIL;
346 }
347
fcde2389 348 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
cec736d2
LP
349}
350
16e9f408
LP
351static uint64_t minimum_header_size(Object *o) {
352
353 static uint64_t table[] = {
354 [OBJECT_DATA] = sizeof(DataObject),
355 [OBJECT_FIELD] = sizeof(FieldObject),
356 [OBJECT_ENTRY] = sizeof(EntryObject),
357 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
358 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
359 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
360 [OBJECT_TAG] = sizeof(TagObject),
361 };
362
363 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
364 return sizeof(ObjectHeader);
365
366 return table[o->object.type];
367}
368
de190aef 369int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
370 int r;
371 void *t;
372 Object *o;
373 uint64_t s;
16e9f408 374 unsigned context;
cec736d2
LP
375
376 assert(f);
377 assert(ret);
378
db11ac1a
LP
379 /* Objects may only be located at multiple of 64 bit */
380 if (!VALID64(offset))
381 return -EFAULT;
382
16e9f408
LP
383 /* One context for each type, plus one catch-all for the rest */
384 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
385
fcde2389 386 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
387 if (r < 0)
388 return r;
389
390 o = (Object*) t;
391 s = le64toh(o->object.size);
392
393 if (s < sizeof(ObjectHeader))
394 return -EBADMSG;
395
16e9f408
LP
396 if (o->object.type <= OBJECT_UNUSED)
397 return -EBADMSG;
398
399 if (s < minimum_header_size(o))
400 return -EBADMSG;
401
de190aef 402 if (type >= 0 && o->object.type != type)
cec736d2
LP
403 return -EBADMSG;
404
405 if (s > sizeof(ObjectHeader)) {
fcde2389 406 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
cec736d2
LP
407 if (r < 0)
408 return r;
409
410 o = (Object*) t;
411 }
412
cec736d2
LP
413 *ret = o;
414 return 0;
415}
416
d98cc1f2 417static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
418 uint64_t r;
419
420 assert(f);
421
beec0085 422 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
423
424 if (seqnum) {
de190aef 425 /* If an external seqnum counter was passed, we update
c2373f84
LP
426 * both the local and the external one, and set it to
427 * the maximum of both */
428
429 if (*seqnum + 1 > r)
430 r = *seqnum + 1;
431
432 *seqnum = r;
433 }
434
beec0085 435 f->header->tail_entry_seqnum = htole64(r);
cec736d2 436
beec0085
LP
437 if (f->header->head_entry_seqnum == 0)
438 f->header->head_entry_seqnum = htole64(r);
de190aef 439
cec736d2
LP
440 return r;
441}
442
0284adc6 443int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
444 int r;
445 uint64_t p;
446 Object *tail, *o;
447 void *t;
448
449 assert(f);
16e9f408 450 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
451 assert(size >= sizeof(ObjectHeader));
452 assert(offset);
453 assert(ret);
454
455 p = le64toh(f->header->tail_object_offset);
cec736d2 456 if (p == 0)
23b0b2b2 457 p = le64toh(f->header->header_size);
cec736d2 458 else {
de190aef 459 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
460 if (r < 0)
461 return r;
462
463 p += ALIGN64(le64toh(tail->object.size));
464 }
465
466 r = journal_file_allocate(f, p, size);
467 if (r < 0)
468 return r;
469
fcde2389 470 r = journal_file_move_to(f, type, false, p, size, &t);
cec736d2
LP
471 if (r < 0)
472 return r;
473
474 o = (Object*) t;
475
476 zero(o->object);
de190aef 477 o->object.type = type;
cec736d2
LP
478 o->object.size = htole64(size);
479
480 f->header->tail_object_offset = htole64(p);
cec736d2
LP
481 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
482
483 *ret = o;
484 *offset = p;
485
486 return 0;
487}
488
de190aef 489static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
490 uint64_t s, p;
491 Object *o;
492 int r;
493
494 assert(f);
495
dfabe643 496 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
497 journal file and we want to make sure we never get beyond
498 75% fill level. Calculate the hash table size for the
499 maximum file size based on these metrics. */
500
dfabe643 501 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
502 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
503 s = DEFAULT_DATA_HASH_TABLE_SIZE;
504
2b43f939 505 log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 506
de190aef
LP
507 r = journal_file_append_object(f,
508 OBJECT_DATA_HASH_TABLE,
509 offsetof(Object, hash_table.items) + s,
510 &o, &p);
cec736d2
LP
511 if (r < 0)
512 return r;
513
de190aef 514 memset(o->hash_table.items, 0, s);
cec736d2 515
de190aef
LP
516 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
517 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
518
519 return 0;
520}
521
de190aef 522static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
523 uint64_t s, p;
524 Object *o;
525 int r;
526
527 assert(f);
528
de190aef
LP
529 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
530 r = journal_file_append_object(f,
531 OBJECT_FIELD_HASH_TABLE,
532 offsetof(Object, hash_table.items) + s,
533 &o, &p);
cec736d2
LP
534 if (r < 0)
535 return r;
536
de190aef 537 memset(o->hash_table.items, 0, s);
cec736d2 538
de190aef
LP
539 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
540 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
541
542 return 0;
543}
544
de190aef 545static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
546 uint64_t s, p;
547 void *t;
548 int r;
549
550 assert(f);
551
de190aef
LP
552 p = le64toh(f->header->data_hash_table_offset);
553 s = le64toh(f->header->data_hash_table_size);
cec736d2 554
de190aef 555 r = journal_file_move_to(f,
16e9f408 556 OBJECT_DATA_HASH_TABLE,
fcde2389 557 true,
de190aef
LP
558 p, s,
559 &t);
cec736d2
LP
560 if (r < 0)
561 return r;
562
de190aef 563 f->data_hash_table = t;
cec736d2
LP
564 return 0;
565}
566
de190aef 567static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
568 uint64_t s, p;
569 void *t;
570 int r;
571
572 assert(f);
573
de190aef
LP
574 p = le64toh(f->header->field_hash_table_offset);
575 s = le64toh(f->header->field_hash_table_size);
cec736d2 576
de190aef 577 r = journal_file_move_to(f,
16e9f408 578 OBJECT_FIELD_HASH_TABLE,
fcde2389 579 true,
de190aef
LP
580 p, s,
581 &t);
cec736d2
LP
582 if (r < 0)
583 return r;
584
de190aef 585 f->field_hash_table = t;
cec736d2
LP
586 return 0;
587}
588
de190aef
LP
589static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
590 uint64_t p, h;
cec736d2
LP
591 int r;
592
593 assert(f);
594 assert(o);
595 assert(offset > 0);
b588975f
LP
596
597 if (o->object.type != OBJECT_DATA)
598 return -EINVAL;
cec736d2 599
48496df6
LP
600 /* This might alter the window we are looking at */
601
de190aef
LP
602 o->data.next_hash_offset = o->data.next_field_offset = 0;
603 o->data.entry_offset = o->data.entry_array_offset = 0;
604 o->data.n_entries = 0;
cec736d2 605
de190aef 606 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 607 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
608 if (p == 0) {
609 /* Only entry in the hash table is easy */
de190aef 610 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 611 } else {
48496df6
LP
612 /* Move back to the previous data object, to patch in
613 * pointer */
cec736d2 614
de190aef 615 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
616 if (r < 0)
617 return r;
618
de190aef 619 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
620 }
621
de190aef 622 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 623
dca6219e
LP
624 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
625 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
626
cec736d2
LP
627 return 0;
628}
629
de190aef
LP
630int journal_file_find_data_object_with_hash(
631 JournalFile *f,
632 const void *data, uint64_t size, uint64_t hash,
633 Object **ret, uint64_t *offset) {
48496df6 634
de190aef 635 uint64_t p, osize, h;
cec736d2
LP
636 int r;
637
638 assert(f);
639 assert(data || size == 0);
640
641 osize = offsetof(Object, data.payload) + size;
642
bc85bfee
LP
643 if (f->header->data_hash_table_size == 0)
644 return -EBADMSG;
645
de190aef
LP
646 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
647 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 648
de190aef
LP
649 while (p > 0) {
650 Object *o;
cec736d2 651
de190aef 652 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
653 if (r < 0)
654 return r;
655
807e17f0 656 if (le64toh(o->data.hash) != hash)
85a131e8 657 goto next;
807e17f0
LP
658
659 if (o->object.flags & OBJECT_COMPRESSED) {
660#ifdef HAVE_XZ
b785c858 661 uint64_t l, rsize;
cec736d2 662
807e17f0
LP
663 l = le64toh(o->object.size);
664 if (l <= offsetof(Object, data.payload))
cec736d2
LP
665 return -EBADMSG;
666
807e17f0
LP
667 l -= offsetof(Object, data.payload);
668
669 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
670 return -EBADMSG;
671
b785c858 672 if (rsize == size &&
807e17f0
LP
673 memcmp(f->compress_buffer, data, size) == 0) {
674
675 if (ret)
676 *ret = o;
677
678 if (offset)
679 *offset = p;
680
681 return 1;
682 }
683#else
684 return -EPROTONOSUPPORT;
685#endif
686
687 } else if (le64toh(o->object.size) == osize &&
688 memcmp(o->data.payload, data, size) == 0) {
689
cec736d2
LP
690 if (ret)
691 *ret = o;
692
693 if (offset)
694 *offset = p;
695
de190aef 696 return 1;
cec736d2
LP
697 }
698
85a131e8 699 next:
cec736d2
LP
700 p = le64toh(o->data.next_hash_offset);
701 }
702
de190aef
LP
703 return 0;
704}
705
706int journal_file_find_data_object(
707 JournalFile *f,
708 const void *data, uint64_t size,
709 Object **ret, uint64_t *offset) {
710
711 uint64_t hash;
712
713 assert(f);
714 assert(data || size == 0);
715
716 hash = hash64(data, size);
717
718 return journal_file_find_data_object_with_hash(f,
719 data, size, hash,
720 ret, offset);
721}
722
48496df6
LP
723static int journal_file_append_data(
724 JournalFile *f,
725 const void *data, uint64_t size,
726 Object **ret, uint64_t *offset) {
727
de190aef
LP
728 uint64_t hash, p;
729 uint64_t osize;
730 Object *o;
731 int r;
807e17f0 732 bool compressed = false;
de190aef
LP
733
734 assert(f);
735 assert(data || size == 0);
736
737 hash = hash64(data, size);
738
739 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
740 if (r < 0)
741 return r;
742 else if (r > 0) {
743
744 if (ret)
745 *ret = o;
746
747 if (offset)
748 *offset = p;
749
750 return 0;
751 }
752
753 osize = offsetof(Object, data.payload) + size;
754 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
755 if (r < 0)
756 return r;
757
cec736d2 758 o->data.hash = htole64(hash);
807e17f0
LP
759
760#ifdef HAVE_XZ
761 if (f->compress &&
762 size >= COMPRESSION_SIZE_THRESHOLD) {
763 uint64_t rsize;
764
765 compressed = compress_blob(data, size, o->data.payload, &rsize);
766
767 if (compressed) {
768 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
769 o->object.flags |= OBJECT_COMPRESSED;
770
807e17f0
LP
771 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
772 }
773 }
774#endif
775
64825d3c 776 if (!compressed && size > 0)
807e17f0 777 memcpy(o->data.payload, data, size);
cec736d2 778
de190aef 779 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
780 if (r < 0)
781 return r;
782
48496df6
LP
783 /* The linking might have altered the window, so let's
784 * refresh our pointer */
785 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
786 if (r < 0)
787 return r;
788
5996c7c2
LP
789#ifdef HAVE_GCRYPT
790 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
791 if (r < 0)
792 return r;
793#endif
794
cec736d2
LP
795 if (ret)
796 *ret = o;
797
798 if (offset)
de190aef 799 *offset = p;
cec736d2
LP
800
801 return 0;
802}
803
804uint64_t journal_file_entry_n_items(Object *o) {
805 assert(o);
b588975f
LP
806
807 if (o->object.type != OBJECT_ENTRY)
808 return 0;
cec736d2
LP
809
810 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
811}
812
0284adc6 813uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 814 assert(o);
b588975f
LP
815
816 if (o->object.type != OBJECT_ENTRY_ARRAY)
817 return 0;
de190aef
LP
818
819 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
820}
821
fb9a24b6
LP
822uint64_t journal_file_hash_table_n_items(Object *o) {
823 assert(o);
b588975f
LP
824
825 if (o->object.type != OBJECT_DATA_HASH_TABLE &&
826 o->object.type != OBJECT_FIELD_HASH_TABLE)
827 return 0;
fb9a24b6
LP
828
829 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
830}
831
de190aef 832static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
833 le64_t *first,
834 le64_t *idx,
de190aef 835 uint64_t p) {
cec736d2 836 int r;
de190aef
LP
837 uint64_t n = 0, ap = 0, q, i, a, hidx;
838 Object *o;
839
cec736d2 840 assert(f);
de190aef
LP
841 assert(first);
842 assert(idx);
843 assert(p > 0);
cec736d2 844
de190aef
LP
845 a = le64toh(*first);
846 i = hidx = le64toh(*idx);
847 while (a > 0) {
848
849 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
850 if (r < 0)
851 return r;
cec736d2 852
de190aef
LP
853 n = journal_file_entry_array_n_items(o);
854 if (i < n) {
855 o->entry_array.items[i] = htole64(p);
856 *idx = htole64(hidx + 1);
857 return 0;
858 }
cec736d2 859
de190aef
LP
860 i -= n;
861 ap = a;
862 a = le64toh(o->entry_array.next_entry_array_offset);
863 }
864
865 if (hidx > n)
866 n = (hidx+1) * 2;
867 else
868 n = n * 2;
869
870 if (n < 4)
871 n = 4;
872
873 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
874 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
875 &o, &q);
cec736d2
LP
876 if (r < 0)
877 return r;
878
feb12d3e 879#ifdef HAVE_GCRYPT
5996c7c2 880 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
b0af6f41
LP
881 if (r < 0)
882 return r;
feb12d3e 883#endif
b0af6f41 884
de190aef 885 o->entry_array.items[i] = htole64(p);
cec736d2 886
de190aef 887 if (ap == 0)
7be3aa17 888 *first = htole64(q);
cec736d2 889 else {
de190aef 890 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
891 if (r < 0)
892 return r;
893
de190aef
LP
894 o->entry_array.next_entry_array_offset = htole64(q);
895 }
cec736d2 896
2dee23eb
LP
897 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
898 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
899
de190aef
LP
900 *idx = htole64(hidx + 1);
901
902 return 0;
903}
cec736d2 904
de190aef 905static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
906 le64_t *extra,
907 le64_t *first,
908 le64_t *idx,
de190aef
LP
909 uint64_t p) {
910
911 int r;
912
913 assert(f);
914 assert(extra);
915 assert(first);
916 assert(idx);
917 assert(p > 0);
918
919 if (*idx == 0)
920 *extra = htole64(p);
921 else {
4fd052ae 922 le64_t i;
de190aef 923
7be3aa17 924 i = htole64(le64toh(*idx) - 1);
de190aef
LP
925 r = link_entry_into_array(f, first, &i, p);
926 if (r < 0)
927 return r;
cec736d2
LP
928 }
929
de190aef
LP
930 *idx = htole64(le64toh(*idx) + 1);
931 return 0;
932}
933
934static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
935 uint64_t p;
936 int r;
937 assert(f);
938 assert(o);
939 assert(offset > 0);
940
941 p = le64toh(o->entry.items[i].object_offset);
942 if (p == 0)
943 return -EINVAL;
944
945 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
946 if (r < 0)
947 return r;
948
de190aef
LP
949 return link_entry_into_array_plus_one(f,
950 &o->data.entry_offset,
951 &o->data.entry_array_offset,
952 &o->data.n_entries,
953 offset);
cec736d2
LP
954}
955
956static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 957 uint64_t n, i;
cec736d2
LP
958 int r;
959
960 assert(f);
961 assert(o);
962 assert(offset > 0);
b588975f
LP
963
964 if (o->object.type != OBJECT_ENTRY)
965 return -EINVAL;
cec736d2 966
b788cc23
LP
967 __sync_synchronize();
968
cec736d2 969 /* Link up the entry itself */
de190aef
LP
970 r = link_entry_into_array(f,
971 &f->header->entry_array_offset,
972 &f->header->n_entries,
973 offset);
974 if (r < 0)
975 return r;
cec736d2 976
aaf53376 977 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 978
de190aef 979 if (f->header->head_entry_realtime == 0)
0ac38b70 980 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 981
0ac38b70 982 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
983 f->header->tail_entry_monotonic = o->entry.monotonic;
984
985 f->tail_entry_monotonic_valid = true;
cec736d2
LP
986
987 /* Link up the items */
988 n = journal_file_entry_n_items(o);
989 for (i = 0; i < n; i++) {
990 r = journal_file_link_entry_item(f, o, offset, i);
991 if (r < 0)
992 return r;
993 }
994
cec736d2
LP
995 return 0;
996}
997
998static int journal_file_append_entry_internal(
999 JournalFile *f,
1000 const dual_timestamp *ts,
1001 uint64_t xor_hash,
1002 const EntryItem items[], unsigned n_items,
de190aef 1003 uint64_t *seqnum,
cec736d2
LP
1004 Object **ret, uint64_t *offset) {
1005 uint64_t np;
1006 uint64_t osize;
1007 Object *o;
1008 int r;
1009
1010 assert(f);
1011 assert(items || n_items == 0);
de190aef 1012 assert(ts);
cec736d2
LP
1013
1014 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1015
de190aef 1016 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
1017 if (r < 0)
1018 return r;
1019
d98cc1f2 1020 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 1021 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
1022 o->entry.realtime = htole64(ts->realtime);
1023 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
1024 o->entry.xor_hash = htole64(xor_hash);
1025 o->entry.boot_id = f->header->boot_id;
1026
feb12d3e 1027#ifdef HAVE_GCRYPT
5996c7c2 1028 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
b0af6f41
LP
1029 if (r < 0)
1030 return r;
feb12d3e 1031#endif
b0af6f41 1032
cec736d2
LP
1033 r = journal_file_link_entry(f, o, np);
1034 if (r < 0)
1035 return r;
1036
1037 if (ret)
1038 *ret = o;
1039
1040 if (offset)
1041 *offset = np;
1042
1043 return 0;
1044}
1045
cf244689 1046void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1047 assert(f);
1048
1049 /* inotify() does not receive IN_MODIFY events from file
1050 * accesses done via mmap(). After each access we hence
1051 * trigger IN_MODIFY by truncating the journal file to its
1052 * current size which triggers IN_MODIFY. */
1053
bc85bfee
LP
1054 __sync_synchronize();
1055
50f20cfd 1056 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
c5315881 1057 log_error("Failed to truncate file to its own size: %m");
50f20cfd
LP
1058}
1059
1f2da9ec
LP
1060static int entry_item_cmp(const void *_a, const void *_b) {
1061 const EntryItem *a = _a, *b = _b;
1062
1063 if (le64toh(a->object_offset) < le64toh(b->object_offset))
1064 return -1;
1065 if (le64toh(a->object_offset) > le64toh(b->object_offset))
1066 return 1;
1067 return 0;
1068}
1069
de190aef 1070int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1071 unsigned i;
1072 EntryItem *items;
1073 int r;
1074 uint64_t xor_hash = 0;
de190aef 1075 struct dual_timestamp _ts;
cec736d2
LP
1076
1077 assert(f);
1078 assert(iovec || n_iovec == 0);
1079
de190aef
LP
1080 if (!f->writable)
1081 return -EPERM;
1082
1083 if (!ts) {
1084 dual_timestamp_get(&_ts);
1085 ts = &_ts;
1086 }
1087
1088 if (f->tail_entry_monotonic_valid &&
1089 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1090 return -EINVAL;
1091
feb12d3e 1092#ifdef HAVE_GCRYPT
7560fffc
LP
1093 r = journal_file_maybe_append_tag(f, ts->realtime);
1094 if (r < 0)
1095 return r;
feb12d3e 1096#endif
7560fffc 1097
64825d3c
LP
1098 /* alloca() can't take 0, hence let's allocate at least one */
1099 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1100
1101 for (i = 0; i < n_iovec; i++) {
1102 uint64_t p;
1103 Object *o;
1104
1105 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1106 if (r < 0)
cf244689 1107 return r;
cec736d2
LP
1108
1109 xor_hash ^= le64toh(o->data.hash);
1110 items[i].object_offset = htole64(p);
de7b95cd 1111 items[i].hash = o->data.hash;
cec736d2
LP
1112 }
1113
1f2da9ec
LP
1114 /* Order by the position on disk, in order to improve seek
1115 * times for rotating media. */
1116 qsort(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1117
de190aef 1118 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1119
50f20cfd
LP
1120 journal_file_post_change(f);
1121
cec736d2
LP
1122 return r;
1123}
1124
de190aef
LP
1125static int generic_array_get(JournalFile *f,
1126 uint64_t first,
1127 uint64_t i,
1128 Object **ret, uint64_t *offset) {
1129
cec736d2 1130 Object *o;
6c8a39b8 1131 uint64_t p = 0, a;
cec736d2
LP
1132 int r;
1133
1134 assert(f);
1135
de190aef
LP
1136 a = first;
1137 while (a > 0) {
1138 uint64_t n;
cec736d2 1139
de190aef
LP
1140 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1141 if (r < 0)
1142 return r;
cec736d2 1143
de190aef
LP
1144 n = journal_file_entry_array_n_items(o);
1145 if (i < n) {
1146 p = le64toh(o->entry_array.items[i]);
1147 break;
cec736d2
LP
1148 }
1149
de190aef
LP
1150 i -= n;
1151 a = le64toh(o->entry_array.next_entry_array_offset);
1152 }
1153
1154 if (a <= 0 || p <= 0)
1155 return 0;
1156
1157 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1158 if (r < 0)
1159 return r;
1160
1161 if (ret)
1162 *ret = o;
1163
1164 if (offset)
1165 *offset = p;
1166
1167 return 1;
1168}
1169
1170static int generic_array_get_plus_one(JournalFile *f,
1171 uint64_t extra,
1172 uint64_t first,
1173 uint64_t i,
1174 Object **ret, uint64_t *offset) {
1175
1176 Object *o;
1177
1178 assert(f);
1179
1180 if (i == 0) {
1181 int r;
1182
1183 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1184 if (r < 0)
1185 return r;
1186
de190aef
LP
1187 if (ret)
1188 *ret = o;
cec736d2 1189
de190aef
LP
1190 if (offset)
1191 *offset = extra;
cec736d2 1192
de190aef 1193 return 1;
cec736d2
LP
1194 }
1195
de190aef
LP
1196 return generic_array_get(f, first, i-1, ret, offset);
1197}
cec736d2 1198
de190aef
LP
/* Results of the test_object_*() callbacks used by the bisection code:
 * the probed item matches the needle (TEST_FOUND), compares lower than it
 * (TEST_LEFT) or compares higher than it (TEST_RIGHT). */
enum {
        TEST_FOUND = 0,
        TEST_LEFT  = 1,
        TEST_RIGHT = 2
};
cec736d2 1204
de190aef
LP
1205static int generic_array_bisect(JournalFile *f,
1206 uint64_t first,
1207 uint64_t n,
1208 uint64_t needle,
1209 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1210 direction_t direction,
1211 Object **ret,
1212 uint64_t *offset,
1213 uint64_t *idx) {
1214
1215 uint64_t a, p, t = 0, i = 0, last_p = 0;
1216 bool subtract_one = false;
1217 Object *o, *array = NULL;
1218 int r;
cec736d2 1219
de190aef
LP
1220 assert(f);
1221 assert(test_object);
cec736d2 1222
de190aef
LP
1223 a = first;
1224 while (a > 0) {
1225 uint64_t left, right, k, lp;
1226
1227 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1228 if (r < 0)
1229 return r;
1230
de190aef
LP
1231 k = journal_file_entry_array_n_items(array);
1232 right = MIN(k, n);
1233 if (right <= 0)
1234 return 0;
cec736d2 1235
de190aef
LP
1236 i = right - 1;
1237 lp = p = le64toh(array->entry_array.items[i]);
1238 if (p <= 0)
1239 return -EBADMSG;
cec736d2 1240
de190aef
LP
1241 r = test_object(f, p, needle);
1242 if (r < 0)
1243 return r;
cec736d2 1244
de190aef
LP
1245 if (r == TEST_FOUND)
1246 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1247
1248 if (r == TEST_RIGHT) {
1249 left = 0;
1250 right -= 1;
1251 for (;;) {
1252 if (left == right) {
1253 if (direction == DIRECTION_UP)
1254 subtract_one = true;
1255
1256 i = left;
1257 goto found;
1258 }
1259
1260 assert(left < right);
1261
1262 i = (left + right) / 2;
1263 p = le64toh(array->entry_array.items[i]);
1264 if (p <= 0)
1265 return -EBADMSG;
1266
1267 r = test_object(f, p, needle);
1268 if (r < 0)
1269 return r;
cec736d2 1270
de190aef
LP
1271 if (r == TEST_FOUND)
1272 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1273
1274 if (r == TEST_RIGHT)
1275 right = i;
1276 else
1277 left = i + 1;
1278 }
1279 }
1280
cbdca852
LP
1281 if (k > n) {
1282 if (direction == DIRECTION_UP) {
1283 i = n;
1284 subtract_one = true;
1285 goto found;
1286 }
1287
cec736d2 1288 return 0;
cbdca852 1289 }
cec736d2 1290
de190aef
LP
1291 last_p = lp;
1292
1293 n -= k;
1294 t += k;
1295 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1296 }
1297
1298 return 0;
de190aef
LP
1299
1300found:
1301 if (subtract_one && t == 0 && i == 0)
1302 return 0;
1303
1304 if (subtract_one && i == 0)
1305 p = last_p;
1306 else if (subtract_one)
1307 p = le64toh(array->entry_array.items[i-1]);
1308 else
1309 p = le64toh(array->entry_array.items[i]);
1310
1311 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1312 if (r < 0)
1313 return r;
1314
1315 if (ret)
1316 *ret = o;
1317
1318 if (offset)
1319 *offset = p;
1320
1321 if (idx)
cbdca852 1322 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1323
1324 return 1;
cec736d2
LP
1325}
1326
de190aef
LP
1327static int generic_array_bisect_plus_one(JournalFile *f,
1328 uint64_t extra,
1329 uint64_t first,
1330 uint64_t n,
1331 uint64_t needle,
1332 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1333 direction_t direction,
1334 Object **ret,
1335 uint64_t *offset,
1336 uint64_t *idx) {
1337
cec736d2 1338 int r;
cbdca852
LP
1339 bool step_back = false;
1340 Object *o;
cec736d2
LP
1341
1342 assert(f);
de190aef 1343 assert(test_object);
cec736d2 1344
de190aef
LP
1345 if (n <= 0)
1346 return 0;
cec736d2 1347
de190aef
LP
1348 /* This bisects the array in object 'first', but first checks
1349 * an extra */
de190aef
LP
1350 r = test_object(f, extra, needle);
1351 if (r < 0)
1352 return r;
a536e261
LP
1353
1354 if (r == TEST_FOUND)
1355 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1356
cbdca852
LP
1357 /* if we are looking with DIRECTION_UP then we need to first
1358 see if in the actual array there is a matching entry, and
1359 return the last one of that. But if there isn't any we need
1360 to return this one. Hence remember this, and return it
1361 below. */
1362 if (r == TEST_LEFT)
1363 step_back = direction == DIRECTION_UP;
de190aef 1364
cbdca852
LP
1365 if (r == TEST_RIGHT) {
1366 if (direction == DIRECTION_DOWN)
1367 goto found;
1368 else
1369 return 0;
a536e261 1370 }
cec736d2 1371
de190aef
LP
1372 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1373
cbdca852
LP
1374 if (r == 0 && step_back)
1375 goto found;
1376
ecf68b1d 1377 if (r > 0 && idx)
de190aef
LP
1378 (*idx) ++;
1379
1380 return r;
cbdca852
LP
1381
1382found:
1383 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1384 if (r < 0)
1385 return r;
1386
1387 if (ret)
1388 *ret = o;
1389
1390 if (offset)
1391 *offset = extra;
1392
1393 if (idx)
1394 *idx = 0;
1395
1396 return 1;
1397}
1398
1399static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1400 assert(f);
1401 assert(p > 0);
1402
1403 if (p == needle)
1404 return TEST_FOUND;
1405 else if (p < needle)
1406 return TEST_LEFT;
1407 else
1408 return TEST_RIGHT;
1409}
1410
1411int journal_file_move_to_entry_by_offset(
1412 JournalFile *f,
1413 uint64_t p,
1414 direction_t direction,
1415 Object **ret,
1416 uint64_t *offset) {
1417
1418 return generic_array_bisect(f,
1419 le64toh(f->header->entry_array_offset),
1420 le64toh(f->header->n_entries),
1421 p,
1422 test_object_offset,
1423 direction,
1424 ret, offset, NULL);
de190aef
LP
1425}
1426
cbdca852 1427
de190aef
LP
1428static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1429 Object *o;
1430 int r;
1431
1432 assert(f);
1433 assert(p > 0);
1434
1435 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1436 if (r < 0)
1437 return r;
1438
de190aef
LP
1439 if (le64toh(o->entry.seqnum) == needle)
1440 return TEST_FOUND;
1441 else if (le64toh(o->entry.seqnum) < needle)
1442 return TEST_LEFT;
1443 else
1444 return TEST_RIGHT;
1445}
cec736d2 1446
de190aef
LP
1447int journal_file_move_to_entry_by_seqnum(
1448 JournalFile *f,
1449 uint64_t seqnum,
1450 direction_t direction,
1451 Object **ret,
1452 uint64_t *offset) {
1453
1454 return generic_array_bisect(f,
1455 le64toh(f->header->entry_array_offset),
1456 le64toh(f->header->n_entries),
1457 seqnum,
1458 test_object_seqnum,
1459 direction,
1460 ret, offset, NULL);
1461}
cec736d2 1462
de190aef
LP
1463static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1464 Object *o;
1465 int r;
1466
1467 assert(f);
1468 assert(p > 0);
1469
1470 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1471 if (r < 0)
1472 return r;
1473
1474 if (le64toh(o->entry.realtime) == needle)
1475 return TEST_FOUND;
1476 else if (le64toh(o->entry.realtime) < needle)
1477 return TEST_LEFT;
1478 else
1479 return TEST_RIGHT;
cec736d2
LP
1480}
1481
de190aef
LP
1482int journal_file_move_to_entry_by_realtime(
1483 JournalFile *f,
1484 uint64_t realtime,
1485 direction_t direction,
1486 Object **ret,
1487 uint64_t *offset) {
1488
1489 return generic_array_bisect(f,
1490 le64toh(f->header->entry_array_offset),
1491 le64toh(f->header->n_entries),
1492 realtime,
1493 test_object_realtime,
1494 direction,
1495 ret, offset, NULL);
1496}
1497
1498static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1499 Object *o;
1500 int r;
1501
1502 assert(f);
1503 assert(p > 0);
1504
1505 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1506 if (r < 0)
1507 return r;
1508
1509 if (le64toh(o->entry.monotonic) == needle)
1510 return TEST_FOUND;
1511 else if (le64toh(o->entry.monotonic) < needle)
1512 return TEST_LEFT;
1513 else
1514 return TEST_RIGHT;
1515}
1516
1517int journal_file_move_to_entry_by_monotonic(
1518 JournalFile *f,
1519 sd_id128_t boot_id,
1520 uint64_t monotonic,
1521 direction_t direction,
1522 Object **ret,
1523 uint64_t *offset) {
1524
10b6f904 1525 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1526 Object *o;
1527 int r;
1528
cbdca852 1529 assert(f);
de190aef 1530
cbdca852 1531 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1532 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1533 if (r < 0)
1534 return r;
cbdca852 1535 if (r == 0)
de190aef
LP
1536 return -ENOENT;
1537
1538 return generic_array_bisect_plus_one(f,
1539 le64toh(o->data.entry_offset),
1540 le64toh(o->data.entry_array_offset),
1541 le64toh(o->data.n_entries),
1542 monotonic,
1543 test_object_monotonic,
1544 direction,
1545 ret, offset, NULL);
1546}
1547
de190aef
LP
1548int journal_file_next_entry(
1549 JournalFile *f,
1550 Object *o, uint64_t p,
1551 direction_t direction,
1552 Object **ret, uint64_t *offset) {
1553
1554 uint64_t i, n;
cec736d2
LP
1555 int r;
1556
1557 assert(f);
de190aef
LP
1558 assert(p > 0 || !o);
1559
1560 n = le64toh(f->header->n_entries);
1561 if (n <= 0)
1562 return 0;
cec736d2
LP
1563
1564 if (!o)
de190aef 1565 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1566 else {
de190aef 1567 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1568 return -EINVAL;
1569
de190aef
LP
1570 r = generic_array_bisect(f,
1571 le64toh(f->header->entry_array_offset),
1572 le64toh(f->header->n_entries),
1573 p,
1574 test_object_offset,
1575 DIRECTION_DOWN,
1576 NULL, NULL,
1577 &i);
1578 if (r <= 0)
1579 return r;
1580
1581 if (direction == DIRECTION_DOWN) {
1582 if (i >= n - 1)
1583 return 0;
1584
1585 i++;
1586 } else {
1587 if (i <= 0)
1588 return 0;
1589
1590 i--;
1591 }
cec736d2
LP
1592 }
1593
de190aef
LP
1594 /* And jump to it */
1595 return generic_array_get(f,
1596 le64toh(f->header->entry_array_offset),
1597 i,
1598 ret, offset);
1599}
cec736d2 1600
de190aef
LP
1601int journal_file_skip_entry(
1602 JournalFile *f,
1603 Object *o, uint64_t p,
1604 int64_t skip,
1605 Object **ret, uint64_t *offset) {
1606
1607 uint64_t i, n;
1608 int r;
1609
1610 assert(f);
1611 assert(o);
1612 assert(p > 0);
1613
1614 if (o->object.type != OBJECT_ENTRY)
1615 return -EINVAL;
1616
1617 r = generic_array_bisect(f,
1618 le64toh(f->header->entry_array_offset),
1619 le64toh(f->header->n_entries),
1620 p,
1621 test_object_offset,
1622 DIRECTION_DOWN,
1623 NULL, NULL,
1624 &i);
1625 if (r <= 0)
cec736d2
LP
1626 return r;
1627
de190aef
LP
1628 /* Calculate new index */
1629 if (skip < 0) {
1630 if ((uint64_t) -skip >= i)
1631 i = 0;
1632 else
1633 i = i - (uint64_t) -skip;
1634 } else
1635 i += (uint64_t) skip;
cec736d2 1636
de190aef
LP
1637 n = le64toh(f->header->n_entries);
1638 if (n <= 0)
1639 return -EBADMSG;
cec736d2 1640
de190aef
LP
1641 if (i >= n)
1642 i = n-1;
1643
1644 return generic_array_get(f,
1645 le64toh(f->header->entry_array_offset),
1646 i,
1647 ret, offset);
cec736d2
LP
1648}
1649
de190aef
LP
1650int journal_file_next_entry_for_data(
1651 JournalFile *f,
1652 Object *o, uint64_t p,
1653 uint64_t data_offset,
1654 direction_t direction,
1655 Object **ret, uint64_t *offset) {
1656
1657 uint64_t n, i;
cec736d2 1658 int r;
de190aef 1659 Object *d;
cec736d2
LP
1660
1661 assert(f);
de190aef 1662 assert(p > 0 || !o);
cec736d2 1663
de190aef 1664 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1665 if (r < 0)
de190aef 1666 return r;
cec736d2 1667
de190aef
LP
1668 n = le64toh(d->data.n_entries);
1669 if (n <= 0)
1670 return n;
cec736d2 1671
de190aef
LP
1672 if (!o)
1673 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1674 else {
1675 if (o->object.type != OBJECT_ENTRY)
1676 return -EINVAL;
cec736d2 1677
de190aef
LP
1678 r = generic_array_bisect_plus_one(f,
1679 le64toh(d->data.entry_offset),
1680 le64toh(d->data.entry_array_offset),
1681 le64toh(d->data.n_entries),
1682 p,
1683 test_object_offset,
1684 DIRECTION_DOWN,
1685 NULL, NULL,
1686 &i);
1687
1688 if (r <= 0)
cec736d2
LP
1689 return r;
1690
de190aef
LP
1691 if (direction == DIRECTION_DOWN) {
1692 if (i >= n - 1)
1693 return 0;
cec736d2 1694
de190aef
LP
1695 i++;
1696 } else {
1697 if (i <= 0)
1698 return 0;
cec736d2 1699
de190aef
LP
1700 i--;
1701 }
cec736d2 1702
de190aef 1703 }
cec736d2 1704
de190aef
LP
1705 return generic_array_get_plus_one(f,
1706 le64toh(d->data.entry_offset),
1707 le64toh(d->data.entry_array_offset),
1708 i,
1709 ret, offset);
1710}
cec736d2 1711
cbdca852
LP
1712int journal_file_move_to_entry_by_offset_for_data(
1713 JournalFile *f,
1714 uint64_t data_offset,
1715 uint64_t p,
1716 direction_t direction,
1717 Object **ret, uint64_t *offset) {
1718
1719 int r;
1720 Object *d;
1721
1722 assert(f);
1723
1724 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1725 if (r < 0)
1726 return r;
1727
1728 return generic_array_bisect_plus_one(f,
1729 le64toh(d->data.entry_offset),
1730 le64toh(d->data.entry_array_offset),
1731 le64toh(d->data.n_entries),
1732 p,
1733 test_object_offset,
1734 direction,
1735 ret, offset, NULL);
1736}
1737
1738int journal_file_move_to_entry_by_monotonic_for_data(
1739 JournalFile *f,
1740 uint64_t data_offset,
1741 sd_id128_t boot_id,
1742 uint64_t monotonic,
1743 direction_t direction,
1744 Object **ret, uint64_t *offset) {
1745
1746 char t[9+32+1] = "_BOOT_ID=";
1747 Object *o, *d;
1748 int r;
1749 uint64_t b, z;
1750
1751 assert(f);
1752
1753 /* First, seek by time */
1754 sd_id128_to_string(boot_id, t + 9);
1755 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1756 if (r < 0)
1757 return r;
1758 if (r == 0)
1759 return -ENOENT;
1760
1761 r = generic_array_bisect_plus_one(f,
1762 le64toh(o->data.entry_offset),
1763 le64toh(o->data.entry_array_offset),
1764 le64toh(o->data.n_entries),
1765 monotonic,
1766 test_object_monotonic,
1767 direction,
1768 NULL, &z, NULL);
1769 if (r <= 0)
1770 return r;
1771
1772 /* And now, continue seeking until we find an entry that
1773 * exists in both bisection arrays */
1774
1775 for (;;) {
1776 Object *qo;
1777 uint64_t p, q;
1778
1779 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1780 if (r < 0)
1781 return r;
1782
1783 r = generic_array_bisect_plus_one(f,
1784 le64toh(d->data.entry_offset),
1785 le64toh(d->data.entry_array_offset),
1786 le64toh(d->data.n_entries),
1787 z,
1788 test_object_offset,
1789 direction,
1790 NULL, &p, NULL);
1791 if (r <= 0)
1792 return r;
1793
1794 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1795 if (r < 0)
1796 return r;
1797
1798 r = generic_array_bisect_plus_one(f,
1799 le64toh(o->data.entry_offset),
1800 le64toh(o->data.entry_array_offset),
1801 le64toh(o->data.n_entries),
1802 p,
1803 test_object_offset,
1804 direction,
1805 &qo, &q, NULL);
1806
1807 if (r <= 0)
1808 return r;
1809
1810 if (p == q) {
1811 if (ret)
1812 *ret = qo;
1813 if (offset)
1814 *offset = q;
1815
1816 return 1;
1817 }
1818
1819 z = q;
1820 }
1821
1822 return 0;
1823}
1824
de190aef
LP
1825int journal_file_move_to_entry_by_seqnum_for_data(
1826 JournalFile *f,
1827 uint64_t data_offset,
1828 uint64_t seqnum,
1829 direction_t direction,
1830 Object **ret, uint64_t *offset) {
cec736d2 1831
de190aef
LP
1832 Object *d;
1833 int r;
cec736d2 1834
91a31dde
LP
1835 assert(f);
1836
de190aef 1837 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1838 if (r < 0)
de190aef 1839 return r;
cec736d2 1840
de190aef
LP
1841 return generic_array_bisect_plus_one(f,
1842 le64toh(d->data.entry_offset),
1843 le64toh(d->data.entry_array_offset),
1844 le64toh(d->data.n_entries),
1845 seqnum,
1846 test_object_seqnum,
1847 direction,
1848 ret, offset, NULL);
1849}
cec736d2 1850
de190aef
LP
1851int journal_file_move_to_entry_by_realtime_for_data(
1852 JournalFile *f,
1853 uint64_t data_offset,
1854 uint64_t realtime,
1855 direction_t direction,
1856 Object **ret, uint64_t *offset) {
1857
1858 Object *d;
1859 int r;
1860
91a31dde
LP
1861 assert(f);
1862
de190aef 1863 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1864 if (r < 0)
de190aef
LP
1865 return r;
1866
1867 return generic_array_bisect_plus_one(f,
1868 le64toh(d->data.entry_offset),
1869 le64toh(d->data.entry_array_offset),
1870 le64toh(d->data.n_entries),
1871 realtime,
1872 test_object_realtime,
1873 direction,
1874 ret, offset, NULL);
cec736d2
LP
1875}
1876
0284adc6 1877void journal_file_dump(JournalFile *f) {
7560fffc 1878 Object *o;
7560fffc 1879 int r;
0284adc6 1880 uint64_t p;
7560fffc
LP
1881
1882 assert(f);
1883
0284adc6 1884 journal_file_print_header(f);
7560fffc 1885
0284adc6
LP
1886 p = le64toh(f->header->header_size);
1887 while (p != 0) {
1888 r = journal_file_move_to_object(f, -1, p, &o);
1889 if (r < 0)
1890 goto fail;
7560fffc 1891
0284adc6 1892 switch (o->object.type) {
d98cc1f2 1893
0284adc6
LP
1894 case OBJECT_UNUSED:
1895 printf("Type: OBJECT_UNUSED\n");
1896 break;
d98cc1f2 1897
0284adc6
LP
1898 case OBJECT_DATA:
1899 printf("Type: OBJECT_DATA\n");
1900 break;
7560fffc 1901
0284adc6 1902 case OBJECT_ENTRY:
f7fab8a5 1903 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
0284adc6
LP
1904 (unsigned long long) le64toh(o->entry.seqnum),
1905 (unsigned long long) le64toh(o->entry.monotonic),
1906 (unsigned long long) le64toh(o->entry.realtime));
1907 break;
7560fffc 1908
0284adc6
LP
1909 case OBJECT_FIELD_HASH_TABLE:
1910 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1911 break;
7560fffc 1912
0284adc6
LP
1913 case OBJECT_DATA_HASH_TABLE:
1914 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1915 break;
7560fffc 1916
0284adc6
LP
1917 case OBJECT_ENTRY_ARRAY:
1918 printf("Type: OBJECT_ENTRY_ARRAY\n");
1919 break;
7560fffc 1920
0284adc6 1921 case OBJECT_TAG:
f7fab8a5
LP
1922 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1923 (unsigned long long) le64toh(o->tag.seqnum),
1924 (unsigned long long) le64toh(o->tag.epoch));
0284adc6
LP
1925 break;
1926 }
7560fffc 1927
0284adc6
LP
1928 if (o->object.flags & OBJECT_COMPRESSED)
1929 printf("Flags: COMPRESSED\n");
7560fffc 1930
0284adc6
LP
1931 if (p == le64toh(f->header->tail_object_offset))
1932 p = 0;
1933 else
1934 p = p + ALIGN64(le64toh(o->object.size));
1935 }
7560fffc 1936
0284adc6
LP
1937 return;
1938fail:
1939 log_error("File corrupt");
7560fffc
LP
1940}
1941
0284adc6
LP
1942void journal_file_print_header(JournalFile *f) {
1943 char a[33], b[33], c[33];
1944 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
a1a03e30
LP
1945 struct stat st;
1946 char bytes[FORMAT_BYTES_MAX];
7560fffc
LP
1947
1948 assert(f);
7560fffc 1949
0284adc6
LP
1950 printf("File Path: %s\n"
1951 "File ID: %s\n"
1952 "Machine ID: %s\n"
1953 "Boot ID: %s\n"
1954 "Sequential Number ID: %s\n"
1955 "State: %s\n"
1956 "Compatible Flags:%s%s\n"
1957 "Incompatible Flags:%s%s\n"
1958 "Header size: %llu\n"
1959 "Arena size: %llu\n"
1960 "Data Hash Table Size: %llu\n"
1961 "Field Hash Table Size: %llu\n"
0284adc6
LP
1962 "Rotate Suggested: %s\n"
1963 "Head Sequential Number: %llu\n"
1964 "Tail Sequential Number: %llu\n"
1965 "Head Realtime Timestamp: %s\n"
3223f44f
LP
1966 "Tail Realtime Timestamp: %s\n"
1967 "Objects: %llu\n"
1968 "Entry Objects: %llu\n",
0284adc6
LP
1969 f->path,
1970 sd_id128_to_string(f->header->file_id, a),
1971 sd_id128_to_string(f->header->machine_id, b),
1972 sd_id128_to_string(f->header->boot_id, c),
1973 sd_id128_to_string(f->header->seqnum_id, c),
3223f44f
LP
1974 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1975 f->header->state == STATE_ONLINE ? "ONLINE" :
1976 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
8088cbd3
LP
1977 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1978 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1979 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1980 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
0284adc6
LP
1981 (unsigned long long) le64toh(f->header->header_size),
1982 (unsigned long long) le64toh(f->header->arena_size),
1983 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1984 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
fb0951b0 1985 yes_no(journal_file_rotate_suggested(f, 0)),
0284adc6
LP
1986 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1987 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1988 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
3223f44f
LP
1989 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1990 (unsigned long long) le64toh(f->header->n_objects),
1991 (unsigned long long) le64toh(f->header->n_entries));
7560fffc 1992
0284adc6
LP
1993 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1994 printf("Data Objects: %llu\n"
1995 "Data Hash Table Fill: %.1f%%\n",
1996 (unsigned long long) le64toh(f->header->n_data),
1997 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 1998
0284adc6
LP
1999 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2000 printf("Field Objects: %llu\n"
2001 "Field Hash Table Fill: %.1f%%\n",
2002 (unsigned long long) le64toh(f->header->n_fields),
2003 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
2004
2005 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
2006 printf("Tag Objects: %llu\n",
2007 (unsigned long long) le64toh(f->header->n_tags));
2008 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
2009 printf("Entry Array Objects: %llu\n",
2010 (unsigned long long) le64toh(f->header->n_entry_arrays));
a1a03e30
LP
2011
2012 if (fstat(f->fd, &st) >= 0)
2013 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
7560fffc
LP
2014}
2015
0284adc6
LP
2016int journal_file_open(
2017 const char *fname,
2018 int flags,
2019 mode_t mode,
2020 bool compress,
baed47c3 2021 bool seal,
0284adc6
LP
2022 JournalMetrics *metrics,
2023 MMapCache *mmap_cache,
2024 JournalFile *template,
2025 JournalFile **ret) {
7560fffc 2026
0284adc6
LP
2027 JournalFile *f;
2028 int r;
2029 bool newly_created = false;
7560fffc 2030
0284adc6 2031 assert(fname);
0559d3a5 2032 assert(ret);
7560fffc 2033
0284adc6
LP
2034 if ((flags & O_ACCMODE) != O_RDONLY &&
2035 (flags & O_ACCMODE) != O_RDWR)
2036 return -EINVAL;
7560fffc 2037
a0108012
LP
2038 if (!endswith(fname, ".journal") &&
2039 !endswith(fname, ".journal~"))
0284adc6 2040 return -EINVAL;
7560fffc 2041
0284adc6
LP
2042 f = new0(JournalFile, 1);
2043 if (!f)
2044 return -ENOMEM;
7560fffc 2045
0284adc6
LP
2046 f->fd = -1;
2047 f->mode = mode;
7560fffc 2048
0284adc6
LP
2049 f->flags = flags;
2050 f->prot = prot_from_flags(flags);
2051 f->writable = (flags & O_ACCMODE) != O_RDONLY;
48b61739 2052#ifdef HAVE_XZ
0284adc6 2053 f->compress = compress;
48b61739 2054#endif
49a32d43 2055#ifdef HAVE_GCRYPT
baed47c3 2056 f->seal = seal;
49a32d43 2057#endif
7560fffc 2058
0284adc6
LP
2059 if (mmap_cache)
2060 f->mmap = mmap_cache_ref(mmap_cache);
2061 else {
84168d80 2062 f->mmap = mmap_cache_new();
0284adc6
LP
2063 if (!f->mmap) {
2064 r = -ENOMEM;
2065 goto fail;
2066 }
2067 }
7560fffc 2068
0284adc6
LP
2069 f->path = strdup(fname);
2070 if (!f->path) {
2071 r = -ENOMEM;
2072 goto fail;
2073 }
7560fffc 2074
0284adc6
LP
2075 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2076 if (f->fd < 0) {
2077 r = -errno;
2078 goto fail;
7560fffc 2079 }
7560fffc 2080
0284adc6
LP
2081 if (fstat(f->fd, &f->last_stat) < 0) {
2082 r = -errno;
2083 goto fail;
2084 }
7560fffc 2085
0284adc6 2086 if (f->last_stat.st_size == 0 && f->writable) {
fb0951b0
LP
2087#ifdef HAVE_XATTR
2088 uint64_t crtime;
2089
2090 /* Let's attach the creation time to the journal file,
2091 * so that the vacuuming code knows the age of this
2092 * file even if the file might end up corrupted one
2093 * day... Ideally we'd just use the creation time many
2094 * file systems maintain for each file, but there is
2095 * currently no usable API to query this, hence let's
2096 * emulate this via extended attributes. If extended
2097 * attributes are not supported we'll just skip this,
2098 * and rely solely on mtime/atime/ctime of the file.*/
2099
2100 crtime = htole64((uint64_t) now(CLOCK_REALTIME));
2101 fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
2102#endif
7560fffc 2103
feb12d3e 2104#ifdef HAVE_GCRYPT
0284adc6 2105 /* Try to load the FSPRG state, and if we can't, then
baed47c3 2106 * just don't do sealing */
49a32d43
LP
2107 if (f->seal) {
2108 r = journal_file_fss_load(f);
2109 if (r < 0)
2110 f->seal = false;
2111 }
feb12d3e 2112#endif
7560fffc 2113
0284adc6
LP
2114 r = journal_file_init_header(f, template);
2115 if (r < 0)
2116 goto fail;
7560fffc 2117
0284adc6
LP
2118 if (fstat(f->fd, &f->last_stat) < 0) {
2119 r = -errno;
2120 goto fail;
2121 }
fb0951b0
LP
2122
2123 newly_created = true;
0284adc6 2124 }
7560fffc 2125
0284adc6
LP
2126 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2127 r = -EIO;
2128 goto fail;
2129 }
7560fffc 2130
0284adc6
LP
2131 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2132 if (f->header == MAP_FAILED) {
2133 f->header = NULL;
2134 r = -errno;
2135 goto fail;
2136 }
7560fffc 2137
0284adc6
LP
2138 if (!newly_created) {
2139 r = journal_file_verify_header(f);
2140 if (r < 0)
2141 goto fail;
2142 }
7560fffc 2143
feb12d3e 2144#ifdef HAVE_GCRYPT
0284adc6 2145 if (!newly_created && f->writable) {
baed47c3 2146 r = journal_file_fss_load(f);
0284adc6
LP
2147 if (r < 0)
2148 goto fail;
2149 }
feb12d3e 2150#endif
cec736d2
LP
2151
2152 if (f->writable) {
4a92baf3
LP
2153 if (metrics) {
2154 journal_default_metrics(metrics, f->fd);
2155 f->metrics = *metrics;
2156 } else if (template)
2157 f->metrics = template->metrics;
2158
cec736d2
LP
2159 r = journal_file_refresh_header(f);
2160 if (r < 0)
2161 goto fail;
2162 }
2163
feb12d3e 2164#ifdef HAVE_GCRYPT
baed47c3 2165 r = journal_file_hmac_setup(f);
14d10188
LP
2166 if (r < 0)
2167 goto fail;
feb12d3e 2168#endif
14d10188 2169
cec736d2 2170 if (newly_created) {
de190aef 2171 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2172 if (r < 0)
2173 goto fail;
2174
de190aef 2175 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2176 if (r < 0)
2177 goto fail;
7560fffc 2178
feb12d3e 2179#ifdef HAVE_GCRYPT
7560fffc
LP
2180 r = journal_file_append_first_tag(f);
2181 if (r < 0)
2182 goto fail;
feb12d3e 2183#endif
cec736d2
LP
2184 }
2185
de190aef 2186 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2187 if (r < 0)
2188 goto fail;
2189
de190aef 2190 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2191 if (r < 0)
2192 goto fail;
2193
0559d3a5 2194 *ret = f;
cec736d2
LP
2195 return 0;
2196
2197fail:
2198 journal_file_close(f);
2199
2200 return r;
2201}
0ac38b70 2202
baed47c3 2203int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
0ac38b70
LP
2204 char *p;
2205 size_t l;
2206 JournalFile *old_file, *new_file = NULL;
2207 int r;
2208
2209 assert(f);
2210 assert(*f);
2211
2212 old_file = *f;
2213
2214 if (!old_file->writable)
2215 return -EINVAL;
2216
2217 if (!endswith(old_file->path, ".journal"))
2218 return -EINVAL;
2219
2220 l = strlen(old_file->path);
2221
9447a7f1 2222 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2223 if (!p)
2224 return -ENOMEM;
2225
2226 memcpy(p, old_file->path, l - 8);
2227 p[l-8] = '@';
2228 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2229 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2230 "-%016llx-%016llx.journal",
fb0951b0
LP
2231 (unsigned long long) le64toh((*f)->header->head_entry_seqnum),
2232 (unsigned long long) le64toh((*f)->header->head_entry_realtime));
0ac38b70
LP
2233
2234 r = rename(old_file->path, p);
2235 free(p);
2236
2237 if (r < 0)
2238 return -errno;
2239
ccdbaf91 2240 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2241
baed47c3 2242 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2243 journal_file_close(old_file);
2244
2245 *f = new_file;
2246 return r;
2247}
2248
9447a7f1
LP
2249int journal_file_open_reliably(
2250 const char *fname,
2251 int flags,
2252 mode_t mode,
7560fffc 2253 bool compress,
baed47c3 2254 bool seal,
4a92baf3 2255 JournalMetrics *metrics,
27370278 2256 MMapCache *mmap_cache,
9447a7f1
LP
2257 JournalFile *template,
2258 JournalFile **ret) {
2259
2260 int r;
2261 size_t l;
2262 char *p;
2263
baed47c3 2264 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2265 metrics, mmap_cache, template, ret);
0071d9f1
LP
2266 if (r != -EBADMSG && /* corrupted */
2267 r != -ENODATA && /* truncated */
2268 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2269 r != -EPROTONOSUPPORT && /* incompatible feature */
2270 r != -EBUSY && /* unclean shutdown */
2271 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2272 return r;
2273
2274 if ((flags & O_ACCMODE) == O_RDONLY)
2275 return r;
2276
2277 if (!(flags & O_CREAT))
2278 return r;
2279
7560fffc
LP
2280 if (!endswith(fname, ".journal"))
2281 return r;
2282
5c70eab4
LP
2283 /* The file is corrupted. Rotate it away and try it again (but only once) */
2284
9447a7f1
LP
2285 l = strlen(fname);
2286 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2287 (int) (l-8), fname,
2288 (unsigned long long) now(CLOCK_REALTIME),
2289 random_ull()) < 0)
2290 return -ENOMEM;
2291
2292 r = rename(fname, p);
2293 free(p);
2294 if (r < 0)
2295 return -errno;
2296
a1a1898f 2297 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2298
baed47c3 2299 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2300 metrics, mmap_cache, template, ret);
9447a7f1
LP
2301}
2302
cf244689
LP
2303
2304int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2305 uint64_t i, n;
2306 uint64_t q, xor_hash = 0;
2307 int r;
2308 EntryItem *items;
2309 dual_timestamp ts;
2310
2311 assert(from);
2312 assert(to);
2313 assert(o);
2314 assert(p);
2315
2316 if (!to->writable)
2317 return -EPERM;
2318
2319 ts.monotonic = le64toh(o->entry.monotonic);
2320 ts.realtime = le64toh(o->entry.realtime);
2321
2322 if (to->tail_entry_monotonic_valid &&
2323 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2324 return -EINVAL;
2325
cf244689
LP
2326 n = journal_file_entry_n_items(o);
2327 items = alloca(sizeof(EntryItem) * n);
2328
2329 for (i = 0; i < n; i++) {
4fd052ae
FC
2330 uint64_t l, h;
2331 le64_t le_hash;
cf244689
LP
2332 size_t t;
2333 void *data;
2334 Object *u;
2335
2336 q = le64toh(o->entry.items[i].object_offset);
2337 le_hash = o->entry.items[i].hash;
2338
2339 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2340 if (r < 0)
2341 return r;
2342
2343 if (le_hash != o->data.hash)
2344 return -EBADMSG;
2345
2346 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2347 t = (size_t) l;
2348
2349 /* We hit the limit on 32bit machines */
2350 if ((uint64_t) t != l)
2351 return -E2BIG;
2352
2353 if (o->object.flags & OBJECT_COMPRESSED) {
2354#ifdef HAVE_XZ
2355 uint64_t rsize;
2356
2357 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2358 return -EBADMSG;
2359
2360 data = from->compress_buffer;
2361 l = rsize;
2362#else
2363 return -EPROTONOSUPPORT;
2364#endif
2365 } else
2366 data = o->data.payload;
2367
2368 r = journal_file_append_data(to, data, l, &u, &h);
2369 if (r < 0)
2370 return r;
2371
2372 xor_hash ^= le64toh(u->data.hash);
2373 items[i].object_offset = htole64(h);
2374 items[i].hash = u->data.hash;
2375
2376 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2377 if (r < 0)
2378 return r;
2379 }
2380
2381 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2382}
babfc091
LP
2383
2384void journal_default_metrics(JournalMetrics *m, int fd) {
2385 uint64_t fs_size = 0;
2386 struct statvfs ss;
a7bc2c2a 2387 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2388
2389 assert(m);
2390 assert(fd >= 0);
2391
2392 if (fstatvfs(fd, &ss) >= 0)
2393 fs_size = ss.f_frsize * ss.f_blocks;
2394
2395 if (m->max_use == (uint64_t) -1) {
2396
2397 if (fs_size > 0) {
2398 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2399
2400 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2401 m->max_use = DEFAULT_MAX_USE_UPPER;
2402
2403 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2404 m->max_use = DEFAULT_MAX_USE_LOWER;
2405 } else
2406 m->max_use = DEFAULT_MAX_USE_LOWER;
2407 } else {
2408 m->max_use = PAGE_ALIGN(m->max_use);
2409
2410 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2411 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2412 }
2413
2414 if (m->max_size == (uint64_t) -1) {
2415 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2416
2417 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2418 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2419 } else
2420 m->max_size = PAGE_ALIGN(m->max_size);
2421
2422 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2423 m->max_size = JOURNAL_FILE_SIZE_MIN;
2424
2425 if (m->max_size*2 > m->max_use)
2426 m->max_use = m->max_size*2;
2427
2428 if (m->min_size == (uint64_t) -1)
2429 m->min_size = JOURNAL_FILE_SIZE_MIN;
2430 else {
2431 m->min_size = PAGE_ALIGN(m->min_size);
2432
2433 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2434 m->min_size = JOURNAL_FILE_SIZE_MIN;
2435
2436 if (m->min_size > m->max_size)
2437 m->max_size = m->min_size;
2438 }
2439
2440 if (m->keep_free == (uint64_t) -1) {
2441
2442 if (fs_size > 0) {
2443 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2444
2445 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2446 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2447
2448 } else
2449 m->keep_free = DEFAULT_KEEP_FREE;
2450 }
2451
2b43f939
LP
2452 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2453 format_bytes(a, sizeof(a), m->max_use),
2454 format_bytes(b, sizeof(b), m->max_size),
2455 format_bytes(c, sizeof(c), m->min_size),
2456 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2457}
08984293
LP
2458
2459int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2460 assert(f);
2461 assert(from || to);
2462
2463 if (from) {
162566a4
LP
2464 if (f->header->head_entry_realtime == 0)
2465 return -ENOENT;
08984293 2466
162566a4 2467 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2468 }
2469
2470 if (to) {
162566a4
LP
2471 if (f->header->tail_entry_realtime == 0)
2472 return -ENOENT;
08984293 2473
162566a4 2474 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2475 }
2476
2477 return 1;
2478}
2479
2480int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2481 char t[9+32+1] = "_BOOT_ID=";
2482 Object *o;
2483 uint64_t p;
2484 int r;
2485
2486 assert(f);
2487 assert(from || to);
2488
2489 sd_id128_to_string(boot_id, t + 9);
2490
2491 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2492 if (r <= 0)
2493 return r;
2494
2495 if (le64toh(o->data.n_entries) <= 0)
2496 return 0;
2497
2498 if (from) {
2499 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2500 if (r < 0)
2501 return r;
2502
2503 *from = le64toh(o->entry.monotonic);
2504 }
2505
2506 if (to) {
2507 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2508 if (r < 0)
2509 return r;
2510
2511 r = generic_array_get_plus_one(f,
2512 le64toh(o->data.entry_offset),
2513 le64toh(o->data.entry_array_offset),
2514 le64toh(o->data.n_entries)-1,
2515 &o, NULL);
2516 if (r <= 0)
2517 return r;
2518
2519 *to = le64toh(o->entry.monotonic);
2520 }
2521
2522 return 1;
2523}
dca6219e 2524
fb0951b0 2525bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
dca6219e
LP
2526 assert(f);
2527
2528 /* If we gained new header fields we gained new features,
2529 * hence suggest a rotation */
361f9cbc
LP
2530 if (le64toh(f->header->header_size) < sizeof(Header)) {
2531 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2532 return true;
361f9cbc 2533 }
dca6219e
LP
2534
2535 /* Let's check if the hash tables grew over a certain fill
2536 * level (75%, borrowing this value from Java's hash table
2537 * implementation), and if so suggest a rotation. To calculate
2538 * the fill level we need the n_data field, which only exists
2539 * in newer versions. */
2540
2541 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
2542 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2543 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2544 f->path,
2545 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2546 (unsigned long long) le64toh(f->header->n_data),
2547 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2548 (unsigned long long) (f->last_stat.st_size),
2549 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 2550 return true;
361f9cbc 2551 }
dca6219e
LP
2552
2553 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
2554 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2555 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2556 f->path,
2557 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2558 (unsigned long long) le64toh(f->header->n_fields),
2559 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 2560 return true;
361f9cbc 2561 }
dca6219e 2562
fb0951b0
LP
2563 if (max_file_usec > 0) {
2564 usec_t t, h;
2565
2566 h = le64toh(f->header->head_entry_realtime);
2567 t = now(CLOCK_REALTIME);
2568
2569 if (h > 0 && t > h + max_file_usec)
2570 return true;
2571 }
2572
dca6219e
LP
2573 return false;
2574}