]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journal: fix tag ordering check
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
0284adc6 32#include "journal-authenticate.h"
cec736d2 33#include "lookup3.h"
807e17f0 34#include "compress.h"
7560fffc 35#include "fsprg.h"
cec736d2 36
4a92baf3
LP
37#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
38#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
cec736d2 39
be19b7df 40#define COMPRESSION_SIZE_THRESHOLD (512ULL)
807e17f0 41
babfc091 42/* This is the minimum journal file size */
b47ffcfd 43#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
babfc091
LP
44
45/* These are the lower and upper bounds if we deduce the max_use value
46 * from the file system size */
47#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
48#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
49
50/* This is the upper bound if we deduce max_size from max_use */
71100051 51#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
babfc091
LP
52
53/* This is the upper bound if we deduce the keep_free value from the
54 * file system size */
55#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
56
57/* This is the keep_free value when we can't determine the system
58 * size */
59#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
60
dca6219e
LP
61/* n_data was the first entry we added after the initial file format design */
62#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
cec736d2 63
cec736d2 64void journal_file_close(JournalFile *f) {
de190aef 65 assert(f);
cec736d2 66
b0af6f41 67 /* Write the final tag */
c586dbf1 68 if (f->seal && f->writable)
b0af6f41
LP
69 journal_file_append_tag(f);
70
7560fffc 71 /* Sync everything to disk, before we mark the file offline */
16e9f408
LP
72 if (f->mmap && f->fd >= 0)
73 mmap_cache_close_fd(f->mmap, f->fd);
7560fffc
LP
74
75 if (f->writable && f->fd >= 0)
76 fdatasync(f->fd);
77
d384c7a8 78 if (f->header) {
cd96b3b8
LP
79 /* Mark the file offline. Don't override the archived state if it already is set */
80 if (f->writable && f->header->state == STATE_ONLINE)
d384c7a8 81 f->header->state = STATE_OFFLINE;
cec736d2 82
d384c7a8
MS
83 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84 }
cec736d2 85
0ac38b70
LP
86 if (f->fd >= 0)
87 close_nointr_nofail(f->fd);
88
cec736d2 89 free(f->path);
807e17f0 90
16e9f408
LP
91 if (f->mmap)
92 mmap_cache_unref(f->mmap);
93
807e17f0
LP
94#ifdef HAVE_XZ
95 free(f->compress_buffer);
96#endif
97
7560fffc 98#ifdef HAVE_GCRYPT
baed47c3
LP
99 if (f->fss_file)
100 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
b7c9ae91
LP
101 else if (f->fsprg_state)
102 free(f->fsprg_state);
103
104 free(f->fsprg_seed);
7560fffc
LP
105
106 if (f->hmac)
107 gcry_md_close(f->hmac);
108#endif
109
cec736d2
LP
110 free(f);
111}
112
0ac38b70 113static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
114 Header h;
115 ssize_t k;
116 int r;
117
118 assert(f);
119
120 zero(h);
7560fffc 121 memcpy(h.signature, HEADER_SIGNATURE, 8);
23b0b2b2 122 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2 123
7560fffc
LP
124 h.incompatible_flags =
125 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127 h.compatible_flags =
baed47c3 128 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
7560fffc 129
cec736d2
LP
130 r = sd_id128_randomize(&h.file_id);
131 if (r < 0)
132 return r;
133
0ac38b70
LP
134 if (template) {
135 h.seqnum_id = template->header->seqnum_id;
beec0085 136 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
0ac38b70
LP
137 } else
138 h.seqnum_id = h.file_id;
cec736d2
LP
139
140 k = pwrite(f->fd, &h, sizeof(h), 0);
141 if (k < 0)
142 return -errno;
143
144 if (k != sizeof(h))
145 return -EIO;
146
147 return 0;
148}
149
150static int journal_file_refresh_header(JournalFile *f) {
151 int r;
de190aef 152 sd_id128_t boot_id;
cec736d2
LP
153
154 assert(f);
155
156 r = sd_id128_get_machine(&f->header->machine_id);
157 if (r < 0)
158 return r;
159
de190aef 160 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
161 if (r < 0)
162 return r;
163
de190aef
LP
164 if (sd_id128_equal(boot_id, f->header->boot_id))
165 f->tail_entry_monotonic_valid = true;
166
167 f->header->boot_id = boot_id;
168
169 f->header->state = STATE_ONLINE;
b788cc23 170
7560fffc
LP
171 /* Sync the online state to disk */
172 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173 fdatasync(f->fd);
b788cc23 174
cec736d2
LP
175 return 0;
176}
177
178static int journal_file_verify_header(JournalFile *f) {
179 assert(f);
180
7560fffc 181 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
cec736d2
LP
182 return -EBADMSG;
183
7560fffc
LP
184 /* In both read and write mode we refuse to open files with
185 * incompatible flags we don't know */
807e17f0 186#ifdef HAVE_XZ
7560fffc 187 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
807e17f0
LP
188 return -EPROTONOSUPPORT;
189#else
cec736d2
LP
190 if (f->header->incompatible_flags != 0)
191 return -EPROTONOSUPPORT;
807e17f0 192#endif
cec736d2 193
7560fffc
LP
194 /* When open for writing we refuse to open files with
195 * compatible flags, too */
196 if (f->writable) {
197#ifdef HAVE_GCRYPT
baed47c3 198 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
7560fffc
LP
199 return -EPROTONOSUPPORT;
200#else
201 if (f->header->compatible_flags != 0)
202 return -EPROTONOSUPPORT;
203#endif
204 }
205
db11ac1a
LP
206 if (f->header->state >= _STATE_MAX)
207 return -EBADMSG;
208
dca6219e
LP
209 /* The first addition was n_data, so check that we are at least this large */
210 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
23b0b2b2
LP
211 return -EBADMSG;
212
baed47c3
LP
213 if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED) &&
214 !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
beec0085
LP
215 return -EBADMSG;
216
db11ac1a
LP
217 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
218 return -ENODATA;
219
220 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
221 return -ENODATA;
222
223 if (!VALID64(f->header->data_hash_table_offset) ||
224 !VALID64(f->header->field_hash_table_offset) ||
225 !VALID64(f->header->tail_object_offset) ||
226 !VALID64(f->header->entry_array_offset))
cec736d2
LP
227 return -ENODATA;
228
229 if (f->writable) {
ccdbaf91 230 uint8_t state;
cec736d2
LP
231 sd_id128_t machine_id;
232 int r;
233
234 r = sd_id128_get_machine(&machine_id);
235 if (r < 0)
236 return r;
237
238 if (!sd_id128_equal(machine_id, f->header->machine_id))
239 return -EHOSTDOWN;
240
de190aef 241 state = f->header->state;
cec736d2 242
71fa6f00
LP
243 if (state == STATE_ONLINE) {
244 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
245 return -EBUSY;
246 } else if (state == STATE_ARCHIVED)
cec736d2 247 return -ESHUTDOWN;
71fa6f00
LP
248 else if (state != STATE_OFFLINE) {
249 log_debug("Journal file %s has unknown state %u.", f->path, state);
250 return -EBUSY;
251 }
cec736d2
LP
252 }
253
7560fffc 254 f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
c586dbf1
LP
255
256 if (f->writable)
257 f->seal = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED);
7560fffc 258
cec736d2
LP
259 return 0;
260}
261
262static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 263 uint64_t old_size, new_size;
fec2aa2f 264 int r;
cec736d2
LP
265
266 assert(f);
267
cec736d2 268 /* We assume that this file is not sparse, and we know that
38ac38b2 269 * for sure, since we always call posix_fallocate()
cec736d2
LP
270 * ourselves */
271
272 old_size =
23b0b2b2 273 le64toh(f->header->header_size) +
cec736d2
LP
274 le64toh(f->header->arena_size);
275
bc85bfee 276 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
277 if (new_size < le64toh(f->header->header_size))
278 new_size = le64toh(f->header->header_size);
bc85bfee
LP
279
280 if (new_size <= old_size)
cec736d2
LP
281 return 0;
282
bc85bfee
LP
283 if (f->metrics.max_size > 0 &&
284 new_size > f->metrics.max_size)
285 return -E2BIG;
cec736d2 286
bc85bfee
LP
287 if (new_size > f->metrics.min_size &&
288 f->metrics.keep_free > 0) {
cec736d2
LP
289 struct statvfs svfs;
290
291 if (fstatvfs(f->fd, &svfs) >= 0) {
292 uint64_t available;
293
294 available = svfs.f_bfree * svfs.f_bsize;
295
bc85bfee
LP
296 if (available >= f->metrics.keep_free)
297 available -= f->metrics.keep_free;
cec736d2
LP
298 else
299 available = 0;
300
301 if (new_size - old_size > available)
302 return -E2BIG;
303 }
304 }
305
bc85bfee
LP
306 /* Note that the glibc fallocate() fallback is very
307 inefficient, hence we try to minimize the allocation area
308 as we can. */
fec2aa2f
GV
309 r = posix_fallocate(f->fd, old_size, new_size - old_size);
310 if (r != 0)
311 return -r;
cec736d2 312
f65425cb
LP
313 mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
314
cec736d2
LP
315 if (fstat(f->fd, &f->last_stat) < 0)
316 return -errno;
317
23b0b2b2 318 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
319
320 return 0;
321}
322
16e9f408 323static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
cec736d2 324 assert(f);
cec736d2
LP
325 assert(ret);
326
2a59ea54 327 /* Avoid SIGBUS on invalid accesses */
4bbdcdb3
LP
328 if (offset + size > (uint64_t) f->last_stat.st_size) {
329 /* Hmm, out of range? Let's refresh the fstat() data
330 * first, before we trust that check. */
331
332 if (fstat(f->fd, &f->last_stat) < 0 ||
333 offset + size > (uint64_t) f->last_stat.st_size)
334 return -EADDRNOTAVAIL;
335 }
336
16e9f408 337 return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
cec736d2
LP
338}
339
16e9f408
LP
340static uint64_t minimum_header_size(Object *o) {
341
342 static uint64_t table[] = {
343 [OBJECT_DATA] = sizeof(DataObject),
344 [OBJECT_FIELD] = sizeof(FieldObject),
345 [OBJECT_ENTRY] = sizeof(EntryObject),
346 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
347 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
348 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
349 [OBJECT_TAG] = sizeof(TagObject),
350 };
351
352 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
353 return sizeof(ObjectHeader);
354
355 return table[o->object.type];
356}
357
de190aef 358int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
359 int r;
360 void *t;
361 Object *o;
362 uint64_t s;
16e9f408 363 unsigned context;
cec736d2
LP
364
365 assert(f);
366 assert(ret);
367
db11ac1a
LP
368 /* Objects may only be located at multiple of 64 bit */
369 if (!VALID64(offset))
370 return -EFAULT;
371
16e9f408
LP
372 /* One context for each type, plus one catch-all for the rest */
373 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
374
375 r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
376 if (r < 0)
377 return r;
378
379 o = (Object*) t;
380 s = le64toh(o->object.size);
381
382 if (s < sizeof(ObjectHeader))
383 return -EBADMSG;
384
16e9f408
LP
385 if (o->object.type <= OBJECT_UNUSED)
386 return -EBADMSG;
387
388 if (s < minimum_header_size(o))
389 return -EBADMSG;
390
de190aef 391 if (type >= 0 && o->object.type != type)
cec736d2
LP
392 return -EBADMSG;
393
394 if (s > sizeof(ObjectHeader)) {
de190aef 395 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
396 if (r < 0)
397 return r;
398
399 o = (Object*) t;
400 }
401
cec736d2
LP
402 *ret = o;
403 return 0;
404}
405
d98cc1f2 406static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
407 uint64_t r;
408
409 assert(f);
410
beec0085 411 r = le64toh(f->header->tail_entry_seqnum) + 1;
c2373f84
LP
412
413 if (seqnum) {
de190aef 414 /* If an external seqnum counter was passed, we update
c2373f84
LP
415 * both the local and the external one, and set it to
416 * the maximum of both */
417
418 if (*seqnum + 1 > r)
419 r = *seqnum + 1;
420
421 *seqnum = r;
422 }
423
beec0085 424 f->header->tail_entry_seqnum = htole64(r);
cec736d2 425
beec0085
LP
426 if (f->header->head_entry_seqnum == 0)
427 f->header->head_entry_seqnum = htole64(r);
de190aef 428
cec736d2
LP
429 return r;
430}
431
0284adc6 432int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
433 int r;
434 uint64_t p;
435 Object *tail, *o;
436 void *t;
437
438 assert(f);
16e9f408 439 assert(type > 0 && type < _OBJECT_TYPE_MAX);
cec736d2
LP
440 assert(size >= sizeof(ObjectHeader));
441 assert(offset);
442 assert(ret);
443
444 p = le64toh(f->header->tail_object_offset);
cec736d2 445 if (p == 0)
23b0b2b2 446 p = le64toh(f->header->header_size);
cec736d2 447 else {
de190aef 448 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
449 if (r < 0)
450 return r;
451
452 p += ALIGN64(le64toh(tail->object.size));
453 }
454
455 r = journal_file_allocate(f, p, size);
456 if (r < 0)
457 return r;
458
de190aef 459 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
460 if (r < 0)
461 return r;
462
463 o = (Object*) t;
464
465 zero(o->object);
de190aef 466 o->object.type = type;
cec736d2
LP
467 o->object.size = htole64(size);
468
469 f->header->tail_object_offset = htole64(p);
cec736d2
LP
470 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
471
472 *ret = o;
473 *offset = p;
474
475 return 0;
476}
477
de190aef 478static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
479 uint64_t s, p;
480 Object *o;
481 int r;
482
483 assert(f);
484
dfabe643 485 /* We estimate that we need 1 hash table entry per 768 of
4a92baf3
LP
486 journal file and we want to make sure we never get beyond
487 75% fill level. Calculate the hash table size for the
488 maximum file size based on these metrics. */
489
dfabe643 490 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
4a92baf3
LP
491 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
492 s = DEFAULT_DATA_HASH_TABLE_SIZE;
493
dfabe643 494 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
4a92baf3 495
de190aef
LP
496 r = journal_file_append_object(f,
497 OBJECT_DATA_HASH_TABLE,
498 offsetof(Object, hash_table.items) + s,
499 &o, &p);
cec736d2
LP
500 if (r < 0)
501 return r;
502
de190aef 503 memset(o->hash_table.items, 0, s);
cec736d2 504
de190aef
LP
505 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
506 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
507
508 return 0;
509}
510
de190aef 511static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
512 uint64_t s, p;
513 Object *o;
514 int r;
515
516 assert(f);
517
de190aef
LP
518 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
519 r = journal_file_append_object(f,
520 OBJECT_FIELD_HASH_TABLE,
521 offsetof(Object, hash_table.items) + s,
522 &o, &p);
cec736d2
LP
523 if (r < 0)
524 return r;
525
de190aef 526 memset(o->hash_table.items, 0, s);
cec736d2 527
de190aef
LP
528 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
529 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
530
531 return 0;
532}
533
de190aef 534static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
535 uint64_t s, p;
536 void *t;
537 int r;
538
539 assert(f);
540
de190aef
LP
541 p = le64toh(f->header->data_hash_table_offset);
542 s = le64toh(f->header->data_hash_table_size);
cec736d2 543
de190aef 544 r = journal_file_move_to(f,
16e9f408 545 OBJECT_DATA_HASH_TABLE,
de190aef
LP
546 p, s,
547 &t);
cec736d2
LP
548 if (r < 0)
549 return r;
550
de190aef 551 f->data_hash_table = t;
cec736d2
LP
552 return 0;
553}
554
de190aef 555static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
556 uint64_t s, p;
557 void *t;
558 int r;
559
560 assert(f);
561
de190aef
LP
562 p = le64toh(f->header->field_hash_table_offset);
563 s = le64toh(f->header->field_hash_table_size);
cec736d2 564
de190aef 565 r = journal_file_move_to(f,
16e9f408 566 OBJECT_FIELD_HASH_TABLE,
de190aef
LP
567 p, s,
568 &t);
cec736d2
LP
569 if (r < 0)
570 return r;
571
de190aef 572 f->field_hash_table = t;
cec736d2
LP
573 return 0;
574}
575
de190aef
LP
576static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
577 uint64_t p, h;
cec736d2
LP
578 int r;
579
580 assert(f);
581 assert(o);
582 assert(offset > 0);
de190aef 583 assert(o->object.type == OBJECT_DATA);
cec736d2 584
48496df6
LP
585 /* This might alter the window we are looking at */
586
de190aef
LP
587 o->data.next_hash_offset = o->data.next_field_offset = 0;
588 o->data.entry_offset = o->data.entry_array_offset = 0;
589 o->data.n_entries = 0;
cec736d2 590
de190aef 591 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 592 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
593 if (p == 0) {
594 /* Only entry in the hash table is easy */
de190aef 595 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 596 } else {
48496df6
LP
597 /* Move back to the previous data object, to patch in
598 * pointer */
cec736d2 599
de190aef 600 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
601 if (r < 0)
602 return r;
603
de190aef 604 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
605 }
606
de190aef 607 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2 608
dca6219e
LP
609 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
610 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
611
cec736d2
LP
612 return 0;
613}
614
de190aef
LP
615int journal_file_find_data_object_with_hash(
616 JournalFile *f,
617 const void *data, uint64_t size, uint64_t hash,
618 Object **ret, uint64_t *offset) {
48496df6 619
de190aef 620 uint64_t p, osize, h;
cec736d2
LP
621 int r;
622
623 assert(f);
624 assert(data || size == 0);
625
626 osize = offsetof(Object, data.payload) + size;
627
bc85bfee
LP
628 if (f->header->data_hash_table_size == 0)
629 return -EBADMSG;
630
de190aef
LP
631 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
632 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 633
de190aef
LP
634 while (p > 0) {
635 Object *o;
cec736d2 636
de190aef 637 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
638 if (r < 0)
639 return r;
640
807e17f0 641 if (le64toh(o->data.hash) != hash)
85a131e8 642 goto next;
807e17f0
LP
643
644 if (o->object.flags & OBJECT_COMPRESSED) {
645#ifdef HAVE_XZ
b785c858 646 uint64_t l, rsize;
cec736d2 647
807e17f0
LP
648 l = le64toh(o->object.size);
649 if (l <= offsetof(Object, data.payload))
cec736d2
LP
650 return -EBADMSG;
651
807e17f0
LP
652 l -= offsetof(Object, data.payload);
653
654 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
655 return -EBADMSG;
656
b785c858 657 if (rsize == size &&
807e17f0
LP
658 memcmp(f->compress_buffer, data, size) == 0) {
659
660 if (ret)
661 *ret = o;
662
663 if (offset)
664 *offset = p;
665
666 return 1;
667 }
668#else
669 return -EPROTONOSUPPORT;
670#endif
671
672 } else if (le64toh(o->object.size) == osize &&
673 memcmp(o->data.payload, data, size) == 0) {
674
cec736d2
LP
675 if (ret)
676 *ret = o;
677
678 if (offset)
679 *offset = p;
680
de190aef 681 return 1;
cec736d2
LP
682 }
683
85a131e8 684 next:
cec736d2
LP
685 p = le64toh(o->data.next_hash_offset);
686 }
687
de190aef
LP
688 return 0;
689}
690
691int journal_file_find_data_object(
692 JournalFile *f,
693 const void *data, uint64_t size,
694 Object **ret, uint64_t *offset) {
695
696 uint64_t hash;
697
698 assert(f);
699 assert(data || size == 0);
700
701 hash = hash64(data, size);
702
703 return journal_file_find_data_object_with_hash(f,
704 data, size, hash,
705 ret, offset);
706}
707
48496df6
LP
708static int journal_file_append_data(
709 JournalFile *f,
710 const void *data, uint64_t size,
711 Object **ret, uint64_t *offset) {
712
de190aef
LP
713 uint64_t hash, p;
714 uint64_t osize;
715 Object *o;
716 int r;
807e17f0 717 bool compressed = false;
de190aef
LP
718
719 assert(f);
720 assert(data || size == 0);
721
722 hash = hash64(data, size);
723
724 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
725 if (r < 0)
726 return r;
727 else if (r > 0) {
728
729 if (ret)
730 *ret = o;
731
732 if (offset)
733 *offset = p;
734
735 return 0;
736 }
737
738 osize = offsetof(Object, data.payload) + size;
739 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
740 if (r < 0)
741 return r;
742
cec736d2 743 o->data.hash = htole64(hash);
807e17f0
LP
744
745#ifdef HAVE_XZ
746 if (f->compress &&
747 size >= COMPRESSION_SIZE_THRESHOLD) {
748 uint64_t rsize;
749
750 compressed = compress_blob(data, size, o->data.payload, &rsize);
751
752 if (compressed) {
753 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
754 o->object.flags |= OBJECT_COMPRESSED;
755
807e17f0
LP
756 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
757 }
758 }
759#endif
760
64825d3c 761 if (!compressed && size > 0)
807e17f0 762 memcpy(o->data.payload, data, size);
cec736d2 763
de190aef 764 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
765 if (r < 0)
766 return r;
767
b0af6f41
LP
768 r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
769 if (r < 0)
770 return r;
771
48496df6
LP
772 /* The linking might have altered the window, so let's
773 * refresh our pointer */
774 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
775 if (r < 0)
776 return r;
777
cec736d2
LP
778 if (ret)
779 *ret = o;
780
781 if (offset)
de190aef 782 *offset = p;
cec736d2
LP
783
784 return 0;
785}
786
787uint64_t journal_file_entry_n_items(Object *o) {
788 assert(o);
7be3aa17 789 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
790
791 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
792}
793
0284adc6 794uint64_t journal_file_entry_array_n_items(Object *o) {
de190aef 795 assert(o);
7be3aa17 796 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
797
798 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
799}
800
fb9a24b6
LP
801uint64_t journal_file_hash_table_n_items(Object *o) {
802 assert(o);
803 assert(o->object.type == OBJECT_DATA_HASH_TABLE ||
804 o->object.type == OBJECT_FIELD_HASH_TABLE);
805
806 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
807}
808
de190aef 809static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
810 le64_t *first,
811 le64_t *idx,
de190aef 812 uint64_t p) {
cec736d2 813 int r;
de190aef
LP
814 uint64_t n = 0, ap = 0, q, i, a, hidx;
815 Object *o;
816
cec736d2 817 assert(f);
de190aef
LP
818 assert(first);
819 assert(idx);
820 assert(p > 0);
cec736d2 821
de190aef
LP
822 a = le64toh(*first);
823 i = hidx = le64toh(*idx);
824 while (a > 0) {
825
826 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
827 if (r < 0)
828 return r;
cec736d2 829
de190aef
LP
830 n = journal_file_entry_array_n_items(o);
831 if (i < n) {
832 o->entry_array.items[i] = htole64(p);
833 *idx = htole64(hidx + 1);
834 return 0;
835 }
cec736d2 836
de190aef
LP
837 i -= n;
838 ap = a;
839 a = le64toh(o->entry_array.next_entry_array_offset);
840 }
841
842 if (hidx > n)
843 n = (hidx+1) * 2;
844 else
845 n = n * 2;
846
847 if (n < 4)
848 n = 4;
849
850 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
851 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
852 &o, &q);
cec736d2
LP
853 if (r < 0)
854 return r;
855
b0af6f41
LP
856 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
857 if (r < 0)
858 return r;
859
de190aef 860 o->entry_array.items[i] = htole64(p);
cec736d2 861
de190aef 862 if (ap == 0)
7be3aa17 863 *first = htole64(q);
cec736d2 864 else {
de190aef 865 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
866 if (r < 0)
867 return r;
868
de190aef
LP
869 o->entry_array.next_entry_array_offset = htole64(q);
870 }
cec736d2 871
2dee23eb
LP
872 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
873 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
874
de190aef
LP
875 *idx = htole64(hidx + 1);
876
877 return 0;
878}
cec736d2 879
de190aef 880static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
881 le64_t *extra,
882 le64_t *first,
883 le64_t *idx,
de190aef
LP
884 uint64_t p) {
885
886 int r;
887
888 assert(f);
889 assert(extra);
890 assert(first);
891 assert(idx);
892 assert(p > 0);
893
894 if (*idx == 0)
895 *extra = htole64(p);
896 else {
4fd052ae 897 le64_t i;
de190aef 898
7be3aa17 899 i = htole64(le64toh(*idx) - 1);
de190aef
LP
900 r = link_entry_into_array(f, first, &i, p);
901 if (r < 0)
902 return r;
cec736d2
LP
903 }
904
de190aef
LP
905 *idx = htole64(le64toh(*idx) + 1);
906 return 0;
907}
908
909static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
910 uint64_t p;
911 int r;
912 assert(f);
913 assert(o);
914 assert(offset > 0);
915
916 p = le64toh(o->entry.items[i].object_offset);
917 if (p == 0)
918 return -EINVAL;
919
920 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
921 if (r < 0)
922 return r;
923
de190aef
LP
924 return link_entry_into_array_plus_one(f,
925 &o->data.entry_offset,
926 &o->data.entry_array_offset,
927 &o->data.n_entries,
928 offset);
cec736d2
LP
929}
930
931static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 932 uint64_t n, i;
cec736d2
LP
933 int r;
934
935 assert(f);
936 assert(o);
937 assert(offset > 0);
de190aef 938 assert(o->object.type == OBJECT_ENTRY);
cec736d2 939
b788cc23
LP
940 __sync_synchronize();
941
cec736d2 942 /* Link up the entry itself */
de190aef
LP
943 r = link_entry_into_array(f,
944 &f->header->entry_array_offset,
945 &f->header->n_entries,
946 offset);
947 if (r < 0)
948 return r;
cec736d2 949
aaf53376 950 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 951
de190aef 952 if (f->header->head_entry_realtime == 0)
0ac38b70 953 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 954
0ac38b70 955 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
956 f->header->tail_entry_monotonic = o->entry.monotonic;
957
958 f->tail_entry_monotonic_valid = true;
cec736d2
LP
959
960 /* Link up the items */
961 n = journal_file_entry_n_items(o);
962 for (i = 0; i < n; i++) {
963 r = journal_file_link_entry_item(f, o, offset, i);
964 if (r < 0)
965 return r;
966 }
967
cec736d2
LP
968 return 0;
969}
970
971static int journal_file_append_entry_internal(
972 JournalFile *f,
973 const dual_timestamp *ts,
974 uint64_t xor_hash,
975 const EntryItem items[], unsigned n_items,
de190aef 976 uint64_t *seqnum,
cec736d2
LP
977 Object **ret, uint64_t *offset) {
978 uint64_t np;
979 uint64_t osize;
980 Object *o;
981 int r;
982
983 assert(f);
984 assert(items || n_items == 0);
de190aef 985 assert(ts);
cec736d2
LP
986
987 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
988
de190aef 989 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
990 if (r < 0)
991 return r;
992
d98cc1f2 993 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
cec736d2 994 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
995 o->entry.realtime = htole64(ts->realtime);
996 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
997 o->entry.xor_hash = htole64(xor_hash);
998 o->entry.boot_id = f->header->boot_id;
999
b0af6f41
LP
1000 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
1001 if (r < 0)
1002 return r;
1003
cec736d2
LP
1004 r = journal_file_link_entry(f, o, np);
1005 if (r < 0)
1006 return r;
1007
1008 if (ret)
1009 *ret = o;
1010
1011 if (offset)
1012 *offset = np;
1013
1014 return 0;
1015}
1016
cf244689 1017void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1018 assert(f);
1019
1020 /* inotify() does not receive IN_MODIFY events from file
1021 * accesses done via mmap(). After each access we hence
1022 * trigger IN_MODIFY by truncating the journal file to its
1023 * current size which triggers IN_MODIFY. */
1024
bc85bfee
LP
1025 __sync_synchronize();
1026
50f20cfd
LP
1027 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1028 log_error("Failed to to truncate file to its own size: %m");
1029}
1030
de190aef 1031int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1032 unsigned i;
1033 EntryItem *items;
1034 int r;
1035 uint64_t xor_hash = 0;
de190aef 1036 struct dual_timestamp _ts;
cec736d2
LP
1037
1038 assert(f);
1039 assert(iovec || n_iovec == 0);
1040
de190aef
LP
1041 if (!f->writable)
1042 return -EPERM;
1043
1044 if (!ts) {
1045 dual_timestamp_get(&_ts);
1046 ts = &_ts;
1047 }
1048
1049 if (f->tail_entry_monotonic_valid &&
1050 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1051 return -EINVAL;
1052
7560fffc
LP
1053 r = journal_file_maybe_append_tag(f, ts->realtime);
1054 if (r < 0)
1055 return r;
1056
64825d3c
LP
1057 /* alloca() can't take 0, hence let's allocate at least one */
1058 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
cec736d2
LP
1059
1060 for (i = 0; i < n_iovec; i++) {
1061 uint64_t p;
1062 Object *o;
1063
1064 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1065 if (r < 0)
cf244689 1066 return r;
cec736d2
LP
1067
1068 xor_hash ^= le64toh(o->data.hash);
1069 items[i].object_offset = htole64(p);
de7b95cd 1070 items[i].hash = o->data.hash;
cec736d2
LP
1071 }
1072
de190aef 1073 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1074
50f20cfd
LP
1075 journal_file_post_change(f);
1076
cec736d2
LP
1077 return r;
1078}
1079
de190aef
LP
1080static int generic_array_get(JournalFile *f,
1081 uint64_t first,
1082 uint64_t i,
1083 Object **ret, uint64_t *offset) {
1084
cec736d2 1085 Object *o;
6c8a39b8 1086 uint64_t p = 0, a;
cec736d2
LP
1087 int r;
1088
1089 assert(f);
1090
de190aef
LP
1091 a = first;
1092 while (a > 0) {
1093 uint64_t n;
cec736d2 1094
de190aef
LP
1095 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1096 if (r < 0)
1097 return r;
cec736d2 1098
de190aef
LP
1099 n = journal_file_entry_array_n_items(o);
1100 if (i < n) {
1101 p = le64toh(o->entry_array.items[i]);
1102 break;
cec736d2
LP
1103 }
1104
de190aef
LP
1105 i -= n;
1106 a = le64toh(o->entry_array.next_entry_array_offset);
1107 }
1108
1109 if (a <= 0 || p <= 0)
1110 return 0;
1111
1112 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1113 if (r < 0)
1114 return r;
1115
1116 if (ret)
1117 *ret = o;
1118
1119 if (offset)
1120 *offset = p;
1121
1122 return 1;
1123}
1124
1125static int generic_array_get_plus_one(JournalFile *f,
1126 uint64_t extra,
1127 uint64_t first,
1128 uint64_t i,
1129 Object **ret, uint64_t *offset) {
1130
1131 Object *o;
1132
1133 assert(f);
1134
1135 if (i == 0) {
1136 int r;
1137
1138 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1139 if (r < 0)
1140 return r;
1141
de190aef
LP
1142 if (ret)
1143 *ret = o;
cec736d2 1144
de190aef
LP
1145 if (offset)
1146 *offset = extra;
cec736d2 1147
de190aef 1148 return 1;
cec736d2
LP
1149 }
1150
de190aef
LP
1151 return generic_array_get(f, first, i-1, ret, offset);
1152}
cec736d2 1153
de190aef
LP
/* Results of the test_object_xxx() comparison callbacks used by the
 * bisection code. All values are non-negative, so that the callbacks
 * can return negative values to report errors. */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object sorts before the needle */
        TEST_RIGHT = 2  /* the tested object sorts after the needle */
};
cec736d2 1159
de190aef
LP
1160static int generic_array_bisect(JournalFile *f,
1161 uint64_t first,
1162 uint64_t n,
1163 uint64_t needle,
1164 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1165 direction_t direction,
1166 Object **ret,
1167 uint64_t *offset,
1168 uint64_t *idx) {
1169
1170 uint64_t a, p, t = 0, i = 0, last_p = 0;
1171 bool subtract_one = false;
1172 Object *o, *array = NULL;
1173 int r;
cec736d2 1174
de190aef
LP
1175 assert(f);
1176 assert(test_object);
cec736d2 1177
de190aef
LP
1178 a = first;
1179 while (a > 0) {
1180 uint64_t left, right, k, lp;
1181
1182 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1183 if (r < 0)
1184 return r;
1185
de190aef
LP
1186 k = journal_file_entry_array_n_items(array);
1187 right = MIN(k, n);
1188 if (right <= 0)
1189 return 0;
cec736d2 1190
de190aef
LP
1191 i = right - 1;
1192 lp = p = le64toh(array->entry_array.items[i]);
1193 if (p <= 0)
1194 return -EBADMSG;
cec736d2 1195
de190aef
LP
1196 r = test_object(f, p, needle);
1197 if (r < 0)
1198 return r;
cec736d2 1199
de190aef
LP
1200 if (r == TEST_FOUND)
1201 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1202
1203 if (r == TEST_RIGHT) {
1204 left = 0;
1205 right -= 1;
1206 for (;;) {
1207 if (left == right) {
1208 if (direction == DIRECTION_UP)
1209 subtract_one = true;
1210
1211 i = left;
1212 goto found;
1213 }
1214
1215 assert(left < right);
1216
1217 i = (left + right) / 2;
1218 p = le64toh(array->entry_array.items[i]);
1219 if (p <= 0)
1220 return -EBADMSG;
1221
1222 r = test_object(f, p, needle);
1223 if (r < 0)
1224 return r;
cec736d2 1225
de190aef
LP
1226 if (r == TEST_FOUND)
1227 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1228
1229 if (r == TEST_RIGHT)
1230 right = i;
1231 else
1232 left = i + 1;
1233 }
1234 }
1235
cbdca852
LP
1236 if (k > n) {
1237 if (direction == DIRECTION_UP) {
1238 i = n;
1239 subtract_one = true;
1240 goto found;
1241 }
1242
cec736d2 1243 return 0;
cbdca852 1244 }
cec736d2 1245
de190aef
LP
1246 last_p = lp;
1247
1248 n -= k;
1249 t += k;
1250 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1251 }
1252
1253 return 0;
de190aef
LP
1254
1255found:
1256 if (subtract_one && t == 0 && i == 0)
1257 return 0;
1258
1259 if (subtract_one && i == 0)
1260 p = last_p;
1261 else if (subtract_one)
1262 p = le64toh(array->entry_array.items[i-1]);
1263 else
1264 p = le64toh(array->entry_array.items[i]);
1265
1266 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1267 if (r < 0)
1268 return r;
1269
1270 if (ret)
1271 *ret = o;
1272
1273 if (offset)
1274 *offset = p;
1275
1276 if (idx)
cbdca852 1277 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1278
1279 return 1;
cec736d2
LP
1280}
1281
de190aef
LP
1282static int generic_array_bisect_plus_one(JournalFile *f,
1283 uint64_t extra,
1284 uint64_t first,
1285 uint64_t n,
1286 uint64_t needle,
1287 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1288 direction_t direction,
1289 Object **ret,
1290 uint64_t *offset,
1291 uint64_t *idx) {
1292
cec736d2 1293 int r;
cbdca852
LP
1294 bool step_back = false;
1295 Object *o;
cec736d2
LP
1296
1297 assert(f);
de190aef 1298 assert(test_object);
cec736d2 1299
de190aef
LP
1300 if (n <= 0)
1301 return 0;
cec736d2 1302
de190aef
LP
1303 /* This bisects the array in object 'first', but first checks
1304 * an extra */
de190aef
LP
1305 r = test_object(f, extra, needle);
1306 if (r < 0)
1307 return r;
a536e261
LP
1308
1309 if (r == TEST_FOUND)
1310 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1311
cbdca852
LP
1312 /* if we are looking with DIRECTION_UP then we need to first
1313 see if in the actual array there is a matching entry, and
1314 return the last one of that. But if there isn't any we need
1315 to return this one. Hence remember this, and return it
1316 below. */
1317 if (r == TEST_LEFT)
1318 step_back = direction == DIRECTION_UP;
de190aef 1319
cbdca852
LP
1320 if (r == TEST_RIGHT) {
1321 if (direction == DIRECTION_DOWN)
1322 goto found;
1323 else
1324 return 0;
a536e261 1325 }
cec736d2 1326
de190aef
LP
1327 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1328
cbdca852
LP
1329 if (r == 0 && step_back)
1330 goto found;
1331
ecf68b1d 1332 if (r > 0 && idx)
de190aef
LP
1333 (*idx) ++;
1334
1335 return r;
cbdca852
LP
1336
1337found:
1338 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1339 if (r < 0)
1340 return r;
1341
1342 if (ret)
1343 *ret = o;
1344
1345 if (offset)
1346 *offset = extra;
1347
1348 if (idx)
1349 *idx = 0;
1350
1351 return 1;
1352}
1353
1354static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1355 assert(f);
1356 assert(p > 0);
1357
1358 if (p == needle)
1359 return TEST_FOUND;
1360 else if (p < needle)
1361 return TEST_LEFT;
1362 else
1363 return TEST_RIGHT;
1364}
1365
1366int journal_file_move_to_entry_by_offset(
1367 JournalFile *f,
1368 uint64_t p,
1369 direction_t direction,
1370 Object **ret,
1371 uint64_t *offset) {
1372
1373 return generic_array_bisect(f,
1374 le64toh(f->header->entry_array_offset),
1375 le64toh(f->header->n_entries),
1376 p,
1377 test_object_offset,
1378 direction,
1379 ret, offset, NULL);
de190aef
LP
1380}
1381
cbdca852 1382
de190aef
LP
1383static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1384 Object *o;
1385 int r;
1386
1387 assert(f);
1388 assert(p > 0);
1389
1390 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1391 if (r < 0)
1392 return r;
1393
de190aef
LP
1394 if (le64toh(o->entry.seqnum) == needle)
1395 return TEST_FOUND;
1396 else if (le64toh(o->entry.seqnum) < needle)
1397 return TEST_LEFT;
1398 else
1399 return TEST_RIGHT;
1400}
cec736d2 1401
de190aef
LP
1402int journal_file_move_to_entry_by_seqnum(
1403 JournalFile *f,
1404 uint64_t seqnum,
1405 direction_t direction,
1406 Object **ret,
1407 uint64_t *offset) {
1408
1409 return generic_array_bisect(f,
1410 le64toh(f->header->entry_array_offset),
1411 le64toh(f->header->n_entries),
1412 seqnum,
1413 test_object_seqnum,
1414 direction,
1415 ret, offset, NULL);
1416}
cec736d2 1417
de190aef
LP
1418static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1419 Object *o;
1420 int r;
1421
1422 assert(f);
1423 assert(p > 0);
1424
1425 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1426 if (r < 0)
1427 return r;
1428
1429 if (le64toh(o->entry.realtime) == needle)
1430 return TEST_FOUND;
1431 else if (le64toh(o->entry.realtime) < needle)
1432 return TEST_LEFT;
1433 else
1434 return TEST_RIGHT;
cec736d2
LP
1435}
1436
de190aef
LP
1437int journal_file_move_to_entry_by_realtime(
1438 JournalFile *f,
1439 uint64_t realtime,
1440 direction_t direction,
1441 Object **ret,
1442 uint64_t *offset) {
1443
1444 return generic_array_bisect(f,
1445 le64toh(f->header->entry_array_offset),
1446 le64toh(f->header->n_entries),
1447 realtime,
1448 test_object_realtime,
1449 direction,
1450 ret, offset, NULL);
1451}
1452
1453static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1454 Object *o;
1455 int r;
1456
1457 assert(f);
1458 assert(p > 0);
1459
1460 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1461 if (r < 0)
1462 return r;
1463
1464 if (le64toh(o->entry.monotonic) == needle)
1465 return TEST_FOUND;
1466 else if (le64toh(o->entry.monotonic) < needle)
1467 return TEST_LEFT;
1468 else
1469 return TEST_RIGHT;
1470}
1471
1472int journal_file_move_to_entry_by_monotonic(
1473 JournalFile *f,
1474 sd_id128_t boot_id,
1475 uint64_t monotonic,
1476 direction_t direction,
1477 Object **ret,
1478 uint64_t *offset) {
1479
10b6f904 1480 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1481 Object *o;
1482 int r;
1483
cbdca852 1484 assert(f);
de190aef 1485
cbdca852 1486 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1487 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1488 if (r < 0)
1489 return r;
cbdca852 1490 if (r == 0)
de190aef
LP
1491 return -ENOENT;
1492
1493 return generic_array_bisect_plus_one(f,
1494 le64toh(o->data.entry_offset),
1495 le64toh(o->data.entry_array_offset),
1496 le64toh(o->data.n_entries),
1497 monotonic,
1498 test_object_monotonic,
1499 direction,
1500 ret, offset, NULL);
1501}
1502
de190aef
LP
1503int journal_file_next_entry(
1504 JournalFile *f,
1505 Object *o, uint64_t p,
1506 direction_t direction,
1507 Object **ret, uint64_t *offset) {
1508
1509 uint64_t i, n;
cec736d2
LP
1510 int r;
1511
1512 assert(f);
de190aef
LP
1513 assert(p > 0 || !o);
1514
1515 n = le64toh(f->header->n_entries);
1516 if (n <= 0)
1517 return 0;
cec736d2
LP
1518
1519 if (!o)
de190aef 1520 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1521 else {
de190aef 1522 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1523 return -EINVAL;
1524
de190aef
LP
1525 r = generic_array_bisect(f,
1526 le64toh(f->header->entry_array_offset),
1527 le64toh(f->header->n_entries),
1528 p,
1529 test_object_offset,
1530 DIRECTION_DOWN,
1531 NULL, NULL,
1532 &i);
1533 if (r <= 0)
1534 return r;
1535
1536 if (direction == DIRECTION_DOWN) {
1537 if (i >= n - 1)
1538 return 0;
1539
1540 i++;
1541 } else {
1542 if (i <= 0)
1543 return 0;
1544
1545 i--;
1546 }
cec736d2
LP
1547 }
1548
de190aef
LP
1549 /* And jump to it */
1550 return generic_array_get(f,
1551 le64toh(f->header->entry_array_offset),
1552 i,
1553 ret, offset);
1554}
cec736d2 1555
de190aef
LP
1556int journal_file_skip_entry(
1557 JournalFile *f,
1558 Object *o, uint64_t p,
1559 int64_t skip,
1560 Object **ret, uint64_t *offset) {
1561
1562 uint64_t i, n;
1563 int r;
1564
1565 assert(f);
1566 assert(o);
1567 assert(p > 0);
1568
1569 if (o->object.type != OBJECT_ENTRY)
1570 return -EINVAL;
1571
1572 r = generic_array_bisect(f,
1573 le64toh(f->header->entry_array_offset),
1574 le64toh(f->header->n_entries),
1575 p,
1576 test_object_offset,
1577 DIRECTION_DOWN,
1578 NULL, NULL,
1579 &i);
1580 if (r <= 0)
cec736d2
LP
1581 return r;
1582
de190aef
LP
1583 /* Calculate new index */
1584 if (skip < 0) {
1585 if ((uint64_t) -skip >= i)
1586 i = 0;
1587 else
1588 i = i - (uint64_t) -skip;
1589 } else
1590 i += (uint64_t) skip;
cec736d2 1591
de190aef
LP
1592 n = le64toh(f->header->n_entries);
1593 if (n <= 0)
1594 return -EBADMSG;
cec736d2 1595
de190aef
LP
1596 if (i >= n)
1597 i = n-1;
1598
1599 return generic_array_get(f,
1600 le64toh(f->header->entry_array_offset),
1601 i,
1602 ret, offset);
cec736d2
LP
1603}
1604
de190aef
LP
1605int journal_file_next_entry_for_data(
1606 JournalFile *f,
1607 Object *o, uint64_t p,
1608 uint64_t data_offset,
1609 direction_t direction,
1610 Object **ret, uint64_t *offset) {
1611
1612 uint64_t n, i;
cec736d2 1613 int r;
de190aef 1614 Object *d;
cec736d2
LP
1615
1616 assert(f);
de190aef 1617 assert(p > 0 || !o);
cec736d2 1618
de190aef 1619 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1620 if (r < 0)
de190aef 1621 return r;
cec736d2 1622
de190aef
LP
1623 n = le64toh(d->data.n_entries);
1624 if (n <= 0)
1625 return n;
cec736d2 1626
de190aef
LP
1627 if (!o)
1628 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1629 else {
1630 if (o->object.type != OBJECT_ENTRY)
1631 return -EINVAL;
cec736d2 1632
de190aef
LP
1633 r = generic_array_bisect_plus_one(f,
1634 le64toh(d->data.entry_offset),
1635 le64toh(d->data.entry_array_offset),
1636 le64toh(d->data.n_entries),
1637 p,
1638 test_object_offset,
1639 DIRECTION_DOWN,
1640 NULL, NULL,
1641 &i);
1642
1643 if (r <= 0)
cec736d2
LP
1644 return r;
1645
de190aef
LP
1646 if (direction == DIRECTION_DOWN) {
1647 if (i >= n - 1)
1648 return 0;
cec736d2 1649
de190aef
LP
1650 i++;
1651 } else {
1652 if (i <= 0)
1653 return 0;
cec736d2 1654
de190aef
LP
1655 i--;
1656 }
cec736d2 1657
de190aef 1658 }
cec736d2 1659
de190aef
LP
1660 return generic_array_get_plus_one(f,
1661 le64toh(d->data.entry_offset),
1662 le64toh(d->data.entry_array_offset),
1663 i,
1664 ret, offset);
1665}
cec736d2 1666
cbdca852
LP
1667int journal_file_move_to_entry_by_offset_for_data(
1668 JournalFile *f,
1669 uint64_t data_offset,
1670 uint64_t p,
1671 direction_t direction,
1672 Object **ret, uint64_t *offset) {
1673
1674 int r;
1675 Object *d;
1676
1677 assert(f);
1678
1679 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1680 if (r < 0)
1681 return r;
1682
1683 return generic_array_bisect_plus_one(f,
1684 le64toh(d->data.entry_offset),
1685 le64toh(d->data.entry_array_offset),
1686 le64toh(d->data.n_entries),
1687 p,
1688 test_object_offset,
1689 direction,
1690 ret, offset, NULL);
1691}
1692
1693int journal_file_move_to_entry_by_monotonic_for_data(
1694 JournalFile *f,
1695 uint64_t data_offset,
1696 sd_id128_t boot_id,
1697 uint64_t monotonic,
1698 direction_t direction,
1699 Object **ret, uint64_t *offset) {
1700
1701 char t[9+32+1] = "_BOOT_ID=";
1702 Object *o, *d;
1703 int r;
1704 uint64_t b, z;
1705
1706 assert(f);
1707
1708 /* First, seek by time */
1709 sd_id128_to_string(boot_id, t + 9);
1710 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1711 if (r < 0)
1712 return r;
1713 if (r == 0)
1714 return -ENOENT;
1715
1716 r = generic_array_bisect_plus_one(f,
1717 le64toh(o->data.entry_offset),
1718 le64toh(o->data.entry_array_offset),
1719 le64toh(o->data.n_entries),
1720 monotonic,
1721 test_object_monotonic,
1722 direction,
1723 NULL, &z, NULL);
1724 if (r <= 0)
1725 return r;
1726
1727 /* And now, continue seeking until we find an entry that
1728 * exists in both bisection arrays */
1729
1730 for (;;) {
1731 Object *qo;
1732 uint64_t p, q;
1733
1734 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1735 if (r < 0)
1736 return r;
1737
1738 r = generic_array_bisect_plus_one(f,
1739 le64toh(d->data.entry_offset),
1740 le64toh(d->data.entry_array_offset),
1741 le64toh(d->data.n_entries),
1742 z,
1743 test_object_offset,
1744 direction,
1745 NULL, &p, NULL);
1746 if (r <= 0)
1747 return r;
1748
1749 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1750 if (r < 0)
1751 return r;
1752
1753 r = generic_array_bisect_plus_one(f,
1754 le64toh(o->data.entry_offset),
1755 le64toh(o->data.entry_array_offset),
1756 le64toh(o->data.n_entries),
1757 p,
1758 test_object_offset,
1759 direction,
1760 &qo, &q, NULL);
1761
1762 if (r <= 0)
1763 return r;
1764
1765 if (p == q) {
1766 if (ret)
1767 *ret = qo;
1768 if (offset)
1769 *offset = q;
1770
1771 return 1;
1772 }
1773
1774 z = q;
1775 }
1776
1777 return 0;
1778}
1779
de190aef
LP
1780int journal_file_move_to_entry_by_seqnum_for_data(
1781 JournalFile *f,
1782 uint64_t data_offset,
1783 uint64_t seqnum,
1784 direction_t direction,
1785 Object **ret, uint64_t *offset) {
cec736d2 1786
de190aef
LP
1787 Object *d;
1788 int r;
cec736d2 1789
91a31dde
LP
1790 assert(f);
1791
de190aef 1792 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1793 if (r < 0)
de190aef 1794 return r;
cec736d2 1795
de190aef
LP
1796 return generic_array_bisect_plus_one(f,
1797 le64toh(d->data.entry_offset),
1798 le64toh(d->data.entry_array_offset),
1799 le64toh(d->data.n_entries),
1800 seqnum,
1801 test_object_seqnum,
1802 direction,
1803 ret, offset, NULL);
1804}
cec736d2 1805
de190aef
LP
1806int journal_file_move_to_entry_by_realtime_for_data(
1807 JournalFile *f,
1808 uint64_t data_offset,
1809 uint64_t realtime,
1810 direction_t direction,
1811 Object **ret, uint64_t *offset) {
1812
1813 Object *d;
1814 int r;
1815
91a31dde
LP
1816 assert(f);
1817
de190aef 1818 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1819 if (r < 0)
de190aef
LP
1820 return r;
1821
1822 return generic_array_bisect_plus_one(f,
1823 le64toh(d->data.entry_offset),
1824 le64toh(d->data.entry_array_offset),
1825 le64toh(d->data.n_entries),
1826 realtime,
1827 test_object_realtime,
1828 direction,
1829 ret, offset, NULL);
cec736d2
LP
1830}
1831
0284adc6 1832void journal_file_dump(JournalFile *f) {
7560fffc 1833 Object *o;
7560fffc 1834 int r;
0284adc6 1835 uint64_t p;
7560fffc
LP
1836
1837 assert(f);
1838
0284adc6 1839 journal_file_print_header(f);
7560fffc 1840
0284adc6
LP
1841 p = le64toh(f->header->header_size);
1842 while (p != 0) {
1843 r = journal_file_move_to_object(f, -1, p, &o);
1844 if (r < 0)
1845 goto fail;
7560fffc 1846
0284adc6 1847 switch (o->object.type) {
d98cc1f2 1848
0284adc6
LP
1849 case OBJECT_UNUSED:
1850 printf("Type: OBJECT_UNUSED\n");
1851 break;
d98cc1f2 1852
0284adc6
LP
1853 case OBJECT_DATA:
1854 printf("Type: OBJECT_DATA\n");
1855 break;
7560fffc 1856
0284adc6 1857 case OBJECT_ENTRY:
f7fab8a5 1858 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
0284adc6
LP
1859 (unsigned long long) le64toh(o->entry.seqnum),
1860 (unsigned long long) le64toh(o->entry.monotonic),
1861 (unsigned long long) le64toh(o->entry.realtime));
1862 break;
7560fffc 1863
0284adc6
LP
1864 case OBJECT_FIELD_HASH_TABLE:
1865 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1866 break;
7560fffc 1867
0284adc6
LP
1868 case OBJECT_DATA_HASH_TABLE:
1869 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1870 break;
7560fffc 1871
0284adc6
LP
1872 case OBJECT_ENTRY_ARRAY:
1873 printf("Type: OBJECT_ENTRY_ARRAY\n");
1874 break;
7560fffc 1875
0284adc6 1876 case OBJECT_TAG:
f7fab8a5
LP
1877 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1878 (unsigned long long) le64toh(o->tag.seqnum),
1879 (unsigned long long) le64toh(o->tag.epoch));
0284adc6
LP
1880 break;
1881 }
7560fffc 1882
0284adc6
LP
1883 if (o->object.flags & OBJECT_COMPRESSED)
1884 printf("Flags: COMPRESSED\n");
7560fffc 1885
0284adc6
LP
1886 if (p == le64toh(f->header->tail_object_offset))
1887 p = 0;
1888 else
1889 p = p + ALIGN64(le64toh(o->object.size));
1890 }
7560fffc 1891
0284adc6
LP
1892 return;
1893fail:
1894 log_error("File corrupt");
7560fffc
LP
1895}
1896
0284adc6
LP
1897void journal_file_print_header(JournalFile *f) {
1898 char a[33], b[33], c[33];
1899 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
7560fffc
LP
1900
1901 assert(f);
7560fffc 1902
0284adc6
LP
1903 printf("File Path: %s\n"
1904 "File ID: %s\n"
1905 "Machine ID: %s\n"
1906 "Boot ID: %s\n"
1907 "Sequential Number ID: %s\n"
1908 "State: %s\n"
1909 "Compatible Flags:%s%s\n"
1910 "Incompatible Flags:%s%s\n"
1911 "Header size: %llu\n"
1912 "Arena size: %llu\n"
1913 "Data Hash Table Size: %llu\n"
1914 "Field Hash Table Size: %llu\n"
0284adc6
LP
1915 "Rotate Suggested: %s\n"
1916 "Head Sequential Number: %llu\n"
1917 "Tail Sequential Number: %llu\n"
1918 "Head Realtime Timestamp: %s\n"
3223f44f
LP
1919 "Tail Realtime Timestamp: %s\n"
1920 "Objects: %llu\n"
1921 "Entry Objects: %llu\n",
0284adc6
LP
1922 f->path,
1923 sd_id128_to_string(f->header->file_id, a),
1924 sd_id128_to_string(f->header->machine_id, b),
1925 sd_id128_to_string(f->header->boot_id, c),
1926 sd_id128_to_string(f->header->seqnum_id, c),
3223f44f
LP
1927 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1928 f->header->state == STATE_ONLINE ? "ONLINE" :
1929 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
baed47c3
LP
1930 (f->header->compatible_flags & HEADER_COMPATIBLE_SEALED) ? " SEALED" : "",
1931 (f->header->compatible_flags & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
0284adc6
LP
1932 (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1933 (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1934 (unsigned long long) le64toh(f->header->header_size),
1935 (unsigned long long) le64toh(f->header->arena_size),
1936 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1937 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
0284adc6
LP
1938 yes_no(journal_file_rotate_suggested(f)),
1939 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1940 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1941 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
3223f44f
LP
1942 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1943 (unsigned long long) le64toh(f->header->n_objects),
1944 (unsigned long long) le64toh(f->header->n_entries));
7560fffc 1945
0284adc6
LP
1946 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1947 printf("Data Objects: %llu\n"
1948 "Data Hash Table Fill: %.1f%%\n",
1949 (unsigned long long) le64toh(f->header->n_data),
1950 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
7560fffc 1951
0284adc6
LP
1952 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1953 printf("Field Objects: %llu\n"
1954 "Field Hash Table Fill: %.1f%%\n",
1955 (unsigned long long) le64toh(f->header->n_fields),
1956 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
3223f44f
LP
1957
1958 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1959 printf("Tag Objects: %llu\n",
1960 (unsigned long long) le64toh(f->header->n_tags));
1961 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1962 printf("Entry Array Objects: %llu\n",
1963 (unsigned long long) le64toh(f->header->n_entry_arrays));
7560fffc
LP
1964}
1965
0284adc6
LP
1966int journal_file_open(
1967 const char *fname,
1968 int flags,
1969 mode_t mode,
1970 bool compress,
baed47c3 1971 bool seal,
0284adc6
LP
1972 JournalMetrics *metrics,
1973 MMapCache *mmap_cache,
1974 JournalFile *template,
1975 JournalFile **ret) {
7560fffc 1976
0284adc6
LP
1977 JournalFile *f;
1978 int r;
1979 bool newly_created = false;
7560fffc 1980
0284adc6 1981 assert(fname);
7560fffc 1982
0284adc6
LP
1983 if ((flags & O_ACCMODE) != O_RDONLY &&
1984 (flags & O_ACCMODE) != O_RDWR)
1985 return -EINVAL;
7560fffc 1986
a0108012
LP
1987 if (!endswith(fname, ".journal") &&
1988 !endswith(fname, ".journal~"))
0284adc6 1989 return -EINVAL;
7560fffc 1990
0284adc6
LP
1991 f = new0(JournalFile, 1);
1992 if (!f)
1993 return -ENOMEM;
7560fffc 1994
0284adc6
LP
1995 f->fd = -1;
1996 f->mode = mode;
7560fffc 1997
0284adc6
LP
1998 f->flags = flags;
1999 f->prot = prot_from_flags(flags);
2000 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2001 f->compress = compress;
baed47c3 2002 f->seal = seal;
7560fffc 2003
0284adc6
LP
2004 if (mmap_cache)
2005 f->mmap = mmap_cache_ref(mmap_cache);
2006 else {
84168d80 2007 f->mmap = mmap_cache_new();
0284adc6
LP
2008 if (!f->mmap) {
2009 r = -ENOMEM;
2010 goto fail;
2011 }
2012 }
7560fffc 2013
0284adc6
LP
2014 f->path = strdup(fname);
2015 if (!f->path) {
2016 r = -ENOMEM;
2017 goto fail;
2018 }
7560fffc 2019
0284adc6
LP
2020 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2021 if (f->fd < 0) {
2022 r = -errno;
2023 goto fail;
7560fffc 2024 }
7560fffc 2025
0284adc6
LP
2026 if (fstat(f->fd, &f->last_stat) < 0) {
2027 r = -errno;
2028 goto fail;
2029 }
7560fffc 2030
0284adc6
LP
2031 if (f->last_stat.st_size == 0 && f->writable) {
2032 newly_created = true;
7560fffc 2033
0284adc6 2034 /* Try to load the FSPRG state, and if we can't, then
baed47c3
LP
2035 * just don't do sealing */
2036 r = journal_file_fss_load(f);
0284adc6 2037 if (r < 0)
baed47c3 2038 f->seal = false;
7560fffc 2039
0284adc6
LP
2040 r = journal_file_init_header(f, template);
2041 if (r < 0)
2042 goto fail;
7560fffc 2043
0284adc6
LP
2044 if (fstat(f->fd, &f->last_stat) < 0) {
2045 r = -errno;
2046 goto fail;
2047 }
2048 }
7560fffc 2049
0284adc6
LP
2050 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2051 r = -EIO;
2052 goto fail;
2053 }
7560fffc 2054
0284adc6
LP
2055 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2056 if (f->header == MAP_FAILED) {
2057 f->header = NULL;
2058 r = -errno;
2059 goto fail;
2060 }
7560fffc 2061
0284adc6
LP
2062 if (!newly_created) {
2063 r = journal_file_verify_header(f);
2064 if (r < 0)
2065 goto fail;
2066 }
7560fffc 2067
0284adc6 2068 if (!newly_created && f->writable) {
baed47c3 2069 r = journal_file_fss_load(f);
0284adc6
LP
2070 if (r < 0)
2071 goto fail;
2072 }
cec736d2
LP
2073
2074 if (f->writable) {
4a92baf3
LP
2075 if (metrics) {
2076 journal_default_metrics(metrics, f->fd);
2077 f->metrics = *metrics;
2078 } else if (template)
2079 f->metrics = template->metrics;
2080
cec736d2
LP
2081 r = journal_file_refresh_header(f);
2082 if (r < 0)
2083 goto fail;
2084 }
2085
baed47c3 2086 r = journal_file_hmac_setup(f);
14d10188
LP
2087 if (r < 0)
2088 goto fail;
2089
cec736d2 2090 if (newly_created) {
de190aef 2091 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
2092 if (r < 0)
2093 goto fail;
2094
de190aef 2095 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
2096 if (r < 0)
2097 goto fail;
7560fffc
LP
2098
2099 r = journal_file_append_first_tag(f);
2100 if (r < 0)
2101 goto fail;
cec736d2
LP
2102 }
2103
de190aef 2104 r = journal_file_map_field_hash_table(f);
cec736d2
LP
2105 if (r < 0)
2106 goto fail;
2107
de190aef 2108 r = journal_file_map_data_hash_table(f);
cec736d2
LP
2109 if (r < 0)
2110 goto fail;
2111
2112 if (ret)
2113 *ret = f;
2114
2115 return 0;
2116
2117fail:
2118 journal_file_close(f);
2119
2120 return r;
2121}
0ac38b70 2122
baed47c3 2123int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
0ac38b70
LP
2124 char *p;
2125 size_t l;
2126 JournalFile *old_file, *new_file = NULL;
2127 int r;
2128
2129 assert(f);
2130 assert(*f);
2131
2132 old_file = *f;
2133
2134 if (!old_file->writable)
2135 return -EINVAL;
2136
2137 if (!endswith(old_file->path, ".journal"))
2138 return -EINVAL;
2139
2140 l = strlen(old_file->path);
2141
9447a7f1 2142 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2143 if (!p)
2144 return -ENOMEM;
2145
2146 memcpy(p, old_file->path, l - 8);
2147 p[l-8] = '@';
2148 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2149 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2150 "-%016llx-%016llx.journal",
beec0085 2151 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
0ac38b70
LP
2152 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2153
2154 r = rename(old_file->path, p);
2155 free(p);
2156
2157 if (r < 0)
2158 return -errno;
2159
ccdbaf91 2160 old_file->header->state = STATE_ARCHIVED;
0ac38b70 2161
baed47c3 2162 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
0ac38b70
LP
2163 journal_file_close(old_file);
2164
2165 *f = new_file;
2166 return r;
2167}
2168
9447a7f1
LP
2169int journal_file_open_reliably(
2170 const char *fname,
2171 int flags,
2172 mode_t mode,
7560fffc 2173 bool compress,
baed47c3 2174 bool seal,
4a92baf3 2175 JournalMetrics *metrics,
27370278 2176 MMapCache *mmap_cache,
9447a7f1
LP
2177 JournalFile *template,
2178 JournalFile **ret) {
2179
2180 int r;
2181 size_t l;
2182 char *p;
2183
baed47c3 2184 r = journal_file_open(fname, flags, mode, compress, seal,
27370278 2185 metrics, mmap_cache, template, ret);
0071d9f1
LP
2186 if (r != -EBADMSG && /* corrupted */
2187 r != -ENODATA && /* truncated */
2188 r != -EHOSTDOWN && /* other machine */
a1a1898f
LP
2189 r != -EPROTONOSUPPORT && /* incompatible feature */
2190 r != -EBUSY && /* unclean shutdown */
2191 r != -ESHUTDOWN /* already archived */)
9447a7f1
LP
2192 return r;
2193
2194 if ((flags & O_ACCMODE) == O_RDONLY)
2195 return r;
2196
2197 if (!(flags & O_CREAT))
2198 return r;
2199
7560fffc
LP
2200 if (!endswith(fname, ".journal"))
2201 return r;
2202
5c70eab4
LP
2203 /* The file is corrupted. Rotate it away and try it again (but only once) */
2204
9447a7f1
LP
2205 l = strlen(fname);
2206 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2207 (int) (l-8), fname,
2208 (unsigned long long) now(CLOCK_REALTIME),
2209 random_ull()) < 0)
2210 return -ENOMEM;
2211
2212 r = rename(fname, p);
2213 free(p);
2214 if (r < 0)
2215 return -errno;
2216
a1a1898f 2217 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
9447a7f1 2218
baed47c3 2219 return journal_file_open(fname, flags, mode, compress, seal,
27370278 2220 metrics, mmap_cache, template, ret);
9447a7f1
LP
2221}
2222
cf244689
LP
2223
2224int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2225 uint64_t i, n;
2226 uint64_t q, xor_hash = 0;
2227 int r;
2228 EntryItem *items;
2229 dual_timestamp ts;
2230
2231 assert(from);
2232 assert(to);
2233 assert(o);
2234 assert(p);
2235
2236 if (!to->writable)
2237 return -EPERM;
2238
2239 ts.monotonic = le64toh(o->entry.monotonic);
2240 ts.realtime = le64toh(o->entry.realtime);
2241
2242 if (to->tail_entry_monotonic_valid &&
2243 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2244 return -EINVAL;
2245
cf244689
LP
2246 n = journal_file_entry_n_items(o);
2247 items = alloca(sizeof(EntryItem) * n);
2248
2249 for (i = 0; i < n; i++) {
4fd052ae
FC
2250 uint64_t l, h;
2251 le64_t le_hash;
cf244689
LP
2252 size_t t;
2253 void *data;
2254 Object *u;
2255
2256 q = le64toh(o->entry.items[i].object_offset);
2257 le_hash = o->entry.items[i].hash;
2258
2259 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2260 if (r < 0)
2261 return r;
2262
2263 if (le_hash != o->data.hash)
2264 return -EBADMSG;
2265
2266 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2267 t = (size_t) l;
2268
2269 /* We hit the limit on 32bit machines */
2270 if ((uint64_t) t != l)
2271 return -E2BIG;
2272
2273 if (o->object.flags & OBJECT_COMPRESSED) {
2274#ifdef HAVE_XZ
2275 uint64_t rsize;
2276
2277 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2278 return -EBADMSG;
2279
2280 data = from->compress_buffer;
2281 l = rsize;
2282#else
2283 return -EPROTONOSUPPORT;
2284#endif
2285 } else
2286 data = o->data.payload;
2287
2288 r = journal_file_append_data(to, data, l, &u, &h);
2289 if (r < 0)
2290 return r;
2291
2292 xor_hash ^= le64toh(u->data.hash);
2293 items[i].object_offset = htole64(h);
2294 items[i].hash = u->data.hash;
2295
2296 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2297 if (r < 0)
2298 return r;
2299 }
2300
2301 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2302}
babfc091
LP
2303
2304void journal_default_metrics(JournalMetrics *m, int fd) {
2305 uint64_t fs_size = 0;
2306 struct statvfs ss;
a7bc2c2a 2307 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2308
2309 assert(m);
2310 assert(fd >= 0);
2311
2312 if (fstatvfs(fd, &ss) >= 0)
2313 fs_size = ss.f_frsize * ss.f_blocks;
2314
2315 if (m->max_use == (uint64_t) -1) {
2316
2317 if (fs_size > 0) {
2318 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2319
2320 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2321 m->max_use = DEFAULT_MAX_USE_UPPER;
2322
2323 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2324 m->max_use = DEFAULT_MAX_USE_LOWER;
2325 } else
2326 m->max_use = DEFAULT_MAX_USE_LOWER;
2327 } else {
2328 m->max_use = PAGE_ALIGN(m->max_use);
2329
2330 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2331 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2332 }
2333
2334 if (m->max_size == (uint64_t) -1) {
2335 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2336
2337 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2338 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2339 } else
2340 m->max_size = PAGE_ALIGN(m->max_size);
2341
2342 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2343 m->max_size = JOURNAL_FILE_SIZE_MIN;
2344
2345 if (m->max_size*2 > m->max_use)
2346 m->max_use = m->max_size*2;
2347
2348 if (m->min_size == (uint64_t) -1)
2349 m->min_size = JOURNAL_FILE_SIZE_MIN;
2350 else {
2351 m->min_size = PAGE_ALIGN(m->min_size);
2352
2353 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2354 m->min_size = JOURNAL_FILE_SIZE_MIN;
2355
2356 if (m->min_size > m->max_size)
2357 m->max_size = m->min_size;
2358 }
2359
2360 if (m->keep_free == (uint64_t) -1) {
2361
2362 if (fs_size > 0) {
2363 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2364
2365 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2366 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2367
2368 } else
2369 m->keep_free = DEFAULT_KEEP_FREE;
2370 }
2371
e7bf07b3
LP
2372 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2373 format_bytes(a, sizeof(a), m->max_use),
2374 format_bytes(b, sizeof(b), m->max_size),
2375 format_bytes(c, sizeof(c), m->min_size),
2376 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2377}
08984293
LP
2378
2379int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
08984293
LP
2380 assert(f);
2381 assert(from || to);
2382
2383 if (from) {
162566a4
LP
2384 if (f->header->head_entry_realtime == 0)
2385 return -ENOENT;
08984293 2386
162566a4 2387 *from = le64toh(f->header->head_entry_realtime);
08984293
LP
2388 }
2389
2390 if (to) {
162566a4
LP
2391 if (f->header->tail_entry_realtime == 0)
2392 return -ENOENT;
08984293 2393
162566a4 2394 *to = le64toh(f->header->tail_entry_realtime);
08984293
LP
2395 }
2396
2397 return 1;
2398}
2399
2400int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2401 char t[9+32+1] = "_BOOT_ID=";
2402 Object *o;
2403 uint64_t p;
2404 int r;
2405
2406 assert(f);
2407 assert(from || to);
2408
2409 sd_id128_to_string(boot_id, t + 9);
2410
2411 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2412 if (r <= 0)
2413 return r;
2414
2415 if (le64toh(o->data.n_entries) <= 0)
2416 return 0;
2417
2418 if (from) {
2419 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2420 if (r < 0)
2421 return r;
2422
2423 *from = le64toh(o->entry.monotonic);
2424 }
2425
2426 if (to) {
2427 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2428 if (r < 0)
2429 return r;
2430
2431 r = generic_array_get_plus_one(f,
2432 le64toh(o->data.entry_offset),
2433 le64toh(o->data.entry_array_offset),
2434 le64toh(o->data.n_entries)-1,
2435 &o, NULL);
2436 if (r <= 0)
2437 return r;
2438
2439 *to = le64toh(o->entry.monotonic);
2440 }
2441
2442 return 1;
2443}
dca6219e
LP
2444
2445bool journal_file_rotate_suggested(JournalFile *f) {
2446 assert(f);
2447
2448 /* If we gained new header fields we gained new features,
2449 * hence suggest a rotation */
361f9cbc
LP
2450 if (le64toh(f->header->header_size) < sizeof(Header)) {
2451 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
dca6219e 2452 return true;
361f9cbc 2453 }
dca6219e
LP
2454
2455 /* Let's check if the hash tables grew over a certain fill
2456 * level (75%, borrowing this value from Java's hash table
2457 * implementation), and if so suggest a rotation. To calculate
2458 * the fill level we need the n_data field, which only exists
2459 * in newer versions. */
2460
2461 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
361f9cbc
LP
2462 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2463 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2464 f->path,
2465 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2466 (unsigned long long) le64toh(f->header->n_data),
2467 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2468 (unsigned long long) (f->last_stat.st_size),
2469 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
dca6219e 2470 return true;
361f9cbc 2471 }
dca6219e
LP
2472
2473 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
361f9cbc
LP
2474 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2475 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2476 f->path,
2477 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2478 (unsigned long long) le64toh(f->header->n_fields),
2479 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
dca6219e 2480 return true;
361f9cbc 2481 }
dca6219e
LP
2482
2483 return false;
2484}