]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journal-file.c
journal: use a macro to check for file header flags
[thirdparty/systemd.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "fsprg.h"
36
/* Byte sizes of the hash tables created for new files. The data table
 * value is used as a lower bound, the field table value is used as is. */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads of at least this many bytes are candidates for
 * compression when compression is enabled for the file */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MiB */

/* n_data was the first entry we added after the initial file format design,
 * hence every valid header is at least large enough to reach that field */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
63
64 void journal_file_close(JournalFile *f) {
65 assert(f);
66
67 /* Write the final tag */
68 if (f->seal && f->writable)
69 journal_file_append_tag(f);
70
71 /* Sync everything to disk, before we mark the file offline */
72 if (f->mmap && f->fd >= 0)
73 mmap_cache_close_fd(f->mmap, f->fd);
74
75 if (f->writable && f->fd >= 0)
76 fdatasync(f->fd);
77
78 if (f->header) {
79 /* Mark the file offline. Don't override the archived state if it already is set */
80 if (f->writable && f->header->state == STATE_ONLINE)
81 f->header->state = STATE_OFFLINE;
82
83 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84 }
85
86 if (f->fd >= 0)
87 close_nointr_nofail(f->fd);
88
89 free(f->path);
90
91 if (f->mmap)
92 mmap_cache_unref(f->mmap);
93
94 #ifdef HAVE_XZ
95 free(f->compress_buffer);
96 #endif
97
98 #ifdef HAVE_GCRYPT
99 if (f->fss_file)
100 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
101 else if (f->fsprg_state)
102 free(f->fsprg_state);
103
104 free(f->fsprg_seed);
105
106 if (f->hmac)
107 gcry_md_close(f->hmac);
108 #endif
109
110 free(f);
111 }
112
113 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
114 Header h;
115 ssize_t k;
116 int r;
117
118 assert(f);
119
120 zero(h);
121 memcpy(h.signature, HEADER_SIGNATURE, 8);
122 h.header_size = htole64(ALIGN64(sizeof(h)));
123
124 h.incompatible_flags =
125 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127 h.compatible_flags =
128 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
129
130 r = sd_id128_randomize(&h.file_id);
131 if (r < 0)
132 return r;
133
134 if (template) {
135 h.seqnum_id = template->header->seqnum_id;
136 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
137 } else
138 h.seqnum_id = h.file_id;
139
140 k = pwrite(f->fd, &h, sizeof(h), 0);
141 if (k < 0)
142 return -errno;
143
144 if (k != sizeof(h))
145 return -EIO;
146
147 return 0;
148 }
149
150 static int journal_file_refresh_header(JournalFile *f) {
151 int r;
152 sd_id128_t boot_id;
153
154 assert(f);
155
156 r = sd_id128_get_machine(&f->header->machine_id);
157 if (r < 0)
158 return r;
159
160 r = sd_id128_get_boot(&boot_id);
161 if (r < 0)
162 return r;
163
164 if (sd_id128_equal(boot_id, f->header->boot_id))
165 f->tail_entry_monotonic_valid = true;
166
167 f->header->boot_id = boot_id;
168
169 f->header->state = STATE_ONLINE;
170
171 /* Sync the online state to disk */
172 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173 fdatasync(f->fd);
174
175 return 0;
176 }
177
178 static int journal_file_verify_header(JournalFile *f) {
179 assert(f);
180
181 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
182 return -EBADMSG;
183
184 /* In both read and write mode we refuse to open files with
185 * incompatible flags we don't know */
186 #ifdef HAVE_XZ
187 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
188 return -EPROTONOSUPPORT;
189 #else
190 if (f->header->incompatible_flags != 0)
191 return -EPROTONOSUPPORT;
192 #endif
193
194 /* When open for writing we refuse to open files with
195 * compatible flags, too */
196 if (f->writable) {
197 #ifdef HAVE_GCRYPT
198 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
199 return -EPROTONOSUPPORT;
200 #else
201 if (f->header->compatible_flags != 0)
202 return -EPROTONOSUPPORT;
203 #endif
204 }
205
206 if (f->header->state >= _STATE_MAX)
207 return -EBADMSG;
208
209 /* The first addition was n_data, so check that we are at least this large */
210 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
211 return -EBADMSG;
212
213 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
214 return -EBADMSG;
215
216 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
217 return -ENODATA;
218
219 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
220 return -ENODATA;
221
222 if (!VALID64(f->header->data_hash_table_offset) ||
223 !VALID64(f->header->field_hash_table_offset) ||
224 !VALID64(f->header->tail_object_offset) ||
225 !VALID64(f->header->entry_array_offset))
226 return -ENODATA;
227
228 if (f->writable) {
229 uint8_t state;
230 sd_id128_t machine_id;
231 int r;
232
233 r = sd_id128_get_machine(&machine_id);
234 if (r < 0)
235 return r;
236
237 if (!sd_id128_equal(machine_id, f->header->machine_id))
238 return -EHOSTDOWN;
239
240 state = f->header->state;
241
242 if (state == STATE_ONLINE) {
243 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
244 return -EBUSY;
245 } else if (state == STATE_ARCHIVED)
246 return -ESHUTDOWN;
247 else if (state != STATE_OFFLINE) {
248 log_debug("Journal file %s has unknown state %u.", f->path, state);
249 return -EBUSY;
250 }
251 }
252
253 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
254
255 if (f->writable)
256 f->seal = JOURNAL_HEADER_SEALED(f->header);
257
258 return 0;
259 }
260
261 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
262 uint64_t old_size, new_size;
263 int r;
264
265 assert(f);
266
267 /* We assume that this file is not sparse, and we know that
268 * for sure, since we always call posix_fallocate()
269 * ourselves */
270
271 old_size =
272 le64toh(f->header->header_size) +
273 le64toh(f->header->arena_size);
274
275 new_size = PAGE_ALIGN(offset + size);
276 if (new_size < le64toh(f->header->header_size))
277 new_size = le64toh(f->header->header_size);
278
279 if (new_size <= old_size)
280 return 0;
281
282 if (f->metrics.max_size > 0 &&
283 new_size > f->metrics.max_size)
284 return -E2BIG;
285
286 if (new_size > f->metrics.min_size &&
287 f->metrics.keep_free > 0) {
288 struct statvfs svfs;
289
290 if (fstatvfs(f->fd, &svfs) >= 0) {
291 uint64_t available;
292
293 available = svfs.f_bfree * svfs.f_bsize;
294
295 if (available >= f->metrics.keep_free)
296 available -= f->metrics.keep_free;
297 else
298 available = 0;
299
300 if (new_size - old_size > available)
301 return -E2BIG;
302 }
303 }
304
305 /* Note that the glibc fallocate() fallback is very
306 inefficient, hence we try to minimize the allocation area
307 as we can. */
308 r = posix_fallocate(f->fd, old_size, new_size - old_size);
309 if (r != 0)
310 return -r;
311
312 mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
313
314 if (fstat(f->fd, &f->last_stat) < 0)
315 return -errno;
316
317 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
318
319 return 0;
320 }
321
322 static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
323 assert(f);
324 assert(ret);
325
326 /* Avoid SIGBUS on invalid accesses */
327 if (offset + size > (uint64_t) f->last_stat.st_size) {
328 /* Hmm, out of range? Let's refresh the fstat() data
329 * first, before we trust that check. */
330
331 if (fstat(f->fd, &f->last_stat) < 0 ||
332 offset + size > (uint64_t) f->last_stat.st_size)
333 return -EADDRNOTAVAIL;
334 }
335
336 return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
337 }
338
339 static uint64_t minimum_header_size(Object *o) {
340
341 static uint64_t table[] = {
342 [OBJECT_DATA] = sizeof(DataObject),
343 [OBJECT_FIELD] = sizeof(FieldObject),
344 [OBJECT_ENTRY] = sizeof(EntryObject),
345 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
346 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
347 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
348 [OBJECT_TAG] = sizeof(TagObject),
349 };
350
351 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
352 return sizeof(ObjectHeader);
353
354 return table[o->object.type];
355 }
356
357 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
358 int r;
359 void *t;
360 Object *o;
361 uint64_t s;
362 unsigned context;
363
364 assert(f);
365 assert(ret);
366
367 /* Objects may only be located at multiple of 64 bit */
368 if (!VALID64(offset))
369 return -EFAULT;
370
371 /* One context for each type, plus one catch-all for the rest */
372 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
373
374 r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
375 if (r < 0)
376 return r;
377
378 o = (Object*) t;
379 s = le64toh(o->object.size);
380
381 if (s < sizeof(ObjectHeader))
382 return -EBADMSG;
383
384 if (o->object.type <= OBJECT_UNUSED)
385 return -EBADMSG;
386
387 if (s < minimum_header_size(o))
388 return -EBADMSG;
389
390 if (type >= 0 && o->object.type != type)
391 return -EBADMSG;
392
393 if (s > sizeof(ObjectHeader)) {
394 r = journal_file_move_to(f, o->object.type, offset, s, &t);
395 if (r < 0)
396 return r;
397
398 o = (Object*) t;
399 }
400
401 *ret = o;
402 return 0;
403 }
404
405 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
406 uint64_t r;
407
408 assert(f);
409
410 r = le64toh(f->header->tail_entry_seqnum) + 1;
411
412 if (seqnum) {
413 /* If an external seqnum counter was passed, we update
414 * both the local and the external one, and set it to
415 * the maximum of both */
416
417 if (*seqnum + 1 > r)
418 r = *seqnum + 1;
419
420 *seqnum = r;
421 }
422
423 f->header->tail_entry_seqnum = htole64(r);
424
425 if (f->header->head_entry_seqnum == 0)
426 f->header->head_entry_seqnum = htole64(r);
427
428 return r;
429 }
430
431 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
432 int r;
433 uint64_t p;
434 Object *tail, *o;
435 void *t;
436
437 assert(f);
438 assert(type > 0 && type < _OBJECT_TYPE_MAX);
439 assert(size >= sizeof(ObjectHeader));
440 assert(offset);
441 assert(ret);
442
443 p = le64toh(f->header->tail_object_offset);
444 if (p == 0)
445 p = le64toh(f->header->header_size);
446 else {
447 r = journal_file_move_to_object(f, -1, p, &tail);
448 if (r < 0)
449 return r;
450
451 p += ALIGN64(le64toh(tail->object.size));
452 }
453
454 r = journal_file_allocate(f, p, size);
455 if (r < 0)
456 return r;
457
458 r = journal_file_move_to(f, type, p, size, &t);
459 if (r < 0)
460 return r;
461
462 o = (Object*) t;
463
464 zero(o->object);
465 o->object.type = type;
466 o->object.size = htole64(size);
467
468 f->header->tail_object_offset = htole64(p);
469 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
470
471 *ret = o;
472 *offset = p;
473
474 return 0;
475 }
476
477 static int journal_file_setup_data_hash_table(JournalFile *f) {
478 uint64_t s, p;
479 Object *o;
480 int r;
481
482 assert(f);
483
484 /* We estimate that we need 1 hash table entry per 768 of
485 journal file and we want to make sure we never get beyond
486 75% fill level. Calculate the hash table size for the
487 maximum file size based on these metrics. */
488
489 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
490 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
491 s = DEFAULT_DATA_HASH_TABLE_SIZE;
492
493 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
494
495 r = journal_file_append_object(f,
496 OBJECT_DATA_HASH_TABLE,
497 offsetof(Object, hash_table.items) + s,
498 &o, &p);
499 if (r < 0)
500 return r;
501
502 memset(o->hash_table.items, 0, s);
503
504 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
505 f->header->data_hash_table_size = htole64(s);
506
507 return 0;
508 }
509
510 static int journal_file_setup_field_hash_table(JournalFile *f) {
511 uint64_t s, p;
512 Object *o;
513 int r;
514
515 assert(f);
516
517 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
518 r = journal_file_append_object(f,
519 OBJECT_FIELD_HASH_TABLE,
520 offsetof(Object, hash_table.items) + s,
521 &o, &p);
522 if (r < 0)
523 return r;
524
525 memset(o->hash_table.items, 0, s);
526
527 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
528 f->header->field_hash_table_size = htole64(s);
529
530 return 0;
531 }
532
533 static int journal_file_map_data_hash_table(JournalFile *f) {
534 uint64_t s, p;
535 void *t;
536 int r;
537
538 assert(f);
539
540 p = le64toh(f->header->data_hash_table_offset);
541 s = le64toh(f->header->data_hash_table_size);
542
543 r = journal_file_move_to(f,
544 OBJECT_DATA_HASH_TABLE,
545 p, s,
546 &t);
547 if (r < 0)
548 return r;
549
550 f->data_hash_table = t;
551 return 0;
552 }
553
554 static int journal_file_map_field_hash_table(JournalFile *f) {
555 uint64_t s, p;
556 void *t;
557 int r;
558
559 assert(f);
560
561 p = le64toh(f->header->field_hash_table_offset);
562 s = le64toh(f->header->field_hash_table_size);
563
564 r = journal_file_move_to(f,
565 OBJECT_FIELD_HASH_TABLE,
566 p, s,
567 &t);
568 if (r < 0)
569 return r;
570
571 f->field_hash_table = t;
572 return 0;
573 }
574
575 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
576 uint64_t p, h;
577 int r;
578
579 assert(f);
580 assert(o);
581 assert(offset > 0);
582 assert(o->object.type == OBJECT_DATA);
583
584 /* This might alter the window we are looking at */
585
586 o->data.next_hash_offset = o->data.next_field_offset = 0;
587 o->data.entry_offset = o->data.entry_array_offset = 0;
588 o->data.n_entries = 0;
589
590 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
591 p = le64toh(f->data_hash_table[h].tail_hash_offset);
592 if (p == 0) {
593 /* Only entry in the hash table is easy */
594 f->data_hash_table[h].head_hash_offset = htole64(offset);
595 } else {
596 /* Move back to the previous data object, to patch in
597 * pointer */
598
599 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
600 if (r < 0)
601 return r;
602
603 o->data.next_hash_offset = htole64(offset);
604 }
605
606 f->data_hash_table[h].tail_hash_offset = htole64(offset);
607
608 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
609 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
610
611 return 0;
612 }
613
614 int journal_file_find_data_object_with_hash(
615 JournalFile *f,
616 const void *data, uint64_t size, uint64_t hash,
617 Object **ret, uint64_t *offset) {
618
619 uint64_t p, osize, h;
620 int r;
621
622 assert(f);
623 assert(data || size == 0);
624
625 osize = offsetof(Object, data.payload) + size;
626
627 if (f->header->data_hash_table_size == 0)
628 return -EBADMSG;
629
630 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
631 p = le64toh(f->data_hash_table[h].head_hash_offset);
632
633 while (p > 0) {
634 Object *o;
635
636 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
637 if (r < 0)
638 return r;
639
640 if (le64toh(o->data.hash) != hash)
641 goto next;
642
643 if (o->object.flags & OBJECT_COMPRESSED) {
644 #ifdef HAVE_XZ
645 uint64_t l, rsize;
646
647 l = le64toh(o->object.size);
648 if (l <= offsetof(Object, data.payload))
649 return -EBADMSG;
650
651 l -= offsetof(Object, data.payload);
652
653 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
654 return -EBADMSG;
655
656 if (rsize == size &&
657 memcmp(f->compress_buffer, data, size) == 0) {
658
659 if (ret)
660 *ret = o;
661
662 if (offset)
663 *offset = p;
664
665 return 1;
666 }
667 #else
668 return -EPROTONOSUPPORT;
669 #endif
670
671 } else if (le64toh(o->object.size) == osize &&
672 memcmp(o->data.payload, data, size) == 0) {
673
674 if (ret)
675 *ret = o;
676
677 if (offset)
678 *offset = p;
679
680 return 1;
681 }
682
683 next:
684 p = le64toh(o->data.next_hash_offset);
685 }
686
687 return 0;
688 }
689
690 int journal_file_find_data_object(
691 JournalFile *f,
692 const void *data, uint64_t size,
693 Object **ret, uint64_t *offset) {
694
695 uint64_t hash;
696
697 assert(f);
698 assert(data || size == 0);
699
700 hash = hash64(data, size);
701
702 return journal_file_find_data_object_with_hash(f,
703 data, size, hash,
704 ret, offset);
705 }
706
707 static int journal_file_append_data(
708 JournalFile *f,
709 const void *data, uint64_t size,
710 Object **ret, uint64_t *offset) {
711
712 uint64_t hash, p;
713 uint64_t osize;
714 Object *o;
715 int r;
716 bool compressed = false;
717
718 assert(f);
719 assert(data || size == 0);
720
721 hash = hash64(data, size);
722
723 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
724 if (r < 0)
725 return r;
726 else if (r > 0) {
727
728 if (ret)
729 *ret = o;
730
731 if (offset)
732 *offset = p;
733
734 return 0;
735 }
736
737 osize = offsetof(Object, data.payload) + size;
738 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
739 if (r < 0)
740 return r;
741
742 o->data.hash = htole64(hash);
743
744 #ifdef HAVE_XZ
745 if (f->compress &&
746 size >= COMPRESSION_SIZE_THRESHOLD) {
747 uint64_t rsize;
748
749 compressed = compress_blob(data, size, o->data.payload, &rsize);
750
751 if (compressed) {
752 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
753 o->object.flags |= OBJECT_COMPRESSED;
754
755 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
756 }
757 }
758 #endif
759
760 if (!compressed && size > 0)
761 memcpy(o->data.payload, data, size);
762
763 r = journal_file_link_data(f, o, p, hash);
764 if (r < 0)
765 return r;
766
767 r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
768 if (r < 0)
769 return r;
770
771 /* The linking might have altered the window, so let's
772 * refresh our pointer */
773 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
774 if (r < 0)
775 return r;
776
777 if (ret)
778 *ret = o;
779
780 if (offset)
781 *offset = p;
782
783 return 0;
784 }
785
786 uint64_t journal_file_entry_n_items(Object *o) {
787 assert(o);
788 assert(o->object.type == OBJECT_ENTRY);
789
790 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
791 }
792
793 uint64_t journal_file_entry_array_n_items(Object *o) {
794 assert(o);
795 assert(o->object.type == OBJECT_ENTRY_ARRAY);
796
797 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
798 }
799
800 uint64_t journal_file_hash_table_n_items(Object *o) {
801 assert(o);
802 assert(o->object.type == OBJECT_DATA_HASH_TABLE ||
803 o->object.type == OBJECT_FIELD_HASH_TABLE);
804
805 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
806 }
807
808 static int link_entry_into_array(JournalFile *f,
809 le64_t *first,
810 le64_t *idx,
811 uint64_t p) {
812 int r;
813 uint64_t n = 0, ap = 0, q, i, a, hidx;
814 Object *o;
815
816 assert(f);
817 assert(first);
818 assert(idx);
819 assert(p > 0);
820
821 a = le64toh(*first);
822 i = hidx = le64toh(*idx);
823 while (a > 0) {
824
825 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
826 if (r < 0)
827 return r;
828
829 n = journal_file_entry_array_n_items(o);
830 if (i < n) {
831 o->entry_array.items[i] = htole64(p);
832 *idx = htole64(hidx + 1);
833 return 0;
834 }
835
836 i -= n;
837 ap = a;
838 a = le64toh(o->entry_array.next_entry_array_offset);
839 }
840
841 if (hidx > n)
842 n = (hidx+1) * 2;
843 else
844 n = n * 2;
845
846 if (n < 4)
847 n = 4;
848
849 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
850 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
851 &o, &q);
852 if (r < 0)
853 return r;
854
855 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
856 if (r < 0)
857 return r;
858
859 o->entry_array.items[i] = htole64(p);
860
861 if (ap == 0)
862 *first = htole64(q);
863 else {
864 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
865 if (r < 0)
866 return r;
867
868 o->entry_array.next_entry_array_offset = htole64(q);
869 }
870
871 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
872 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
873
874 *idx = htole64(hidx + 1);
875
876 return 0;
877 }
878
879 static int link_entry_into_array_plus_one(JournalFile *f,
880 le64_t *extra,
881 le64_t *first,
882 le64_t *idx,
883 uint64_t p) {
884
885 int r;
886
887 assert(f);
888 assert(extra);
889 assert(first);
890 assert(idx);
891 assert(p > 0);
892
893 if (*idx == 0)
894 *extra = htole64(p);
895 else {
896 le64_t i;
897
898 i = htole64(le64toh(*idx) - 1);
899 r = link_entry_into_array(f, first, &i, p);
900 if (r < 0)
901 return r;
902 }
903
904 *idx = htole64(le64toh(*idx) + 1);
905 return 0;
906 }
907
908 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
909 uint64_t p;
910 int r;
911 assert(f);
912 assert(o);
913 assert(offset > 0);
914
915 p = le64toh(o->entry.items[i].object_offset);
916 if (p == 0)
917 return -EINVAL;
918
919 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
920 if (r < 0)
921 return r;
922
923 return link_entry_into_array_plus_one(f,
924 &o->data.entry_offset,
925 &o->data.entry_array_offset,
926 &o->data.n_entries,
927 offset);
928 }
929
930 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
931 uint64_t n, i;
932 int r;
933
934 assert(f);
935 assert(o);
936 assert(offset > 0);
937 assert(o->object.type == OBJECT_ENTRY);
938
939 __sync_synchronize();
940
941 /* Link up the entry itself */
942 r = link_entry_into_array(f,
943 &f->header->entry_array_offset,
944 &f->header->n_entries,
945 offset);
946 if (r < 0)
947 return r;
948
949 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
950
951 if (f->header->head_entry_realtime == 0)
952 f->header->head_entry_realtime = o->entry.realtime;
953
954 f->header->tail_entry_realtime = o->entry.realtime;
955 f->header->tail_entry_monotonic = o->entry.monotonic;
956
957 f->tail_entry_monotonic_valid = true;
958
959 /* Link up the items */
960 n = journal_file_entry_n_items(o);
961 for (i = 0; i < n; i++) {
962 r = journal_file_link_entry_item(f, o, offset, i);
963 if (r < 0)
964 return r;
965 }
966
967 return 0;
968 }
969
970 static int journal_file_append_entry_internal(
971 JournalFile *f,
972 const dual_timestamp *ts,
973 uint64_t xor_hash,
974 const EntryItem items[], unsigned n_items,
975 uint64_t *seqnum,
976 Object **ret, uint64_t *offset) {
977 uint64_t np;
978 uint64_t osize;
979 Object *o;
980 int r;
981
982 assert(f);
983 assert(items || n_items == 0);
984 assert(ts);
985
986 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
987
988 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
989 if (r < 0)
990 return r;
991
992 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
993 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
994 o->entry.realtime = htole64(ts->realtime);
995 o->entry.monotonic = htole64(ts->monotonic);
996 o->entry.xor_hash = htole64(xor_hash);
997 o->entry.boot_id = f->header->boot_id;
998
999 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
1000 if (r < 0)
1001 return r;
1002
1003 r = journal_file_link_entry(f, o, np);
1004 if (r < 0)
1005 return r;
1006
1007 if (ret)
1008 *ret = o;
1009
1010 if (offset)
1011 *offset = np;
1012
1013 return 0;
1014 }
1015
1016 void journal_file_post_change(JournalFile *f) {
1017 assert(f);
1018
1019 /* inotify() does not receive IN_MODIFY events from file
1020 * accesses done via mmap(). After each access we hence
1021 * trigger IN_MODIFY by truncating the journal file to its
1022 * current size which triggers IN_MODIFY. */
1023
1024 __sync_synchronize();
1025
1026 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1027 log_error("Failed to to truncate file to its own size: %m");
1028 }
1029
1030 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1031 unsigned i;
1032 EntryItem *items;
1033 int r;
1034 uint64_t xor_hash = 0;
1035 struct dual_timestamp _ts;
1036
1037 assert(f);
1038 assert(iovec || n_iovec == 0);
1039
1040 if (!f->writable)
1041 return -EPERM;
1042
1043 if (!ts) {
1044 dual_timestamp_get(&_ts);
1045 ts = &_ts;
1046 }
1047
1048 if (f->tail_entry_monotonic_valid &&
1049 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1050 return -EINVAL;
1051
1052 r = journal_file_maybe_append_tag(f, ts->realtime);
1053 if (r < 0)
1054 return r;
1055
1056 /* alloca() can't take 0, hence let's allocate at least one */
1057 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1058
1059 for (i = 0; i < n_iovec; i++) {
1060 uint64_t p;
1061 Object *o;
1062
1063 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1064 if (r < 0)
1065 return r;
1066
1067 xor_hash ^= le64toh(o->data.hash);
1068 items[i].object_offset = htole64(p);
1069 items[i].hash = o->data.hash;
1070 }
1071
1072 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1073
1074 journal_file_post_change(f);
1075
1076 return r;
1077 }
1078
1079 static int generic_array_get(JournalFile *f,
1080 uint64_t first,
1081 uint64_t i,
1082 Object **ret, uint64_t *offset) {
1083
1084 Object *o;
1085 uint64_t p = 0, a;
1086 int r;
1087
1088 assert(f);
1089
1090 a = first;
1091 while (a > 0) {
1092 uint64_t n;
1093
1094 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1095 if (r < 0)
1096 return r;
1097
1098 n = journal_file_entry_array_n_items(o);
1099 if (i < n) {
1100 p = le64toh(o->entry_array.items[i]);
1101 break;
1102 }
1103
1104 i -= n;
1105 a = le64toh(o->entry_array.next_entry_array_offset);
1106 }
1107
1108 if (a <= 0 || p <= 0)
1109 return 0;
1110
1111 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1112 if (r < 0)
1113 return r;
1114
1115 if (ret)
1116 *ret = o;
1117
1118 if (offset)
1119 *offset = p;
1120
1121 return 1;
1122 }
1123
1124 static int generic_array_get_plus_one(JournalFile *f,
1125 uint64_t extra,
1126 uint64_t first,
1127 uint64_t i,
1128 Object **ret, uint64_t *offset) {
1129
1130 Object *o;
1131
1132 assert(f);
1133
1134 if (i == 0) {
1135 int r;
1136
1137 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1138 if (r < 0)
1139 return r;
1140
1141 if (ret)
1142 *ret = o;
1143
1144 if (offset)
1145 *offset = extra;
1146
1147 return 1;
1148 }
1149
1150 return generic_array_get(f, first, i-1, ret, offset);
1151 }
1152
/* Results of the test_object callbacks used by the bisection helpers: the
 * tested entry either matches the needle, or the needle is to be looked for
 * to the left or to the right of it. */
enum {
        TEST_FOUND = 0,
        TEST_LEFT  = 1,
        TEST_RIGHT = 2
};
1158
/* Searches the entry array chain starting at offset `first`, looking at no
 * more than `n` items in total, for the entry selected by `needle`.
 *
 * `test_object` is called with the offset of a candidate entry and the
 * needle and returns TEST_FOUND, TEST_LEFT, TEST_RIGHT or a negative errno.
 * An exact match is treated like TEST_RIGHT when `direction` is
 * DIRECTION_DOWN and like TEST_LEFT otherwise.
 *
 * Returns 1 if an entry was located (filling *ret, *offset and *idx where
 * non-NULL; *idx is the position within the whole chain), 0 if none was
 * found, -EBADMSG if an examined array slot is empty, or another negative
 * errno from test_object / the object helpers. */
static int generic_array_bisect(JournalFile *f,
                                uint64_t first,
                                uint64_t n,
                                uint64_t needle,
                                int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
                                direction_t direction,
                                Object **ret,
                                uint64_t *offset,
                                uint64_t *idx) {

        /* t: number of items in the arrays already skipped
         * i: index within the current array
         * last_p: last item of the previously visited array */
        uint64_t a, p, t = 0, i = 0, last_p = 0;
        /* Set when the item before index i is the one to return */
        bool subtract_one = false;
        Object *o, *array = NULL;
        int r;

        assert(f);
        assert(test_object);

        a = first;
        while (a > 0) {
                uint64_t left, right, k, lp;

                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
                if (r < 0)
                        return r;

                /* Only the first n items of the chain count as populated */
                k = journal_file_entry_array_n_items(array);
                right = MIN(k, n);
                if (right <= 0)
                        return 0;

                /* Test the last usable item of this array first, to decide
                 * whether the result lies within this array at all */
                i = right - 1;
                lp = p = le64toh(array->entry_array.items[i]);
                if (p <= 0)
                        return -EBADMSG;

                r = test_object(f, p, needle);
                if (r < 0)
                        return r;

                if (r == TEST_FOUND)
                        r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;

                if (r == TEST_RIGHT) {
                        /* The boundary lies in this array: bisect the
                         * range [left, right] until it collapses */
                        left = 0;
                        right -= 1;
                        for (;;) {
                                if (left == right) {
                                        if (direction == DIRECTION_UP)
                                                subtract_one = true;

                                        i = left;
                                        goto found;
                                }

                                assert(left < right);

                                i = (left + right) / 2;
                                p = le64toh(array->entry_array.items[i]);
                                if (p <= 0)
                                        return -EBADMSG;

                                r = test_object(f, p, needle);
                                if (r < 0)
                                        return r;

                                if (r == TEST_FOUND)
                                        r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;

                                if (r == TEST_RIGHT)
                                        right = i;
                                else
                                        left = i + 1;
                        }
                }

                /* This array has more slots than items remain, hence it is
                 * the last populated one and there is nothing further
                 * to look at */
                if (k > n) {
                        if (direction == DIRECTION_UP) {
                                i = n;
                                subtract_one = true;
                                goto found;
                        }

                        return 0;
                }

                last_p = lp;

                /* Continue with the next array in the chain */
                n -= k;
                t += k;
                a = le64toh(array->entry_array.next_entry_array_offset);
        }

        return 0;

found:
        /* Nothing precedes the very first item of the chain */
        if (subtract_one && t == 0 && i == 0)
                return 0;

        if (subtract_one && i == 0)
                /* The preceding item is the last one of the previous array */
                p = last_p;
        else if (subtract_one)
                p = le64toh(array->entry_array.items[i-1]);
        else
                p = le64toh(array->entry_array.items[i]);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
        if (r < 0)
                return r;

        if (ret)
                *ret = o;

        if (offset)
                *offset = p;

        if (idx)
                *idx = t + i + (subtract_one ? -1 : 0);

        return 1;
}
1280
/* Variant of generic_array_bisect() for lists whose element 0 is stored
 * separately at offset `extra`, while the remaining n-1 elements live in
 * the array chain starting at `first`.
 *
 * Returns 1 if an entry was located (filling *ret, *offset and *idx where
 * non-NULL; *idx counts the extra element as index 0), 0 if none was found,
 * or a negative errno. */
static int generic_array_bisect_plus_one(JournalFile *f,
                                         uint64_t extra,
                                         uint64_t first,
                                         uint64_t n,
                                         uint64_t needle,
                                         int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
                                         direction_t direction,
                                         Object **ret,
                                         uint64_t *offset,
                                         uint64_t *idx) {

        int r;
        /* Set when the extra element is the fallback result */
        bool step_back = false;
        Object *o;

        assert(f);
        assert(test_object);

        if (n <= 0)
                return 0;

        /* This bisects the array in object 'first', but first checks
         * an extra  */
        r = test_object(f, extra, needle);
        if (r < 0)
                return r;

        if (r == TEST_FOUND)
                r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;

        /* if we are looking with DIRECTION_UP then we need to first
           see if in the actual array there is a matching entry, and
           return the last one of that. But if there isn't any we need
           to return this one. Hence remember this, and return it
           below. */
        if (r == TEST_LEFT)
                step_back = direction == DIRECTION_UP;

        if (r == TEST_RIGHT) {
                if (direction == DIRECTION_DOWN)
                        goto found;
                else
                        return 0;
        }

        /* The extra element accounts for one of the n items */
        r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);

        if (r == 0 && step_back)
                goto found;

        /* Shift the array index by one to account for the extra element */
        if (r > 0 && idx)
                (*idx) ++;

        return r;

found:
        /* Return the extra element itself */
        r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
        if (r < 0)
                return r;

        if (ret)
                *ret = o;

        if (offset)
                *offset = extra;

        if (idx)
                *idx = 0;

        return 1;
}
1352
1353 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1354 assert(f);
1355 assert(p > 0);
1356
1357 if (p == needle)
1358 return TEST_FOUND;
1359 else if (p < needle)
1360 return TEST_LEFT;
1361 else
1362 return TEST_RIGHT;
1363 }
1364
1365 int journal_file_move_to_entry_by_offset(
1366 JournalFile *f,
1367 uint64_t p,
1368 direction_t direction,
1369 Object **ret,
1370 uint64_t *offset) {
1371
1372 return generic_array_bisect(f,
1373 le64toh(f->header->entry_array_offset),
1374 le64toh(f->header->n_entries),
1375 p,
1376 test_object_offset,
1377 direction,
1378 ret, offset, NULL);
1379 }
1380
1381
1382 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1383 Object *o;
1384 int r;
1385
1386 assert(f);
1387 assert(p > 0);
1388
1389 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1390 if (r < 0)
1391 return r;
1392
1393 if (le64toh(o->entry.seqnum) == needle)
1394 return TEST_FOUND;
1395 else if (le64toh(o->entry.seqnum) < needle)
1396 return TEST_LEFT;
1397 else
1398 return TEST_RIGHT;
1399 }
1400
1401 int journal_file_move_to_entry_by_seqnum(
1402 JournalFile *f,
1403 uint64_t seqnum,
1404 direction_t direction,
1405 Object **ret,
1406 uint64_t *offset) {
1407
1408 return generic_array_bisect(f,
1409 le64toh(f->header->entry_array_offset),
1410 le64toh(f->header->n_entries),
1411 seqnum,
1412 test_object_seqnum,
1413 direction,
1414 ret, offset, NULL);
1415 }
1416
1417 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1418 Object *o;
1419 int r;
1420
1421 assert(f);
1422 assert(p > 0);
1423
1424 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1425 if (r < 0)
1426 return r;
1427
1428 if (le64toh(o->entry.realtime) == needle)
1429 return TEST_FOUND;
1430 else if (le64toh(o->entry.realtime) < needle)
1431 return TEST_LEFT;
1432 else
1433 return TEST_RIGHT;
1434 }
1435
1436 int journal_file_move_to_entry_by_realtime(
1437 JournalFile *f,
1438 uint64_t realtime,
1439 direction_t direction,
1440 Object **ret,
1441 uint64_t *offset) {
1442
1443 return generic_array_bisect(f,
1444 le64toh(f->header->entry_array_offset),
1445 le64toh(f->header->n_entries),
1446 realtime,
1447 test_object_realtime,
1448 direction,
1449 ret, offset, NULL);
1450 }
1451
1452 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1453 Object *o;
1454 int r;
1455
1456 assert(f);
1457 assert(p > 0);
1458
1459 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1460 if (r < 0)
1461 return r;
1462
1463 if (le64toh(o->entry.monotonic) == needle)
1464 return TEST_FOUND;
1465 else if (le64toh(o->entry.monotonic) < needle)
1466 return TEST_LEFT;
1467 else
1468 return TEST_RIGHT;
1469 }
1470
1471 int journal_file_move_to_entry_by_monotonic(
1472 JournalFile *f,
1473 sd_id128_t boot_id,
1474 uint64_t monotonic,
1475 direction_t direction,
1476 Object **ret,
1477 uint64_t *offset) {
1478
1479 char t[9+32+1] = "_BOOT_ID=";
1480 Object *o;
1481 int r;
1482
1483 assert(f);
1484
1485 sd_id128_to_string(boot_id, t + 9);
1486 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1487 if (r < 0)
1488 return r;
1489 if (r == 0)
1490 return -ENOENT;
1491
1492 return generic_array_bisect_plus_one(f,
1493 le64toh(o->data.entry_offset),
1494 le64toh(o->data.entry_array_offset),
1495 le64toh(o->data.n_entries),
1496 monotonic,
1497 test_object_monotonic,
1498 direction,
1499 ret, offset, NULL);
1500 }
1501
1502 int journal_file_next_entry(
1503 JournalFile *f,
1504 Object *o, uint64_t p,
1505 direction_t direction,
1506 Object **ret, uint64_t *offset) {
1507
1508 uint64_t i, n;
1509 int r;
1510
1511 assert(f);
1512 assert(p > 0 || !o);
1513
1514 n = le64toh(f->header->n_entries);
1515 if (n <= 0)
1516 return 0;
1517
1518 if (!o)
1519 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1520 else {
1521 if (o->object.type != OBJECT_ENTRY)
1522 return -EINVAL;
1523
1524 r = generic_array_bisect(f,
1525 le64toh(f->header->entry_array_offset),
1526 le64toh(f->header->n_entries),
1527 p,
1528 test_object_offset,
1529 DIRECTION_DOWN,
1530 NULL, NULL,
1531 &i);
1532 if (r <= 0)
1533 return r;
1534
1535 if (direction == DIRECTION_DOWN) {
1536 if (i >= n - 1)
1537 return 0;
1538
1539 i++;
1540 } else {
1541 if (i <= 0)
1542 return 0;
1543
1544 i--;
1545 }
1546 }
1547
1548 /* And jump to it */
1549 return generic_array_get(f,
1550 le64toh(f->header->entry_array_offset),
1551 i,
1552 ret, offset);
1553 }
1554
1555 int journal_file_skip_entry(
1556 JournalFile *f,
1557 Object *o, uint64_t p,
1558 int64_t skip,
1559 Object **ret, uint64_t *offset) {
1560
1561 uint64_t i, n;
1562 int r;
1563
1564 assert(f);
1565 assert(o);
1566 assert(p > 0);
1567
1568 if (o->object.type != OBJECT_ENTRY)
1569 return -EINVAL;
1570
1571 r = generic_array_bisect(f,
1572 le64toh(f->header->entry_array_offset),
1573 le64toh(f->header->n_entries),
1574 p,
1575 test_object_offset,
1576 DIRECTION_DOWN,
1577 NULL, NULL,
1578 &i);
1579 if (r <= 0)
1580 return r;
1581
1582 /* Calculate new index */
1583 if (skip < 0) {
1584 if ((uint64_t) -skip >= i)
1585 i = 0;
1586 else
1587 i = i - (uint64_t) -skip;
1588 } else
1589 i += (uint64_t) skip;
1590
1591 n = le64toh(f->header->n_entries);
1592 if (n <= 0)
1593 return -EBADMSG;
1594
1595 if (i >= n)
1596 i = n-1;
1597
1598 return generic_array_get(f,
1599 le64toh(f->header->entry_array_offset),
1600 i,
1601 ret, offset);
1602 }
1603
1604 int journal_file_next_entry_for_data(
1605 JournalFile *f,
1606 Object *o, uint64_t p,
1607 uint64_t data_offset,
1608 direction_t direction,
1609 Object **ret, uint64_t *offset) {
1610
1611 uint64_t n, i;
1612 int r;
1613 Object *d;
1614
1615 assert(f);
1616 assert(p > 0 || !o);
1617
1618 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1619 if (r < 0)
1620 return r;
1621
1622 n = le64toh(d->data.n_entries);
1623 if (n <= 0)
1624 return n;
1625
1626 if (!o)
1627 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1628 else {
1629 if (o->object.type != OBJECT_ENTRY)
1630 return -EINVAL;
1631
1632 r = generic_array_bisect_plus_one(f,
1633 le64toh(d->data.entry_offset),
1634 le64toh(d->data.entry_array_offset),
1635 le64toh(d->data.n_entries),
1636 p,
1637 test_object_offset,
1638 DIRECTION_DOWN,
1639 NULL, NULL,
1640 &i);
1641
1642 if (r <= 0)
1643 return r;
1644
1645 if (direction == DIRECTION_DOWN) {
1646 if (i >= n - 1)
1647 return 0;
1648
1649 i++;
1650 } else {
1651 if (i <= 0)
1652 return 0;
1653
1654 i--;
1655 }
1656
1657 }
1658
1659 return generic_array_get_plus_one(f,
1660 le64toh(d->data.entry_offset),
1661 le64toh(d->data.entry_array_offset),
1662 i,
1663 ret, offset);
1664 }
1665
1666 int journal_file_move_to_entry_by_offset_for_data(
1667 JournalFile *f,
1668 uint64_t data_offset,
1669 uint64_t p,
1670 direction_t direction,
1671 Object **ret, uint64_t *offset) {
1672
1673 int r;
1674 Object *d;
1675
1676 assert(f);
1677
1678 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1679 if (r < 0)
1680 return r;
1681
1682 return generic_array_bisect_plus_one(f,
1683 le64toh(d->data.entry_offset),
1684 le64toh(d->data.entry_array_offset),
1685 le64toh(d->data.n_entries),
1686 p,
1687 test_object_offset,
1688 direction,
1689 ret, offset, NULL);
1690 }
1691
1692 int journal_file_move_to_entry_by_monotonic_for_data(
1693 JournalFile *f,
1694 uint64_t data_offset,
1695 sd_id128_t boot_id,
1696 uint64_t monotonic,
1697 direction_t direction,
1698 Object **ret, uint64_t *offset) {
1699
1700 char t[9+32+1] = "_BOOT_ID=";
1701 Object *o, *d;
1702 int r;
1703 uint64_t b, z;
1704
1705 assert(f);
1706
1707 /* First, seek by time */
1708 sd_id128_to_string(boot_id, t + 9);
1709 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1710 if (r < 0)
1711 return r;
1712 if (r == 0)
1713 return -ENOENT;
1714
1715 r = generic_array_bisect_plus_one(f,
1716 le64toh(o->data.entry_offset),
1717 le64toh(o->data.entry_array_offset),
1718 le64toh(o->data.n_entries),
1719 monotonic,
1720 test_object_monotonic,
1721 direction,
1722 NULL, &z, NULL);
1723 if (r <= 0)
1724 return r;
1725
1726 /* And now, continue seeking until we find an entry that
1727 * exists in both bisection arrays */
1728
1729 for (;;) {
1730 Object *qo;
1731 uint64_t p, q;
1732
1733 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1734 if (r < 0)
1735 return r;
1736
1737 r = generic_array_bisect_plus_one(f,
1738 le64toh(d->data.entry_offset),
1739 le64toh(d->data.entry_array_offset),
1740 le64toh(d->data.n_entries),
1741 z,
1742 test_object_offset,
1743 direction,
1744 NULL, &p, NULL);
1745 if (r <= 0)
1746 return r;
1747
1748 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1749 if (r < 0)
1750 return r;
1751
1752 r = generic_array_bisect_plus_one(f,
1753 le64toh(o->data.entry_offset),
1754 le64toh(o->data.entry_array_offset),
1755 le64toh(o->data.n_entries),
1756 p,
1757 test_object_offset,
1758 direction,
1759 &qo, &q, NULL);
1760
1761 if (r <= 0)
1762 return r;
1763
1764 if (p == q) {
1765 if (ret)
1766 *ret = qo;
1767 if (offset)
1768 *offset = q;
1769
1770 return 1;
1771 }
1772
1773 z = q;
1774 }
1775
1776 return 0;
1777 }
1778
1779 int journal_file_move_to_entry_by_seqnum_for_data(
1780 JournalFile *f,
1781 uint64_t data_offset,
1782 uint64_t seqnum,
1783 direction_t direction,
1784 Object **ret, uint64_t *offset) {
1785
1786 Object *d;
1787 int r;
1788
1789 assert(f);
1790
1791 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1792 if (r < 0)
1793 return r;
1794
1795 return generic_array_bisect_plus_one(f,
1796 le64toh(d->data.entry_offset),
1797 le64toh(d->data.entry_array_offset),
1798 le64toh(d->data.n_entries),
1799 seqnum,
1800 test_object_seqnum,
1801 direction,
1802 ret, offset, NULL);
1803 }
1804
1805 int journal_file_move_to_entry_by_realtime_for_data(
1806 JournalFile *f,
1807 uint64_t data_offset,
1808 uint64_t realtime,
1809 direction_t direction,
1810 Object **ret, uint64_t *offset) {
1811
1812 Object *d;
1813 int r;
1814
1815 assert(f);
1816
1817 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1818 if (r < 0)
1819 return r;
1820
1821 return generic_array_bisect_plus_one(f,
1822 le64toh(d->data.entry_offset),
1823 le64toh(d->data.entry_array_offset),
1824 le64toh(d->data.n_entries),
1825 realtime,
1826 test_object_realtime,
1827 direction,
1828 ret, offset, NULL);
1829 }
1830
1831 void journal_file_dump(JournalFile *f) {
1832 Object *o;
1833 int r;
1834 uint64_t p;
1835
1836 assert(f);
1837
1838 journal_file_print_header(f);
1839
1840 p = le64toh(f->header->header_size);
1841 while (p != 0) {
1842 r = journal_file_move_to_object(f, -1, p, &o);
1843 if (r < 0)
1844 goto fail;
1845
1846 switch (o->object.type) {
1847
1848 case OBJECT_UNUSED:
1849 printf("Type: OBJECT_UNUSED\n");
1850 break;
1851
1852 case OBJECT_DATA:
1853 printf("Type: OBJECT_DATA\n");
1854 break;
1855
1856 case OBJECT_ENTRY:
1857 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
1858 (unsigned long long) le64toh(o->entry.seqnum),
1859 (unsigned long long) le64toh(o->entry.monotonic),
1860 (unsigned long long) le64toh(o->entry.realtime));
1861 break;
1862
1863 case OBJECT_FIELD_HASH_TABLE:
1864 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1865 break;
1866
1867 case OBJECT_DATA_HASH_TABLE:
1868 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1869 break;
1870
1871 case OBJECT_ENTRY_ARRAY:
1872 printf("Type: OBJECT_ENTRY_ARRAY\n");
1873 break;
1874
1875 case OBJECT_TAG:
1876 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1877 (unsigned long long) le64toh(o->tag.seqnum),
1878 (unsigned long long) le64toh(o->tag.epoch));
1879 break;
1880 }
1881
1882 if (o->object.flags & OBJECT_COMPRESSED)
1883 printf("Flags: COMPRESSED\n");
1884
1885 if (p == le64toh(f->header->tail_object_offset))
1886 p = 0;
1887 else
1888 p = p + ALIGN64(le64toh(o->object.size));
1889 }
1890
1891 return;
1892 fail:
1893 log_error("File corrupt");
1894 }
1895
1896 void journal_file_print_header(JournalFile *f) {
1897 char a[33], b[33], c[33];
1898 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1899
1900 assert(f);
1901
1902 printf("File Path: %s\n"
1903 "File ID: %s\n"
1904 "Machine ID: %s\n"
1905 "Boot ID: %s\n"
1906 "Sequential Number ID: %s\n"
1907 "State: %s\n"
1908 "Compatible Flags:%s%s\n"
1909 "Incompatible Flags:%s%s\n"
1910 "Header size: %llu\n"
1911 "Arena size: %llu\n"
1912 "Data Hash Table Size: %llu\n"
1913 "Field Hash Table Size: %llu\n"
1914 "Rotate Suggested: %s\n"
1915 "Head Sequential Number: %llu\n"
1916 "Tail Sequential Number: %llu\n"
1917 "Head Realtime Timestamp: %s\n"
1918 "Tail Realtime Timestamp: %s\n"
1919 "Objects: %llu\n"
1920 "Entry Objects: %llu\n",
1921 f->path,
1922 sd_id128_to_string(f->header->file_id, a),
1923 sd_id128_to_string(f->header->machine_id, b),
1924 sd_id128_to_string(f->header->boot_id, c),
1925 sd_id128_to_string(f->header->seqnum_id, c),
1926 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1927 f->header->state == STATE_ONLINE ? "ONLINE" :
1928 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
1929 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1930 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1931 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1932 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1933 (unsigned long long) le64toh(f->header->header_size),
1934 (unsigned long long) le64toh(f->header->arena_size),
1935 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1936 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1937 yes_no(journal_file_rotate_suggested(f)),
1938 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1939 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1940 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1941 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1942 (unsigned long long) le64toh(f->header->n_objects),
1943 (unsigned long long) le64toh(f->header->n_entries));
1944
1945 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1946 printf("Data Objects: %llu\n"
1947 "Data Hash Table Fill: %.1f%%\n",
1948 (unsigned long long) le64toh(f->header->n_data),
1949 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1950
1951 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1952 printf("Field Objects: %llu\n"
1953 "Field Hash Table Fill: %.1f%%\n",
1954 (unsigned long long) le64toh(f->header->n_fields),
1955 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1956
1957 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1958 printf("Tag Objects: %llu\n",
1959 (unsigned long long) le64toh(f->header->n_tags));
1960 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1961 printf("Entry Array Objects: %llu\n",
1962 (unsigned long long) le64toh(f->header->n_entry_arrays));
1963 }
1964
1965 int journal_file_open(
1966 const char *fname,
1967 int flags,
1968 mode_t mode,
1969 bool compress,
1970 bool seal,
1971 JournalMetrics *metrics,
1972 MMapCache *mmap_cache,
1973 JournalFile *template,
1974 JournalFile **ret) {
1975
1976 JournalFile *f;
1977 int r;
1978 bool newly_created = false;
1979
1980 assert(fname);
1981
1982 if ((flags & O_ACCMODE) != O_RDONLY &&
1983 (flags & O_ACCMODE) != O_RDWR)
1984 return -EINVAL;
1985
1986 if (!endswith(fname, ".journal") &&
1987 !endswith(fname, ".journal~"))
1988 return -EINVAL;
1989
1990 f = new0(JournalFile, 1);
1991 if (!f)
1992 return -ENOMEM;
1993
1994 f->fd = -1;
1995 f->mode = mode;
1996
1997 f->flags = flags;
1998 f->prot = prot_from_flags(flags);
1999 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2000 f->compress = compress;
2001 f->seal = seal;
2002
2003 if (mmap_cache)
2004 f->mmap = mmap_cache_ref(mmap_cache);
2005 else {
2006 f->mmap = mmap_cache_new();
2007 if (!f->mmap) {
2008 r = -ENOMEM;
2009 goto fail;
2010 }
2011 }
2012
2013 f->path = strdup(fname);
2014 if (!f->path) {
2015 r = -ENOMEM;
2016 goto fail;
2017 }
2018
2019 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2020 if (f->fd < 0) {
2021 r = -errno;
2022 goto fail;
2023 }
2024
2025 if (fstat(f->fd, &f->last_stat) < 0) {
2026 r = -errno;
2027 goto fail;
2028 }
2029
2030 if (f->last_stat.st_size == 0 && f->writable) {
2031 newly_created = true;
2032
2033 /* Try to load the FSPRG state, and if we can't, then
2034 * just don't do sealing */
2035 r = journal_file_fss_load(f);
2036 if (r < 0)
2037 f->seal = false;
2038
2039 r = journal_file_init_header(f, template);
2040 if (r < 0)
2041 goto fail;
2042
2043 if (fstat(f->fd, &f->last_stat) < 0) {
2044 r = -errno;
2045 goto fail;
2046 }
2047 }
2048
2049 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2050 r = -EIO;
2051 goto fail;
2052 }
2053
2054 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2055 if (f->header == MAP_FAILED) {
2056 f->header = NULL;
2057 r = -errno;
2058 goto fail;
2059 }
2060
2061 if (!newly_created) {
2062 r = journal_file_verify_header(f);
2063 if (r < 0)
2064 goto fail;
2065 }
2066
2067 if (!newly_created && f->writable) {
2068 r = journal_file_fss_load(f);
2069 if (r < 0)
2070 goto fail;
2071 }
2072
2073 if (f->writable) {
2074 if (metrics) {
2075 journal_default_metrics(metrics, f->fd);
2076 f->metrics = *metrics;
2077 } else if (template)
2078 f->metrics = template->metrics;
2079
2080 r = journal_file_refresh_header(f);
2081 if (r < 0)
2082 goto fail;
2083 }
2084
2085 r = journal_file_hmac_setup(f);
2086 if (r < 0)
2087 goto fail;
2088
2089 if (newly_created) {
2090 r = journal_file_setup_field_hash_table(f);
2091 if (r < 0)
2092 goto fail;
2093
2094 r = journal_file_setup_data_hash_table(f);
2095 if (r < 0)
2096 goto fail;
2097
2098 r = journal_file_append_first_tag(f);
2099 if (r < 0)
2100 goto fail;
2101 }
2102
2103 r = journal_file_map_field_hash_table(f);
2104 if (r < 0)
2105 goto fail;
2106
2107 r = journal_file_map_data_hash_table(f);
2108 if (r < 0)
2109 goto fail;
2110
2111 if (ret)
2112 *ret = f;
2113
2114 return 0;
2115
2116 fail:
2117 journal_file_close(f);
2118
2119 return r;
2120 }
2121
2122 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2123 char *p;
2124 size_t l;
2125 JournalFile *old_file, *new_file = NULL;
2126 int r;
2127
2128 assert(f);
2129 assert(*f);
2130
2131 old_file = *f;
2132
2133 if (!old_file->writable)
2134 return -EINVAL;
2135
2136 if (!endswith(old_file->path, ".journal"))
2137 return -EINVAL;
2138
2139 l = strlen(old_file->path);
2140
2141 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2142 if (!p)
2143 return -ENOMEM;
2144
2145 memcpy(p, old_file->path, l - 8);
2146 p[l-8] = '@';
2147 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2148 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2149 "-%016llx-%016llx.journal",
2150 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2151 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2152
2153 r = rename(old_file->path, p);
2154 free(p);
2155
2156 if (r < 0)
2157 return -errno;
2158
2159 old_file->header->state = STATE_ARCHIVED;
2160
2161 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2162 journal_file_close(old_file);
2163
2164 *f = new_file;
2165 return r;
2166 }
2167
2168 int journal_file_open_reliably(
2169 const char *fname,
2170 int flags,
2171 mode_t mode,
2172 bool compress,
2173 bool seal,
2174 JournalMetrics *metrics,
2175 MMapCache *mmap_cache,
2176 JournalFile *template,
2177 JournalFile **ret) {
2178
2179 int r;
2180 size_t l;
2181 char *p;
2182
2183 r = journal_file_open(fname, flags, mode, compress, seal,
2184 metrics, mmap_cache, template, ret);
2185 if (r != -EBADMSG && /* corrupted */
2186 r != -ENODATA && /* truncated */
2187 r != -EHOSTDOWN && /* other machine */
2188 r != -EPROTONOSUPPORT && /* incompatible feature */
2189 r != -EBUSY && /* unclean shutdown */
2190 r != -ESHUTDOWN /* already archived */)
2191 return r;
2192
2193 if ((flags & O_ACCMODE) == O_RDONLY)
2194 return r;
2195
2196 if (!(flags & O_CREAT))
2197 return r;
2198
2199 if (!endswith(fname, ".journal"))
2200 return r;
2201
2202 /* The file is corrupted. Rotate it away and try it again (but only once) */
2203
2204 l = strlen(fname);
2205 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2206 (int) (l-8), fname,
2207 (unsigned long long) now(CLOCK_REALTIME),
2208 random_ull()) < 0)
2209 return -ENOMEM;
2210
2211 r = rename(fname, p);
2212 free(p);
2213 if (r < 0)
2214 return -errno;
2215
2216 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2217
2218 return journal_file_open(fname, flags, mode, compress, seal,
2219 metrics, mmap_cache, template, ret);
2220 }
2221
2222
2223 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2224 uint64_t i, n;
2225 uint64_t q, xor_hash = 0;
2226 int r;
2227 EntryItem *items;
2228 dual_timestamp ts;
2229
2230 assert(from);
2231 assert(to);
2232 assert(o);
2233 assert(p);
2234
2235 if (!to->writable)
2236 return -EPERM;
2237
2238 ts.monotonic = le64toh(o->entry.monotonic);
2239 ts.realtime = le64toh(o->entry.realtime);
2240
2241 if (to->tail_entry_monotonic_valid &&
2242 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2243 return -EINVAL;
2244
2245 n = journal_file_entry_n_items(o);
2246 items = alloca(sizeof(EntryItem) * n);
2247
2248 for (i = 0; i < n; i++) {
2249 uint64_t l, h;
2250 le64_t le_hash;
2251 size_t t;
2252 void *data;
2253 Object *u;
2254
2255 q = le64toh(o->entry.items[i].object_offset);
2256 le_hash = o->entry.items[i].hash;
2257
2258 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2259 if (r < 0)
2260 return r;
2261
2262 if (le_hash != o->data.hash)
2263 return -EBADMSG;
2264
2265 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2266 t = (size_t) l;
2267
2268 /* We hit the limit on 32bit machines */
2269 if ((uint64_t) t != l)
2270 return -E2BIG;
2271
2272 if (o->object.flags & OBJECT_COMPRESSED) {
2273 #ifdef HAVE_XZ
2274 uint64_t rsize;
2275
2276 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2277 return -EBADMSG;
2278
2279 data = from->compress_buffer;
2280 l = rsize;
2281 #else
2282 return -EPROTONOSUPPORT;
2283 #endif
2284 } else
2285 data = o->data.payload;
2286
2287 r = journal_file_append_data(to, data, l, &u, &h);
2288 if (r < 0)
2289 return r;
2290
2291 xor_hash ^= le64toh(u->data.hash);
2292 items[i].object_offset = htole64(h);
2293 items[i].hash = u->data.hash;
2294
2295 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2296 if (r < 0)
2297 return r;
2298 }
2299
2300 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2301 }
2302
2303 void journal_default_metrics(JournalMetrics *m, int fd) {
2304 uint64_t fs_size = 0;
2305 struct statvfs ss;
2306 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2307
2308 assert(m);
2309 assert(fd >= 0);
2310
2311 if (fstatvfs(fd, &ss) >= 0)
2312 fs_size = ss.f_frsize * ss.f_blocks;
2313
2314 if (m->max_use == (uint64_t) -1) {
2315
2316 if (fs_size > 0) {
2317 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2318
2319 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2320 m->max_use = DEFAULT_MAX_USE_UPPER;
2321
2322 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2323 m->max_use = DEFAULT_MAX_USE_LOWER;
2324 } else
2325 m->max_use = DEFAULT_MAX_USE_LOWER;
2326 } else {
2327 m->max_use = PAGE_ALIGN(m->max_use);
2328
2329 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2330 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2331 }
2332
2333 if (m->max_size == (uint64_t) -1) {
2334 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2335
2336 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2337 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2338 } else
2339 m->max_size = PAGE_ALIGN(m->max_size);
2340
2341 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2342 m->max_size = JOURNAL_FILE_SIZE_MIN;
2343
2344 if (m->max_size*2 > m->max_use)
2345 m->max_use = m->max_size*2;
2346
2347 if (m->min_size == (uint64_t) -1)
2348 m->min_size = JOURNAL_FILE_SIZE_MIN;
2349 else {
2350 m->min_size = PAGE_ALIGN(m->min_size);
2351
2352 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2353 m->min_size = JOURNAL_FILE_SIZE_MIN;
2354
2355 if (m->min_size > m->max_size)
2356 m->max_size = m->min_size;
2357 }
2358
2359 if (m->keep_free == (uint64_t) -1) {
2360
2361 if (fs_size > 0) {
2362 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2363
2364 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2365 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2366
2367 } else
2368 m->keep_free = DEFAULT_KEEP_FREE;
2369 }
2370
2371 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2372 format_bytes(a, sizeof(a), m->max_use),
2373 format_bytes(b, sizeof(b), m->max_size),
2374 format_bytes(c, sizeof(c), m->min_size),
2375 format_bytes(d, sizeof(d), m->keep_free));
2376 }
2377
2378 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2379 assert(f);
2380 assert(from || to);
2381
2382 if (from) {
2383 if (f->header->head_entry_realtime == 0)
2384 return -ENOENT;
2385
2386 *from = le64toh(f->header->head_entry_realtime);
2387 }
2388
2389 if (to) {
2390 if (f->header->tail_entry_realtime == 0)
2391 return -ENOENT;
2392
2393 *to = le64toh(f->header->tail_entry_realtime);
2394 }
2395
2396 return 1;
2397 }
2398
2399 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2400 char t[9+32+1] = "_BOOT_ID=";
2401 Object *o;
2402 uint64_t p;
2403 int r;
2404
2405 assert(f);
2406 assert(from || to);
2407
2408 sd_id128_to_string(boot_id, t + 9);
2409
2410 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2411 if (r <= 0)
2412 return r;
2413
2414 if (le64toh(o->data.n_entries) <= 0)
2415 return 0;
2416
2417 if (from) {
2418 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2419 if (r < 0)
2420 return r;
2421
2422 *from = le64toh(o->entry.monotonic);
2423 }
2424
2425 if (to) {
2426 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2427 if (r < 0)
2428 return r;
2429
2430 r = generic_array_get_plus_one(f,
2431 le64toh(o->data.entry_offset),
2432 le64toh(o->data.entry_array_offset),
2433 le64toh(o->data.n_entries)-1,
2434 &o, NULL);
2435 if (r <= 0)
2436 return r;
2437
2438 *to = le64toh(o->entry.monotonic);
2439 }
2440
2441 return 1;
2442 }
2443
2444 bool journal_file_rotate_suggested(JournalFile *f) {
2445 assert(f);
2446
2447 /* If we gained new header fields we gained new features,
2448 * hence suggest a rotation */
2449 if (le64toh(f->header->header_size) < sizeof(Header)) {
2450 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2451 return true;
2452 }
2453
2454 /* Let's check if the hash tables grew over a certain fill
2455 * level (75%, borrowing this value from Java's hash table
2456 * implementation), and if so suggest a rotation. To calculate
2457 * the fill level we need the n_data field, which only exists
2458 * in newer versions. */
2459
2460 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2461 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2462 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2463 f->path,
2464 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2465 (unsigned long long) le64toh(f->header->n_data),
2466 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2467 (unsigned long long) (f->last_stat.st_size),
2468 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2469 return true;
2470 }
2471
2472 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2473 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2474 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2475 f->path,
2476 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2477 (unsigned long long) le64toh(f->header->n_fields),
2478 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2479 return true;
2480 }
2481
2482 return false;
2483 }