]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journal-file.c
journal: verify hashes only during actual verification, not all the time
[thirdparty/systemd.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "fsprg.h"
36
/* Default sizes, in bytes, of the data and the field hash table. The
 * data hash table is never set up smaller than its default. */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads of at least this many bytes are handed to the
 * compressor if compression is enabled for the file */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* The smallest size a journal file may have */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */

/* Lower and upper bound applied when the max_use value is derived
 * from the size of the file system */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* Upper bound applied when max_size is derived from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */

/* Upper bound applied when the keep_free value is derived from the
 * size of the file system */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* The keep_free value used when the size of the file system cannot
 * be determined */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MiB */

/* n_data was the first field added to the header after the initial
 * file format design, hence everything before it must be present */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
63
64 void journal_file_close(JournalFile *f) {
65 assert(f);
66
67 /* Write the final tag */
68 if (f->authenticate)
69 journal_file_append_tag(f);
70
71 /* Sync everything to disk, before we mark the file offline */
72 if (f->mmap && f->fd >= 0)
73 mmap_cache_close_fd(f->mmap, f->fd);
74
75 if (f->writable && f->fd >= 0)
76 fdatasync(f->fd);
77
78 if (f->header) {
79 /* Mark the file offline. Don't override the archived state if it already is set */
80 if (f->writable && f->header->state == STATE_ONLINE)
81 f->header->state = STATE_OFFLINE;
82
83 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84 }
85
86 if (f->fd >= 0)
87 close_nointr_nofail(f->fd);
88
89 free(f->path);
90
91 if (f->mmap)
92 mmap_cache_unref(f->mmap);
93
94 #ifdef HAVE_XZ
95 free(f->compress_buffer);
96 #endif
97
98 #ifdef HAVE_GCRYPT
99 if (f->fsprg_header)
100 munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
101
102 if (f->hmac)
103 gcry_md_close(f->hmac);
104 #endif
105
106 free(f);
107 }
108
109 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
110 Header h;
111 ssize_t k;
112 int r;
113
114 assert(f);
115
116 zero(h);
117 memcpy(h.signature, HEADER_SIGNATURE, 8);
118 h.header_size = htole64(ALIGN64(sizeof(h)));
119
120 h.incompatible_flags =
121 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
122
123 h.compatible_flags =
124 htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
125
126 r = sd_id128_randomize(&h.file_id);
127 if (r < 0)
128 return r;
129
130 if (template) {
131 h.seqnum_id = template->header->seqnum_id;
132 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
133 } else
134 h.seqnum_id = h.file_id;
135
136 k = pwrite(f->fd, &h, sizeof(h), 0);
137 if (k < 0)
138 return -errno;
139
140 if (k != sizeof(h))
141 return -EIO;
142
143 return 0;
144 }
145
146 static int journal_file_refresh_header(JournalFile *f) {
147 int r;
148 sd_id128_t boot_id;
149
150 assert(f);
151
152 r = sd_id128_get_machine(&f->header->machine_id);
153 if (r < 0)
154 return r;
155
156 r = sd_id128_get_boot(&boot_id);
157 if (r < 0)
158 return r;
159
160 if (sd_id128_equal(boot_id, f->header->boot_id))
161 f->tail_entry_monotonic_valid = true;
162
163 f->header->boot_id = boot_id;
164
165 f->header->state = STATE_ONLINE;
166
167 /* Sync the online state to disk */
168 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
169 fdatasync(f->fd);
170
171 return 0;
172 }
173
174 static int journal_file_verify_header(JournalFile *f) {
175 assert(f);
176
177 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
178 return -EBADMSG;
179
180 /* In both read and write mode we refuse to open files with
181 * incompatible flags we don't know */
182 #ifdef HAVE_XZ
183 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
184 return -EPROTONOSUPPORT;
185 #else
186 if (f->header->incompatible_flags != 0)
187 return -EPROTONOSUPPORT;
188 #endif
189
190 /* When open for writing we refuse to open files with
191 * compatible flags, too */
192 if (f->writable) {
193 #ifdef HAVE_GCRYPT
194 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
195 return -EPROTONOSUPPORT;
196 #else
197 if (f->header->compatible_flags != 0)
198 return -EPROTONOSUPPORT;
199 #endif
200 }
201
202 /* The first addition was n_data, so check that we are at least this large */
203 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
204 return -EBADMSG;
205
206 if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED) &&
207 !JOURNAL_HEADER_CONTAINS(f->header, n_tags))
208 return -EBADMSG;
209
210 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
211 return -ENODATA;
212
213 if (f->writable) {
214 uint8_t state;
215 sd_id128_t machine_id;
216 int r;
217
218 r = sd_id128_get_machine(&machine_id);
219 if (r < 0)
220 return r;
221
222 if (!sd_id128_equal(machine_id, f->header->machine_id))
223 return -EHOSTDOWN;
224
225 state = f->header->state;
226
227 if (state == STATE_ONLINE) {
228 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
229 return -EBUSY;
230 } else if (state == STATE_ARCHIVED)
231 return -ESHUTDOWN;
232 else if (state != STATE_OFFLINE) {
233 log_debug("Journal file %s has unknown state %u.", f->path, state);
234 return -EBUSY;
235 }
236 }
237
238 f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
239 f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
240
241 return 0;
242 }
243
244 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
245 uint64_t old_size, new_size;
246 int r;
247
248 assert(f);
249
250 /* We assume that this file is not sparse, and we know that
251 * for sure, since we always call posix_fallocate()
252 * ourselves */
253
254 old_size =
255 le64toh(f->header->header_size) +
256 le64toh(f->header->arena_size);
257
258 new_size = PAGE_ALIGN(offset + size);
259 if (new_size < le64toh(f->header->header_size))
260 new_size = le64toh(f->header->header_size);
261
262 if (new_size <= old_size)
263 return 0;
264
265 if (f->metrics.max_size > 0 &&
266 new_size > f->metrics.max_size)
267 return -E2BIG;
268
269 if (new_size > f->metrics.min_size &&
270 f->metrics.keep_free > 0) {
271 struct statvfs svfs;
272
273 if (fstatvfs(f->fd, &svfs) >= 0) {
274 uint64_t available;
275
276 available = svfs.f_bfree * svfs.f_bsize;
277
278 if (available >= f->metrics.keep_free)
279 available -= f->metrics.keep_free;
280 else
281 available = 0;
282
283 if (new_size - old_size > available)
284 return -E2BIG;
285 }
286 }
287
288 /* Note that the glibc fallocate() fallback is very
289 inefficient, hence we try to minimize the allocation area
290 as we can. */
291 r = posix_fallocate(f->fd, old_size, new_size - old_size);
292 if (r != 0)
293 return -r;
294
295 mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
296
297 if (fstat(f->fd, &f->last_stat) < 0)
298 return -errno;
299
300 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
301
302 return 0;
303 }
304
305 static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
306 assert(f);
307 assert(ret);
308
309 /* Avoid SIGBUS on invalid accesses */
310 if (offset + size > (uint64_t) f->last_stat.st_size) {
311 /* Hmm, out of range? Let's refresh the fstat() data
312 * first, before we trust that check. */
313
314 if (fstat(f->fd, &f->last_stat) < 0 ||
315 offset + size > (uint64_t) f->last_stat.st_size)
316 return -EADDRNOTAVAIL;
317 }
318
319 return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
320 }
321
322 static uint64_t minimum_header_size(Object *o) {
323
324 static uint64_t table[] = {
325 [OBJECT_DATA] = sizeof(DataObject),
326 [OBJECT_FIELD] = sizeof(FieldObject),
327 [OBJECT_ENTRY] = sizeof(EntryObject),
328 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
329 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
330 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
331 [OBJECT_TAG] = sizeof(TagObject),
332 };
333
334 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
335 return sizeof(ObjectHeader);
336
337 return table[o->object.type];
338 }
339
340 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
341 int r;
342 void *t;
343 Object *o;
344 uint64_t s;
345 unsigned context;
346
347 assert(f);
348 assert(ret);
349
350 /* One context for each type, plus one catch-all for the rest */
351 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
352
353 r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
354 if (r < 0)
355 return r;
356
357 o = (Object*) t;
358 s = le64toh(o->object.size);
359
360 if (s < sizeof(ObjectHeader))
361 return -EBADMSG;
362
363 if (o->object.type <= OBJECT_UNUSED)
364 return -EBADMSG;
365
366 if (s < minimum_header_size(o))
367 return -EBADMSG;
368
369 if (type >= 0 && o->object.type != type)
370 return -EBADMSG;
371
372 if (s > sizeof(ObjectHeader)) {
373 r = journal_file_move_to(f, o->object.type, offset, s, &t);
374 if (r < 0)
375 return r;
376
377 o = (Object*) t;
378 }
379
380 *ret = o;
381 return 0;
382 }
383
384 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
385 uint64_t r;
386
387 assert(f);
388
389 r = le64toh(f->header->tail_entry_seqnum) + 1;
390
391 if (seqnum) {
392 /* If an external seqnum counter was passed, we update
393 * both the local and the external one, and set it to
394 * the maximum of both */
395
396 if (*seqnum + 1 > r)
397 r = *seqnum + 1;
398
399 *seqnum = r;
400 }
401
402 f->header->tail_entry_seqnum = htole64(r);
403
404 if (f->header->head_entry_seqnum == 0)
405 f->header->head_entry_seqnum = htole64(r);
406
407 return r;
408 }
409
410 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
411 int r;
412 uint64_t p;
413 Object *tail, *o;
414 void *t;
415
416 assert(f);
417 assert(type > 0 && type < _OBJECT_TYPE_MAX);
418 assert(size >= sizeof(ObjectHeader));
419 assert(offset);
420 assert(ret);
421
422 p = le64toh(f->header->tail_object_offset);
423 if (p == 0)
424 p = le64toh(f->header->header_size);
425 else {
426 r = journal_file_move_to_object(f, -1, p, &tail);
427 if (r < 0)
428 return r;
429
430 p += ALIGN64(le64toh(tail->object.size));
431 }
432
433 r = journal_file_allocate(f, p, size);
434 if (r < 0)
435 return r;
436
437 r = journal_file_move_to(f, type, p, size, &t);
438 if (r < 0)
439 return r;
440
441 o = (Object*) t;
442
443 zero(o->object);
444 o->object.type = type;
445 o->object.size = htole64(size);
446
447 f->header->tail_object_offset = htole64(p);
448 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
449
450 *ret = o;
451 *offset = p;
452
453 return 0;
454 }
455
456 static int journal_file_setup_data_hash_table(JournalFile *f) {
457 uint64_t s, p;
458 Object *o;
459 int r;
460
461 assert(f);
462
463 /* We estimate that we need 1 hash table entry per 768 of
464 journal file and we want to make sure we never get beyond
465 75% fill level. Calculate the hash table size for the
466 maximum file size based on these metrics. */
467
468 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
469 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
470 s = DEFAULT_DATA_HASH_TABLE_SIZE;
471
472 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
473
474 r = journal_file_append_object(f,
475 OBJECT_DATA_HASH_TABLE,
476 offsetof(Object, hash_table.items) + s,
477 &o, &p);
478 if (r < 0)
479 return r;
480
481 memset(o->hash_table.items, 0, s);
482
483 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
484 f->header->data_hash_table_size = htole64(s);
485
486 return 0;
487 }
488
489 static int journal_file_setup_field_hash_table(JournalFile *f) {
490 uint64_t s, p;
491 Object *o;
492 int r;
493
494 assert(f);
495
496 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
497 r = journal_file_append_object(f,
498 OBJECT_FIELD_HASH_TABLE,
499 offsetof(Object, hash_table.items) + s,
500 &o, &p);
501 if (r < 0)
502 return r;
503
504 memset(o->hash_table.items, 0, s);
505
506 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
507 f->header->field_hash_table_size = htole64(s);
508
509 return 0;
510 }
511
512 static int journal_file_map_data_hash_table(JournalFile *f) {
513 uint64_t s, p;
514 void *t;
515 int r;
516
517 assert(f);
518
519 p = le64toh(f->header->data_hash_table_offset);
520 s = le64toh(f->header->data_hash_table_size);
521
522 r = journal_file_move_to(f,
523 OBJECT_DATA_HASH_TABLE,
524 p, s,
525 &t);
526 if (r < 0)
527 return r;
528
529 f->data_hash_table = t;
530 return 0;
531 }
532
533 static int journal_file_map_field_hash_table(JournalFile *f) {
534 uint64_t s, p;
535 void *t;
536 int r;
537
538 assert(f);
539
540 p = le64toh(f->header->field_hash_table_offset);
541 s = le64toh(f->header->field_hash_table_size);
542
543 r = journal_file_move_to(f,
544 OBJECT_FIELD_HASH_TABLE,
545 p, s,
546 &t);
547 if (r < 0)
548 return r;
549
550 f->field_hash_table = t;
551 return 0;
552 }
553
554 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
555 uint64_t p, h;
556 int r;
557
558 assert(f);
559 assert(o);
560 assert(offset > 0);
561 assert(o->object.type == OBJECT_DATA);
562
563 /* This might alter the window we are looking at */
564
565 o->data.next_hash_offset = o->data.next_field_offset = 0;
566 o->data.entry_offset = o->data.entry_array_offset = 0;
567 o->data.n_entries = 0;
568
569 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
570 p = le64toh(f->data_hash_table[h].tail_hash_offset);
571 if (p == 0) {
572 /* Only entry in the hash table is easy */
573 f->data_hash_table[h].head_hash_offset = htole64(offset);
574 } else {
575 /* Move back to the previous data object, to patch in
576 * pointer */
577
578 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
579 if (r < 0)
580 return r;
581
582 o->data.next_hash_offset = htole64(offset);
583 }
584
585 f->data_hash_table[h].tail_hash_offset = htole64(offset);
586
587 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
588 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
589
590 return 0;
591 }
592
593 int journal_file_find_data_object_with_hash(
594 JournalFile *f,
595 const void *data, uint64_t size, uint64_t hash,
596 Object **ret, uint64_t *offset) {
597
598 uint64_t p, osize, h;
599 int r;
600
601 assert(f);
602 assert(data || size == 0);
603
604 osize = offsetof(Object, data.payload) + size;
605
606 if (f->header->data_hash_table_size == 0)
607 return -EBADMSG;
608
609 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
610 p = le64toh(f->data_hash_table[h].head_hash_offset);
611
612 while (p > 0) {
613 Object *o;
614
615 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
616 if (r < 0)
617 return r;
618
619 if (le64toh(o->data.hash) != hash)
620 goto next;
621
622 if (o->object.flags & OBJECT_COMPRESSED) {
623 #ifdef HAVE_XZ
624 uint64_t l, rsize;
625
626 l = le64toh(o->object.size);
627 if (l <= offsetof(Object, data.payload))
628 return -EBADMSG;
629
630 l -= offsetof(Object, data.payload);
631
632 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
633 return -EBADMSG;
634
635 if (rsize == size &&
636 memcmp(f->compress_buffer, data, size) == 0) {
637
638 if (ret)
639 *ret = o;
640
641 if (offset)
642 *offset = p;
643
644 return 1;
645 }
646 #else
647 return -EPROTONOSUPPORT;
648 #endif
649
650 } else if (le64toh(o->object.size) == osize &&
651 memcmp(o->data.payload, data, size) == 0) {
652
653 if (ret)
654 *ret = o;
655
656 if (offset)
657 *offset = p;
658
659 return 1;
660 }
661
662 next:
663 p = le64toh(o->data.next_hash_offset);
664 }
665
666 return 0;
667 }
668
669 int journal_file_find_data_object(
670 JournalFile *f,
671 const void *data, uint64_t size,
672 Object **ret, uint64_t *offset) {
673
674 uint64_t hash;
675
676 assert(f);
677 assert(data || size == 0);
678
679 hash = hash64(data, size);
680
681 return journal_file_find_data_object_with_hash(f,
682 data, size, hash,
683 ret, offset);
684 }
685
686 static int journal_file_append_data(
687 JournalFile *f,
688 const void *data, uint64_t size,
689 Object **ret, uint64_t *offset) {
690
691 uint64_t hash, p;
692 uint64_t osize;
693 Object *o;
694 int r;
695 bool compressed = false;
696
697 assert(f);
698 assert(data || size == 0);
699
700 hash = hash64(data, size);
701
702 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
703 if (r < 0)
704 return r;
705 else if (r > 0) {
706
707 if (ret)
708 *ret = o;
709
710 if (offset)
711 *offset = p;
712
713 return 0;
714 }
715
716 osize = offsetof(Object, data.payload) + size;
717 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
718 if (r < 0)
719 return r;
720
721 o->data.hash = htole64(hash);
722
723 #ifdef HAVE_XZ
724 if (f->compress &&
725 size >= COMPRESSION_SIZE_THRESHOLD) {
726 uint64_t rsize;
727
728 compressed = compress_blob(data, size, o->data.payload, &rsize);
729
730 if (compressed) {
731 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
732 o->object.flags |= OBJECT_COMPRESSED;
733
734 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
735 }
736 }
737 #endif
738
739 if (!compressed && size > 0)
740 memcpy(o->data.payload, data, size);
741
742 r = journal_file_link_data(f, o, p, hash);
743 if (r < 0)
744 return r;
745
746 r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
747 if (r < 0)
748 return r;
749
750 /* The linking might have altered the window, so let's
751 * refresh our pointer */
752 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
753 if (r < 0)
754 return r;
755
756 if (ret)
757 *ret = o;
758
759 if (offset)
760 *offset = p;
761
762 return 0;
763 }
764
765 uint64_t journal_file_entry_n_items(Object *o) {
766 assert(o);
767 assert(o->object.type == OBJECT_ENTRY);
768
769 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
770 }
771
772 uint64_t journal_file_entry_array_n_items(Object *o) {
773 assert(o);
774 assert(o->object.type == OBJECT_ENTRY_ARRAY);
775
776 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
777 }
778
779 static int link_entry_into_array(JournalFile *f,
780 le64_t *first,
781 le64_t *idx,
782 uint64_t p) {
783 int r;
784 uint64_t n = 0, ap = 0, q, i, a, hidx;
785 Object *o;
786
787 assert(f);
788 assert(first);
789 assert(idx);
790 assert(p > 0);
791
792 a = le64toh(*first);
793 i = hidx = le64toh(*idx);
794 while (a > 0) {
795
796 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
797 if (r < 0)
798 return r;
799
800 n = journal_file_entry_array_n_items(o);
801 if (i < n) {
802 o->entry_array.items[i] = htole64(p);
803 *idx = htole64(hidx + 1);
804 return 0;
805 }
806
807 i -= n;
808 ap = a;
809 a = le64toh(o->entry_array.next_entry_array_offset);
810 }
811
812 if (hidx > n)
813 n = (hidx+1) * 2;
814 else
815 n = n * 2;
816
817 if (n < 4)
818 n = 4;
819
820 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
821 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
822 &o, &q);
823 if (r < 0)
824 return r;
825
826 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
827 if (r < 0)
828 return r;
829
830 o->entry_array.items[i] = htole64(p);
831
832 if (ap == 0)
833 *first = htole64(q);
834 else {
835 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
836 if (r < 0)
837 return r;
838
839 o->entry_array.next_entry_array_offset = htole64(q);
840 }
841
842 *idx = htole64(hidx + 1);
843
844 return 0;
845 }
846
847 static int link_entry_into_array_plus_one(JournalFile *f,
848 le64_t *extra,
849 le64_t *first,
850 le64_t *idx,
851 uint64_t p) {
852
853 int r;
854
855 assert(f);
856 assert(extra);
857 assert(first);
858 assert(idx);
859 assert(p > 0);
860
861 if (*idx == 0)
862 *extra = htole64(p);
863 else {
864 le64_t i;
865
866 i = htole64(le64toh(*idx) - 1);
867 r = link_entry_into_array(f, first, &i, p);
868 if (r < 0)
869 return r;
870 }
871
872 *idx = htole64(le64toh(*idx) + 1);
873 return 0;
874 }
875
876 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
877 uint64_t p;
878 int r;
879 assert(f);
880 assert(o);
881 assert(offset > 0);
882
883 p = le64toh(o->entry.items[i].object_offset);
884 if (p == 0)
885 return -EINVAL;
886
887 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
888 if (r < 0)
889 return r;
890
891 return link_entry_into_array_plus_one(f,
892 &o->data.entry_offset,
893 &o->data.entry_array_offset,
894 &o->data.n_entries,
895 offset);
896 }
897
898 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
899 uint64_t n, i;
900 int r;
901
902 assert(f);
903 assert(o);
904 assert(offset > 0);
905 assert(o->object.type == OBJECT_ENTRY);
906
907 __sync_synchronize();
908
909 /* Link up the entry itself */
910 r = link_entry_into_array(f,
911 &f->header->entry_array_offset,
912 &f->header->n_entries,
913 offset);
914 if (r < 0)
915 return r;
916
917 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
918
919 if (f->header->head_entry_realtime == 0)
920 f->header->head_entry_realtime = o->entry.realtime;
921
922 f->header->tail_entry_realtime = o->entry.realtime;
923 f->header->tail_entry_monotonic = o->entry.monotonic;
924
925 f->tail_entry_monotonic_valid = true;
926
927 /* Link up the items */
928 n = journal_file_entry_n_items(o);
929 for (i = 0; i < n; i++) {
930 r = journal_file_link_entry_item(f, o, offset, i);
931 if (r < 0)
932 return r;
933 }
934
935 return 0;
936 }
937
938 static int journal_file_append_entry_internal(
939 JournalFile *f,
940 const dual_timestamp *ts,
941 uint64_t xor_hash,
942 const EntryItem items[], unsigned n_items,
943 uint64_t *seqnum,
944 Object **ret, uint64_t *offset) {
945 uint64_t np;
946 uint64_t osize;
947 Object *o;
948 int r;
949
950 assert(f);
951 assert(items || n_items == 0);
952 assert(ts);
953
954 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
955
956 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
957 if (r < 0)
958 return r;
959
960 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
961 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
962 o->entry.realtime = htole64(ts->realtime);
963 o->entry.monotonic = htole64(ts->monotonic);
964 o->entry.xor_hash = htole64(xor_hash);
965 o->entry.boot_id = f->header->boot_id;
966
967 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
968 if (r < 0)
969 return r;
970
971 r = journal_file_link_entry(f, o, np);
972 if (r < 0)
973 return r;
974
975 if (ret)
976 *ret = o;
977
978 if (offset)
979 *offset = np;
980
981 return 0;
982 }
983
984 void journal_file_post_change(JournalFile *f) {
985 assert(f);
986
987 /* inotify() does not receive IN_MODIFY events from file
988 * accesses done via mmap(). After each access we hence
989 * trigger IN_MODIFY by truncating the journal file to its
990 * current size which triggers IN_MODIFY. */
991
992 __sync_synchronize();
993
994 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
995 log_error("Failed to to truncate file to its own size: %m");
996 }
997
998 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
999 unsigned i;
1000 EntryItem *items;
1001 int r;
1002 uint64_t xor_hash = 0;
1003 struct dual_timestamp _ts;
1004
1005 assert(f);
1006 assert(iovec || n_iovec == 0);
1007
1008 if (!f->writable)
1009 return -EPERM;
1010
1011 if (!ts) {
1012 dual_timestamp_get(&_ts);
1013 ts = &_ts;
1014 }
1015
1016 if (f->tail_entry_monotonic_valid &&
1017 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1018 return -EINVAL;
1019
1020 r = journal_file_maybe_append_tag(f, ts->realtime);
1021 if (r < 0)
1022 return r;
1023
1024 /* alloca() can't take 0, hence let's allocate at least one */
1025 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1026
1027 for (i = 0; i < n_iovec; i++) {
1028 uint64_t p;
1029 Object *o;
1030
1031 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1032 if (r < 0)
1033 return r;
1034
1035 xor_hash ^= le64toh(o->data.hash);
1036 items[i].object_offset = htole64(p);
1037 items[i].hash = o->data.hash;
1038 }
1039
1040 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1041
1042 journal_file_post_change(f);
1043
1044 return r;
1045 }
1046
1047 static int generic_array_get(JournalFile *f,
1048 uint64_t first,
1049 uint64_t i,
1050 Object **ret, uint64_t *offset) {
1051
1052 Object *o;
1053 uint64_t p = 0, a;
1054 int r;
1055
1056 assert(f);
1057
1058 a = first;
1059 while (a > 0) {
1060 uint64_t n;
1061
1062 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1063 if (r < 0)
1064 return r;
1065
1066 n = journal_file_entry_array_n_items(o);
1067 if (i < n) {
1068 p = le64toh(o->entry_array.items[i]);
1069 break;
1070 }
1071
1072 i -= n;
1073 a = le64toh(o->entry_array.next_entry_array_offset);
1074 }
1075
1076 if (a <= 0 || p <= 0)
1077 return 0;
1078
1079 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1080 if (r < 0)
1081 return r;
1082
1083 if (ret)
1084 *ret = o;
1085
1086 if (offset)
1087 *offset = p;
1088
1089 return 1;
1090 }
1091
1092 static int generic_array_get_plus_one(JournalFile *f,
1093 uint64_t extra,
1094 uint64_t first,
1095 uint64_t i,
1096 Object **ret, uint64_t *offset) {
1097
1098 Object *o;
1099
1100 assert(f);
1101
1102 if (i == 0) {
1103 int r;
1104
1105 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1106 if (r < 0)
1107 return r;
1108
1109 if (ret)
1110 *ret = o;
1111
1112 if (offset)
1113 *offset = extra;
1114
1115 return 1;
1116 }
1117
1118 return generic_array_get(f, first, i-1, ret, offset);
1119 }
1120
/* Results of the test_object callbacks used when bisecting entry
 * arrays: the tested object matches the needle, or it lies to the
 * left (sorts before) or to the right (sorts after) of it. */
enum {
        TEST_FOUND = 0,
        TEST_LEFT  = 1,
        TEST_RIGHT = 2
};
1126
1127 static int generic_array_bisect(JournalFile *f,
1128 uint64_t first,
1129 uint64_t n,
1130 uint64_t needle,
1131 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1132 direction_t direction,
1133 Object **ret,
1134 uint64_t *offset,
1135 uint64_t *idx) {
1136
1137 uint64_t a, p, t = 0, i = 0, last_p = 0;
1138 bool subtract_one = false;
1139 Object *o, *array = NULL;
1140 int r;
1141
1142 assert(f);
1143 assert(test_object);
1144
1145 a = first;
1146 while (a > 0) {
1147 uint64_t left, right, k, lp;
1148
1149 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1150 if (r < 0)
1151 return r;
1152
1153 k = journal_file_entry_array_n_items(array);
1154 right = MIN(k, n);
1155 if (right <= 0)
1156 return 0;
1157
1158 i = right - 1;
1159 lp = p = le64toh(array->entry_array.items[i]);
1160 if (p <= 0)
1161 return -EBADMSG;
1162
1163 r = test_object(f, p, needle);
1164 if (r < 0)
1165 return r;
1166
1167 if (r == TEST_FOUND)
1168 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1169
1170 if (r == TEST_RIGHT) {
1171 left = 0;
1172 right -= 1;
1173 for (;;) {
1174 if (left == right) {
1175 if (direction == DIRECTION_UP)
1176 subtract_one = true;
1177
1178 i = left;
1179 goto found;
1180 }
1181
1182 assert(left < right);
1183
1184 i = (left + right) / 2;
1185 p = le64toh(array->entry_array.items[i]);
1186 if (p <= 0)
1187 return -EBADMSG;
1188
1189 r = test_object(f, p, needle);
1190 if (r < 0)
1191 return r;
1192
1193 if (r == TEST_FOUND)
1194 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1195
1196 if (r == TEST_RIGHT)
1197 right = i;
1198 else
1199 left = i + 1;
1200 }
1201 }
1202
1203 if (k > n) {
1204 if (direction == DIRECTION_UP) {
1205 i = n;
1206 subtract_one = true;
1207 goto found;
1208 }
1209
1210 return 0;
1211 }
1212
1213 last_p = lp;
1214
1215 n -= k;
1216 t += k;
1217 a = le64toh(array->entry_array.next_entry_array_offset);
1218 }
1219
1220 return 0;
1221
1222 found:
1223 if (subtract_one && t == 0 && i == 0)
1224 return 0;
1225
1226 if (subtract_one && i == 0)
1227 p = last_p;
1228 else if (subtract_one)
1229 p = le64toh(array->entry_array.items[i-1]);
1230 else
1231 p = le64toh(array->entry_array.items[i]);
1232
1233 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1234 if (r < 0)
1235 return r;
1236
1237 if (ret)
1238 *ret = o;
1239
1240 if (offset)
1241 *offset = p;
1242
1243 if (idx)
1244 *idx = t + i + (subtract_one ? -1 : 0);
1245
1246 return 1;
1247 }
1248
1249 static int generic_array_bisect_plus_one(JournalFile *f,
1250 uint64_t extra,
1251 uint64_t first,
1252 uint64_t n,
1253 uint64_t needle,
1254 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1255 direction_t direction,
1256 Object **ret,
1257 uint64_t *offset,
1258 uint64_t *idx) {
1259
1260 int r;
1261 bool step_back = false;
1262 Object *o;
1263
1264 assert(f);
1265 assert(test_object);
1266
1267 if (n <= 0)
1268 return 0;
1269
1270 /* This bisects the array in object 'first', but first checks
1271 * an extra */
1272 r = test_object(f, extra, needle);
1273 if (r < 0)
1274 return r;
1275
1276 if (r == TEST_FOUND)
1277 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1278
1279 /* if we are looking with DIRECTION_UP then we need to first
1280 see if in the actual array there is a matching entry, and
1281 return the last one of that. But if there isn't any we need
1282 to return this one. Hence remember this, and return it
1283 below. */
1284 if (r == TEST_LEFT)
1285 step_back = direction == DIRECTION_UP;
1286
1287 if (r == TEST_RIGHT) {
1288 if (direction == DIRECTION_DOWN)
1289 goto found;
1290 else
1291 return 0;
1292 }
1293
1294 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1295
1296 if (r == 0 && step_back)
1297 goto found;
1298
1299 if (r > 0 && idx)
1300 (*idx) ++;
1301
1302 return r;
1303
1304 found:
1305 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1306 if (r < 0)
1307 return r;
1308
1309 if (ret)
1310 *ret = o;
1311
1312 if (offset)
1313 *offset = extra;
1314
1315 if (idx)
1316 *idx = 0;
1317
1318 return 1;
1319 }
1320
1321 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1322 assert(f);
1323 assert(p > 0);
1324
1325 if (p == needle)
1326 return TEST_FOUND;
1327 else if (p < needle)
1328 return TEST_LEFT;
1329 else
1330 return TEST_RIGHT;
1331 }
1332
1333 int journal_file_move_to_entry_by_offset(
1334 JournalFile *f,
1335 uint64_t p,
1336 direction_t direction,
1337 Object **ret,
1338 uint64_t *offset) {
1339
1340 return generic_array_bisect(f,
1341 le64toh(f->header->entry_array_offset),
1342 le64toh(f->header->n_entries),
1343 p,
1344 test_object_offset,
1345 direction,
1346 ret, offset, NULL);
1347 }
1348
1349
1350 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1351 Object *o;
1352 int r;
1353
1354 assert(f);
1355 assert(p > 0);
1356
1357 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1358 if (r < 0)
1359 return r;
1360
1361 if (le64toh(o->entry.seqnum) == needle)
1362 return TEST_FOUND;
1363 else if (le64toh(o->entry.seqnum) < needle)
1364 return TEST_LEFT;
1365 else
1366 return TEST_RIGHT;
1367 }
1368
1369 int journal_file_move_to_entry_by_seqnum(
1370 JournalFile *f,
1371 uint64_t seqnum,
1372 direction_t direction,
1373 Object **ret,
1374 uint64_t *offset) {
1375
1376 return generic_array_bisect(f,
1377 le64toh(f->header->entry_array_offset),
1378 le64toh(f->header->n_entries),
1379 seqnum,
1380 test_object_seqnum,
1381 direction,
1382 ret, offset, NULL);
1383 }
1384
1385 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1386 Object *o;
1387 int r;
1388
1389 assert(f);
1390 assert(p > 0);
1391
1392 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1393 if (r < 0)
1394 return r;
1395
1396 if (le64toh(o->entry.realtime) == needle)
1397 return TEST_FOUND;
1398 else if (le64toh(o->entry.realtime) < needle)
1399 return TEST_LEFT;
1400 else
1401 return TEST_RIGHT;
1402 }
1403
1404 int journal_file_move_to_entry_by_realtime(
1405 JournalFile *f,
1406 uint64_t realtime,
1407 direction_t direction,
1408 Object **ret,
1409 uint64_t *offset) {
1410
1411 return generic_array_bisect(f,
1412 le64toh(f->header->entry_array_offset),
1413 le64toh(f->header->n_entries),
1414 realtime,
1415 test_object_realtime,
1416 direction,
1417 ret, offset, NULL);
1418 }
1419
1420 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1421 Object *o;
1422 int r;
1423
1424 assert(f);
1425 assert(p > 0);
1426
1427 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1428 if (r < 0)
1429 return r;
1430
1431 if (le64toh(o->entry.monotonic) == needle)
1432 return TEST_FOUND;
1433 else if (le64toh(o->entry.monotonic) < needle)
1434 return TEST_LEFT;
1435 else
1436 return TEST_RIGHT;
1437 }
1438
1439 int journal_file_move_to_entry_by_monotonic(
1440 JournalFile *f,
1441 sd_id128_t boot_id,
1442 uint64_t monotonic,
1443 direction_t direction,
1444 Object **ret,
1445 uint64_t *offset) {
1446
1447 char t[9+32+1] = "_BOOT_ID=";
1448 Object *o;
1449 int r;
1450
1451 assert(f);
1452
1453 sd_id128_to_string(boot_id, t + 9);
1454 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1455 if (r < 0)
1456 return r;
1457 if (r == 0)
1458 return -ENOENT;
1459
1460 return generic_array_bisect_plus_one(f,
1461 le64toh(o->data.entry_offset),
1462 le64toh(o->data.entry_array_offset),
1463 le64toh(o->data.n_entries),
1464 monotonic,
1465 test_object_monotonic,
1466 direction,
1467 ret, offset, NULL);
1468 }
1469
1470 int journal_file_next_entry(
1471 JournalFile *f,
1472 Object *o, uint64_t p,
1473 direction_t direction,
1474 Object **ret, uint64_t *offset) {
1475
1476 uint64_t i, n;
1477 int r;
1478
1479 assert(f);
1480 assert(p > 0 || !o);
1481
1482 n = le64toh(f->header->n_entries);
1483 if (n <= 0)
1484 return 0;
1485
1486 if (!o)
1487 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1488 else {
1489 if (o->object.type != OBJECT_ENTRY)
1490 return -EINVAL;
1491
1492 r = generic_array_bisect(f,
1493 le64toh(f->header->entry_array_offset),
1494 le64toh(f->header->n_entries),
1495 p,
1496 test_object_offset,
1497 DIRECTION_DOWN,
1498 NULL, NULL,
1499 &i);
1500 if (r <= 0)
1501 return r;
1502
1503 if (direction == DIRECTION_DOWN) {
1504 if (i >= n - 1)
1505 return 0;
1506
1507 i++;
1508 } else {
1509 if (i <= 0)
1510 return 0;
1511
1512 i--;
1513 }
1514 }
1515
1516 /* And jump to it */
1517 return generic_array_get(f,
1518 le64toh(f->header->entry_array_offset),
1519 i,
1520 ret, offset);
1521 }
1522
1523 int journal_file_skip_entry(
1524 JournalFile *f,
1525 Object *o, uint64_t p,
1526 int64_t skip,
1527 Object **ret, uint64_t *offset) {
1528
1529 uint64_t i, n;
1530 int r;
1531
1532 assert(f);
1533 assert(o);
1534 assert(p > 0);
1535
1536 if (o->object.type != OBJECT_ENTRY)
1537 return -EINVAL;
1538
1539 r = generic_array_bisect(f,
1540 le64toh(f->header->entry_array_offset),
1541 le64toh(f->header->n_entries),
1542 p,
1543 test_object_offset,
1544 DIRECTION_DOWN,
1545 NULL, NULL,
1546 &i);
1547 if (r <= 0)
1548 return r;
1549
1550 /* Calculate new index */
1551 if (skip < 0) {
1552 if ((uint64_t) -skip >= i)
1553 i = 0;
1554 else
1555 i = i - (uint64_t) -skip;
1556 } else
1557 i += (uint64_t) skip;
1558
1559 n = le64toh(f->header->n_entries);
1560 if (n <= 0)
1561 return -EBADMSG;
1562
1563 if (i >= n)
1564 i = n-1;
1565
1566 return generic_array_get(f,
1567 le64toh(f->header->entry_array_offset),
1568 i,
1569 ret, offset);
1570 }
1571
1572 int journal_file_next_entry_for_data(
1573 JournalFile *f,
1574 Object *o, uint64_t p,
1575 uint64_t data_offset,
1576 direction_t direction,
1577 Object **ret, uint64_t *offset) {
1578
1579 uint64_t n, i;
1580 int r;
1581 Object *d;
1582
1583 assert(f);
1584 assert(p > 0 || !o);
1585
1586 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1587 if (r < 0)
1588 return r;
1589
1590 n = le64toh(d->data.n_entries);
1591 if (n <= 0)
1592 return n;
1593
1594 if (!o)
1595 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1596 else {
1597 if (o->object.type != OBJECT_ENTRY)
1598 return -EINVAL;
1599
1600 r = generic_array_bisect_plus_one(f,
1601 le64toh(d->data.entry_offset),
1602 le64toh(d->data.entry_array_offset),
1603 le64toh(d->data.n_entries),
1604 p,
1605 test_object_offset,
1606 DIRECTION_DOWN,
1607 NULL, NULL,
1608 &i);
1609
1610 if (r <= 0)
1611 return r;
1612
1613 if (direction == DIRECTION_DOWN) {
1614 if (i >= n - 1)
1615 return 0;
1616
1617 i++;
1618 } else {
1619 if (i <= 0)
1620 return 0;
1621
1622 i--;
1623 }
1624
1625 }
1626
1627 return generic_array_get_plus_one(f,
1628 le64toh(d->data.entry_offset),
1629 le64toh(d->data.entry_array_offset),
1630 i,
1631 ret, offset);
1632 }
1633
1634 int journal_file_move_to_entry_by_offset_for_data(
1635 JournalFile *f,
1636 uint64_t data_offset,
1637 uint64_t p,
1638 direction_t direction,
1639 Object **ret, uint64_t *offset) {
1640
1641 int r;
1642 Object *d;
1643
1644 assert(f);
1645
1646 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1647 if (r < 0)
1648 return r;
1649
1650 return generic_array_bisect_plus_one(f,
1651 le64toh(d->data.entry_offset),
1652 le64toh(d->data.entry_array_offset),
1653 le64toh(d->data.n_entries),
1654 p,
1655 test_object_offset,
1656 direction,
1657 ret, offset, NULL);
1658 }
1659
1660 int journal_file_move_to_entry_by_monotonic_for_data(
1661 JournalFile *f,
1662 uint64_t data_offset,
1663 sd_id128_t boot_id,
1664 uint64_t monotonic,
1665 direction_t direction,
1666 Object **ret, uint64_t *offset) {
1667
1668 char t[9+32+1] = "_BOOT_ID=";
1669 Object *o, *d;
1670 int r;
1671 uint64_t b, z;
1672
1673 assert(f);
1674
1675 /* First, seek by time */
1676 sd_id128_to_string(boot_id, t + 9);
1677 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1678 if (r < 0)
1679 return r;
1680 if (r == 0)
1681 return -ENOENT;
1682
1683 r = generic_array_bisect_plus_one(f,
1684 le64toh(o->data.entry_offset),
1685 le64toh(o->data.entry_array_offset),
1686 le64toh(o->data.n_entries),
1687 monotonic,
1688 test_object_monotonic,
1689 direction,
1690 NULL, &z, NULL);
1691 if (r <= 0)
1692 return r;
1693
1694 /* And now, continue seeking until we find an entry that
1695 * exists in both bisection arrays */
1696
1697 for (;;) {
1698 Object *qo;
1699 uint64_t p, q;
1700
1701 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1702 if (r < 0)
1703 return r;
1704
1705 r = generic_array_bisect_plus_one(f,
1706 le64toh(d->data.entry_offset),
1707 le64toh(d->data.entry_array_offset),
1708 le64toh(d->data.n_entries),
1709 z,
1710 test_object_offset,
1711 direction,
1712 NULL, &p, NULL);
1713 if (r <= 0)
1714 return r;
1715
1716 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1717 if (r < 0)
1718 return r;
1719
1720 r = generic_array_bisect_plus_one(f,
1721 le64toh(o->data.entry_offset),
1722 le64toh(o->data.entry_array_offset),
1723 le64toh(o->data.n_entries),
1724 p,
1725 test_object_offset,
1726 direction,
1727 &qo, &q, NULL);
1728
1729 if (r <= 0)
1730 return r;
1731
1732 if (p == q) {
1733 if (ret)
1734 *ret = qo;
1735 if (offset)
1736 *offset = q;
1737
1738 return 1;
1739 }
1740
1741 z = q;
1742 }
1743
1744 return 0;
1745 }
1746
1747 int journal_file_move_to_entry_by_seqnum_for_data(
1748 JournalFile *f,
1749 uint64_t data_offset,
1750 uint64_t seqnum,
1751 direction_t direction,
1752 Object **ret, uint64_t *offset) {
1753
1754 Object *d;
1755 int r;
1756
1757 assert(f);
1758
1759 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1760 if (r < 0)
1761 return r;
1762
1763 return generic_array_bisect_plus_one(f,
1764 le64toh(d->data.entry_offset),
1765 le64toh(d->data.entry_array_offset),
1766 le64toh(d->data.n_entries),
1767 seqnum,
1768 test_object_seqnum,
1769 direction,
1770 ret, offset, NULL);
1771 }
1772
1773 int journal_file_move_to_entry_by_realtime_for_data(
1774 JournalFile *f,
1775 uint64_t data_offset,
1776 uint64_t realtime,
1777 direction_t direction,
1778 Object **ret, uint64_t *offset) {
1779
1780 Object *d;
1781 int r;
1782
1783 assert(f);
1784
1785 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1786 if (r < 0)
1787 return r;
1788
1789 return generic_array_bisect_plus_one(f,
1790 le64toh(d->data.entry_offset),
1791 le64toh(d->data.entry_array_offset),
1792 le64toh(d->data.n_entries),
1793 realtime,
1794 test_object_realtime,
1795 direction,
1796 ret, offset, NULL);
1797 }
1798
1799 void journal_file_dump(JournalFile *f) {
1800 Object *o;
1801 int r;
1802 uint64_t p;
1803
1804 assert(f);
1805
1806 journal_file_print_header(f);
1807
1808 p = le64toh(f->header->header_size);
1809 while (p != 0) {
1810 r = journal_file_move_to_object(f, -1, p, &o);
1811 if (r < 0)
1812 goto fail;
1813
1814 switch (o->object.type) {
1815
1816 case OBJECT_UNUSED:
1817 printf("Type: OBJECT_UNUSED\n");
1818 break;
1819
1820 case OBJECT_DATA:
1821 printf("Type: OBJECT_DATA\n");
1822 break;
1823
1824 case OBJECT_ENTRY:
1825 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1826 (unsigned long long) le64toh(o->entry.seqnum),
1827 (unsigned long long) le64toh(o->entry.monotonic),
1828 (unsigned long long) le64toh(o->entry.realtime));
1829 break;
1830
1831 case OBJECT_FIELD_HASH_TABLE:
1832 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1833 break;
1834
1835 case OBJECT_DATA_HASH_TABLE:
1836 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1837 break;
1838
1839 case OBJECT_ENTRY_ARRAY:
1840 printf("Type: OBJECT_ENTRY_ARRAY\n");
1841 break;
1842
1843 case OBJECT_TAG:
1844 printf("Type: OBJECT_TAG %llu\n",
1845 (unsigned long long) le64toh(o->tag.seqnum));
1846 break;
1847 }
1848
1849 if (o->object.flags & OBJECT_COMPRESSED)
1850 printf("Flags: COMPRESSED\n");
1851
1852 if (p == le64toh(f->header->tail_object_offset))
1853 p = 0;
1854 else
1855 p = p + ALIGN64(le64toh(o->object.size));
1856 }
1857
1858 return;
1859 fail:
1860 log_error("File corrupt");
1861 }
1862
1863 void journal_file_print_header(JournalFile *f) {
1864 char a[33], b[33], c[33];
1865 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1866
1867 assert(f);
1868
1869 printf("File Path: %s\n"
1870 "File ID: %s\n"
1871 "Machine ID: %s\n"
1872 "Boot ID: %s\n"
1873 "Sequential Number ID: %s\n"
1874 "State: %s\n"
1875 "Compatible Flags:%s%s\n"
1876 "Incompatible Flags:%s%s\n"
1877 "Header size: %llu\n"
1878 "Arena size: %llu\n"
1879 "Data Hash Table Size: %llu\n"
1880 "Field Hash Table Size: %llu\n"
1881 "Objects: %llu\n"
1882 "Entry Objects: %llu\n"
1883 "Rotate Suggested: %s\n"
1884 "Head Sequential Number: %llu\n"
1885 "Tail Sequential Number: %llu\n"
1886 "Head Realtime Timestamp: %s\n"
1887 "Tail Realtime Timestamp: %s\n",
1888 f->path,
1889 sd_id128_to_string(f->header->file_id, a),
1890 sd_id128_to_string(f->header->machine_id, b),
1891 sd_id128_to_string(f->header->boot_id, c),
1892 sd_id128_to_string(f->header->seqnum_id, c),
1893 f->header->state == STATE_OFFLINE ? "offline" :
1894 f->header->state == STATE_ONLINE ? "online" :
1895 f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
1896 (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
1897 (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
1898 (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1899 (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1900 (unsigned long long) le64toh(f->header->header_size),
1901 (unsigned long long) le64toh(f->header->arena_size),
1902 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1903 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1904 (unsigned long long) le64toh(f->header->n_objects),
1905 (unsigned long long) le64toh(f->header->n_entries),
1906 yes_no(journal_file_rotate_suggested(f)),
1907 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1908 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1909 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1910 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
1911
1912 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1913 printf("Data Objects: %llu\n"
1914 "Data Hash Table Fill: %.1f%%\n",
1915 (unsigned long long) le64toh(f->header->n_data),
1916 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1917
1918 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1919 printf("Field Objects: %llu\n"
1920 "Field Hash Table Fill: %.1f%%\n",
1921 (unsigned long long) le64toh(f->header->n_fields),
1922 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1923 }
1924
/* Opens (and, if the file is empty and opened writable, initializes) the
 * journal file fname.
 *
 * Only the access modes O_RDONLY and O_RDWR are accepted and the file
 * name has to end in ".journal", otherwise -EINVAL is returned. If
 * mmap_cache is non-NULL a reference to it is taken, otherwise a new
 * cache is allocated. For writable files the metrics are taken from
 * 'metrics' (after passing them through journal_default_metrics()) or
 * else copied from 'template'.
 *
 * Returns 0 on success, in which case the new object is stored in *ret
 * if ret is non-NULL. On failure a negative errno-style value is
 * returned and the partially set up object is released again with
 * journal_file_close(). */
int journal_file_open(
                const char *fname,
                int flags,
                mode_t mode,
                bool compress,
                bool authenticate,
                JournalMetrics *metrics,
                MMapCache *mmap_cache,
                JournalFile *template,
                JournalFile **ret) {

        JournalFile *f;
        int r;
        bool newly_created = false;

        assert(fname);

        if ((flags & O_ACCMODE) != O_RDONLY &&
            (flags & O_ACCMODE) != O_RDWR)
                return -EINVAL;

        if (!endswith(fname, ".journal"))
                return -EINVAL;

        f = new0(JournalFile, 1);
        if (!f)
                return -ENOMEM;

        /* Mark the fd as unset before anything can jump to "fail" */
        f->fd = -1;
        f->mode = mode;

        f->flags = flags;
        f->prot = prot_from_flags(flags);
        f->writable = (flags & O_ACCMODE) != O_RDONLY;
        f->compress = compress;
        f->authenticate = authenticate;

        if (mmap_cache)
                f->mmap = mmap_cache_ref(mmap_cache);
        else {
                /* One context for each type, plus the zeroth catchall
                 * context. One fd for the file plus one for each type
                 * (which we need during verification) */
                f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
                if (!f->mmap) {
                        r = -ENOMEM;
                        goto fail;
                }
        }

        f->path = strdup(fname);
        if (!f->path) {
                r = -ENOMEM;
                goto fail;
        }

        f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
        if (f->fd < 0) {
                r = -errno;
                goto fail;
        }

        if (fstat(f->fd, &f->last_stat) < 0) {
                r = -errno;
                goto fail;
        }

        /* An empty file that we may write to is treated as a new journal
         * file that still needs its header written */
        if (f->last_stat.st_size == 0 && f->writable) {
                newly_created = true;

                /* Try to load the FSPRG state, and if we can't, then
                 * just don't do authentication */
                r = journal_file_load_fsprg(f);
                if (r < 0)
                        f->authenticate = false;

                r = journal_file_init_header(f, template);
                if (r < 0)
                        goto fail;

                /* Refresh the stat data after the header was initialized */
                if (fstat(f->fd, &f->last_stat) < 0) {
                        r = -errno;
                        goto fail;
                }
        }

        /* Files too small to contain even the minimal header are refused */
        if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
                r = -EIO;
                goto fail;
        }

        f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
        if (f->header == MAP_FAILED) {
                /* Reset to NULL so that the pointer is not left at MAP_FAILED */
                f->header = NULL;
                r = -errno;
                goto fail;
        }

        /* Headers we did not just write ourselves need to be checked */
        if (!newly_created) {
                r = journal_file_verify_header(f);
                if (r < 0)
                        goto fail;
        }

        /* For pre-existing writable files a failure to load the FSPRG
         * state is fatal, unlike in the newly created case above */
        if (!newly_created && f->writable) {
                r = journal_file_load_fsprg(f);
                if (r < 0)
                        goto fail;
        }

        if (f->writable) {
                if (metrics) {
                        journal_default_metrics(metrics, f->fd);
                        f->metrics = *metrics;
                } else if (template)
                        f->metrics = template->metrics;

                r = journal_file_refresh_header(f);
                if (r < 0)
                        goto fail;

                r = journal_file_setup_hmac(f);
                if (r < 0)
                        goto fail;
        }

        /* New files get their hash tables and the first tag now */
        if (newly_created) {
                r = journal_file_setup_field_hash_table(f);
                if (r < 0)
                        goto fail;

                r = journal_file_setup_data_hash_table(f);
                if (r < 0)
                        goto fail;

                r = journal_file_append_first_tag(f);
                if (r < 0)
                        goto fail;
        }

        r = journal_file_map_field_hash_table(f);
        if (r < 0)
                goto fail;

        r = journal_file_map_data_hash_table(f);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = f;

        return 0;

fail:
        journal_file_close(f);

        return r;
}
2083
2084 int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
2085 char *p;
2086 size_t l;
2087 JournalFile *old_file, *new_file = NULL;
2088 int r;
2089
2090 assert(f);
2091 assert(*f);
2092
2093 old_file = *f;
2094
2095 if (!old_file->writable)
2096 return -EINVAL;
2097
2098 if (!endswith(old_file->path, ".journal"))
2099 return -EINVAL;
2100
2101 l = strlen(old_file->path);
2102
2103 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2104 if (!p)
2105 return -ENOMEM;
2106
2107 memcpy(p, old_file->path, l - 8);
2108 p[l-8] = '@';
2109 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2110 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2111 "-%016llx-%016llx.journal",
2112 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2113 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2114
2115 r = rename(old_file->path, p);
2116 free(p);
2117
2118 if (r < 0)
2119 return -errno;
2120
2121 old_file->header->state = STATE_ARCHIVED;
2122
2123 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file->mmap, old_file, &new_file);
2124 journal_file_close(old_file);
2125
2126 *f = new_file;
2127 return r;
2128 }
2129
2130 int journal_file_open_reliably(
2131 const char *fname,
2132 int flags,
2133 mode_t mode,
2134 bool compress,
2135 bool authenticate,
2136 JournalMetrics *metrics,
2137 MMapCache *mmap,
2138 JournalFile *template,
2139 JournalFile **ret) {
2140
2141 int r;
2142 size_t l;
2143 char *p;
2144
2145 r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
2146 if (r != -EBADMSG && /* corrupted */
2147 r != -ENODATA && /* truncated */
2148 r != -EHOSTDOWN && /* other machine */
2149 r != -EPROTONOSUPPORT && /* incompatible feature */
2150 r != -EBUSY && /* unclean shutdown */
2151 r != -ESHUTDOWN /* already archived */)
2152 return r;
2153
2154 if ((flags & O_ACCMODE) == O_RDONLY)
2155 return r;
2156
2157 if (!(flags & O_CREAT))
2158 return r;
2159
2160 if (!endswith(fname, ".journal"))
2161 return r;
2162
2163 /* The file is corrupted. Rotate it away and try it again (but only once) */
2164
2165 l = strlen(fname);
2166 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2167 (int) (l-8), fname,
2168 (unsigned long long) now(CLOCK_REALTIME),
2169 random_ull()) < 0)
2170 return -ENOMEM;
2171
2172 r = rename(fname, p);
2173 free(p);
2174 if (r < 0)
2175 return -errno;
2176
2177 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2178
2179 return journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
2180 }
2181
2182
/* Copies the entry object o, located at offset p in 'from', into 'to'.
 *
 * Each data object the entry references is read from 'from'
 * (uncompressed if necessary) and appended to 'to' with
 * journal_file_append_data(); afterwards a new entry referencing the
 * appended data objects is appended with the original timestamps.
 *
 * Returns -EPERM if 'to' is not writable, -EINVAL if the entry's
 * monotonic timestamp is older than the tail of 'to' (when that is
 * valid), -EBADMSG on a hash mismatch or failed decompression, -E2BIG
 * if a payload size does not fit into size_t, -EPROTONOSUPPORT for
 * compressed data when built without XZ support, or whatever the called
 * functions return. */
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
        uint64_t i, n;
        uint64_t q, xor_hash = 0;
        int r;
        EntryItem *items;
        dual_timestamp ts;

        assert(from);
        assert(to);
        assert(o);
        assert(p);

        if (!to->writable)
                return -EPERM;

        ts.monotonic = le64toh(o->entry.monotonic);
        ts.realtime = le64toh(o->entry.realtime);

        if (to->tail_entry_monotonic_valid &&
            ts.monotonic < le64toh(to->header->tail_entry_monotonic))
                return -EINVAL;

        /* NOTE(review): n is derived from the entry object and is not
         * bounded here, so the size of this stack allocation is
         * unchecked -- confirm that callers only pass validated objects */
        n = journal_file_entry_n_items(o);
        items = alloca(sizeof(EntryItem) * n);

        for (i = 0; i < n; i++) {
                uint64_t l, h;
                le64_t le_hash;
                size_t t;
                void *data;
                Object *u;

                /* Remember offset and hash of the item before o is
                 * pointed at the data object */
                q = le64toh(o->entry.items[i].object_offset);
                le_hash = o->entry.items[i].hash;

                /* From here on o refers to the data object, not the entry */
                r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
                if (r < 0)
                        return r;

                /* The hash stored in the entry item must match the one
                 * stored in the data object */
                if (le_hash != o->data.hash)
                        return -EBADMSG;

                l = le64toh(o->object.size) - offsetof(Object, data.payload);
                t = (size_t) l;

                /* We hit the limit on 32bit machines */
                if ((uint64_t) t != l)
                        return -E2BIG;

                if (o->object.flags & OBJECT_COMPRESSED) {
#ifdef HAVE_XZ
                        uint64_t rsize;

                        if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
                                return -EBADMSG;

                        data = from->compress_buffer;
                        l = rsize;
#else
                        return -EPROTONOSUPPORT;
#endif
                } else
                        data = o->data.payload;

                r = journal_file_append_data(to, data, l, &u, &h);
                if (r < 0)
                        return r;

                /* Collect the item for the new entry, and the XOR of
                 * all data hashes */
                xor_hash ^= le64toh(u->data.hash);
                items[i].object_offset = htole64(h);
                items[i].hash = u->data.hash;

                /* Point o back at the entry for the next iteration */
                r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
                if (r < 0)
                        return r;
        }

        return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
}
2262
2263 void journal_default_metrics(JournalMetrics *m, int fd) {
2264 uint64_t fs_size = 0;
2265 struct statvfs ss;
2266 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2267
2268 assert(m);
2269 assert(fd >= 0);
2270
2271 if (fstatvfs(fd, &ss) >= 0)
2272 fs_size = ss.f_frsize * ss.f_blocks;
2273
2274 if (m->max_use == (uint64_t) -1) {
2275
2276 if (fs_size > 0) {
2277 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2278
2279 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2280 m->max_use = DEFAULT_MAX_USE_UPPER;
2281
2282 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2283 m->max_use = DEFAULT_MAX_USE_LOWER;
2284 } else
2285 m->max_use = DEFAULT_MAX_USE_LOWER;
2286 } else {
2287 m->max_use = PAGE_ALIGN(m->max_use);
2288
2289 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2290 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2291 }
2292
2293 if (m->max_size == (uint64_t) -1) {
2294 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2295
2296 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2297 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2298 } else
2299 m->max_size = PAGE_ALIGN(m->max_size);
2300
2301 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2302 m->max_size = JOURNAL_FILE_SIZE_MIN;
2303
2304 if (m->max_size*2 > m->max_use)
2305 m->max_use = m->max_size*2;
2306
2307 if (m->min_size == (uint64_t) -1)
2308 m->min_size = JOURNAL_FILE_SIZE_MIN;
2309 else {
2310 m->min_size = PAGE_ALIGN(m->min_size);
2311
2312 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2313 m->min_size = JOURNAL_FILE_SIZE_MIN;
2314
2315 if (m->min_size > m->max_size)
2316 m->max_size = m->min_size;
2317 }
2318
2319 if (m->keep_free == (uint64_t) -1) {
2320
2321 if (fs_size > 0) {
2322 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2323
2324 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2325 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2326
2327 } else
2328 m->keep_free = DEFAULT_KEEP_FREE;
2329 }
2330
2331 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2332 format_bytes(a, sizeof(a), m->max_use),
2333 format_bytes(b, sizeof(b), m->max_size),
2334 format_bytes(c, sizeof(c), m->min_size),
2335 format_bytes(d, sizeof(d), m->keep_free));
2336 }
2337
2338 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2339 assert(f);
2340 assert(from || to);
2341
2342 if (from) {
2343 if (f->header->head_entry_realtime == 0)
2344 return -ENOENT;
2345
2346 *from = le64toh(f->header->head_entry_realtime);
2347 }
2348
2349 if (to) {
2350 if (f->header->tail_entry_realtime == 0)
2351 return -ENOENT;
2352
2353 *to = le64toh(f->header->tail_entry_realtime);
2354 }
2355
2356 return 1;
2357 }
2358
2359 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2360 char t[9+32+1] = "_BOOT_ID=";
2361 Object *o;
2362 uint64_t p;
2363 int r;
2364
2365 assert(f);
2366 assert(from || to);
2367
2368 sd_id128_to_string(boot_id, t + 9);
2369
2370 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2371 if (r <= 0)
2372 return r;
2373
2374 if (le64toh(o->data.n_entries) <= 0)
2375 return 0;
2376
2377 if (from) {
2378 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2379 if (r < 0)
2380 return r;
2381
2382 *from = le64toh(o->entry.monotonic);
2383 }
2384
2385 if (to) {
2386 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2387 if (r < 0)
2388 return r;
2389
2390 r = generic_array_get_plus_one(f,
2391 le64toh(o->data.entry_offset),
2392 le64toh(o->data.entry_array_offset),
2393 le64toh(o->data.n_entries)-1,
2394 &o, NULL);
2395 if (r <= 0)
2396 return r;
2397
2398 *to = le64toh(o->entry.monotonic);
2399 }
2400
2401 return 1;
2402 }
2403
2404 bool journal_file_rotate_suggested(JournalFile *f) {
2405 assert(f);
2406
2407 /* If we gained new header fields we gained new features,
2408 * hence suggest a rotation */
2409 if (le64toh(f->header->header_size) < sizeof(Header)) {
2410 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2411 return true;
2412 }
2413
2414 /* Let's check if the hash tables grew over a certain fill
2415 * level (75%, borrowing this value from Java's hash table
2416 * implementation), and if so suggest a rotation. To calculate
2417 * the fill level we need the n_data field, which only exists
2418 * in newer versions. */
2419
2420 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2421 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2422 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2423 f->path,
2424 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2425 (unsigned long long) le64toh(f->header->n_data),
2426 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2427 (unsigned long long) (f->last_stat.st_size),
2428 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2429 return true;
2430 }
2431
2432 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2433 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2434 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2435 f->path,
2436 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2437 (unsigned long long) le64toh(f->header->n_fields),
2438 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2439 return true;
2440 }
2441
2442 return false;
2443 }