1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
26 #include <sys/statvfs.h>
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
37 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
38 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
40 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
42 /* This is the minimum journal file size */
43 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
45 /* These are the lower and upper bounds if we deduce the max_use value
46 * from the file system size */
47 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
48 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
50 /* This is the upper bound if we deduce max_size from max_use */
51 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
55 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
/* This is the keep_free value when we can't determine the system
 * size */
59 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
61 /* n_data was the first entry we added after the initial file format design */
62 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
64 void journal_file_close(JournalFile
*f
) {
67 /* Write the final tag */
68 if (f
->seal
&& f
->writable
)
69 journal_file_append_tag(f
);
71 /* Sync everything to disk, before we mark the file offline */
72 if (f
->mmap
&& f
->fd
>= 0)
73 mmap_cache_close_fd(f
->mmap
, f
->fd
);
75 if (f
->writable
&& f
->fd
>= 0)
79 /* Mark the file offline. Don't override the archived state if it already is set */
80 if (f
->writable
&& f
->header
->state
== STATE_ONLINE
)
81 f
->header
->state
= STATE_OFFLINE
;
83 munmap(f
->header
, PAGE_ALIGN(sizeof(Header
)));
87 close_nointr_nofail(f
->fd
);
92 mmap_cache_unref(f
->mmap
);
95 free(f
->compress_buffer
);
100 munmap(f
->fss_file
, PAGE_ALIGN(f
->fss_file_size
));
101 else if (f
->fsprg_state
)
102 free(f
->fsprg_state
);
107 gcry_md_close(f
->hmac
);
113 static int journal_file_init_header(JournalFile
*f
, JournalFile
*template) {
121 memcpy(h
.signature
, HEADER_SIGNATURE
, 8);
122 h
.header_size
= htole64(ALIGN64(sizeof(h
)));
124 h
.incompatible_flags
=
125 htole32(f
->compress
? HEADER_INCOMPATIBLE_COMPRESSED
: 0);
128 htole32(f
->seal
? HEADER_COMPATIBLE_SEALED
: 0);
130 r
= sd_id128_randomize(&h
.file_id
);
135 h
.seqnum_id
= template->header
->seqnum_id
;
136 h
.tail_entry_seqnum
= template->header
->tail_entry_seqnum
;
138 h
.seqnum_id
= h
.file_id
;
140 k
= pwrite(f
->fd
, &h
, sizeof(h
), 0);
150 static int journal_file_refresh_header(JournalFile
*f
) {
156 r
= sd_id128_get_machine(&f
->header
->machine_id
);
160 r
= sd_id128_get_boot(&boot_id
);
164 if (sd_id128_equal(boot_id
, f
->header
->boot_id
))
165 f
->tail_entry_monotonic_valid
= true;
167 f
->header
->boot_id
= boot_id
;
169 f
->header
->state
= STATE_ONLINE
;
171 /* Sync the online state to disk */
172 msync(f
->header
, PAGE_ALIGN(sizeof(Header
)), MS_SYNC
);
178 static int journal_file_verify_header(JournalFile
*f
) {
181 if (memcmp(f
->header
->signature
, HEADER_SIGNATURE
, 8))
184 /* In both read and write mode we refuse to open files with
185 * incompatible flags we don't know */
187 if ((le32toh(f
->header
->incompatible_flags
) & ~HEADER_INCOMPATIBLE_COMPRESSED
) != 0)
188 return -EPROTONOSUPPORT
;
190 if (f
->header
->incompatible_flags
!= 0)
191 return -EPROTONOSUPPORT
;
194 /* When open for writing we refuse to open files with
195 * compatible flags, too */
198 if ((le32toh(f
->header
->compatible_flags
) & ~HEADER_COMPATIBLE_SEALED
) != 0)
199 return -EPROTONOSUPPORT
;
201 if (f
->header
->compatible_flags
!= 0)
202 return -EPROTONOSUPPORT
;
206 if (f
->header
->state
>= _STATE_MAX
)
209 /* The first addition was n_data, so check that we are at least this large */
210 if (le64toh(f
->header
->header_size
) < HEADER_SIZE_MIN
)
213 if (JOURNAL_HEADER_SEALED(f
->header
) && !JOURNAL_HEADER_CONTAINS(f
->header
, n_entry_arrays
))
216 if ((le64toh(f
->header
->header_size
) + le64toh(f
->header
->arena_size
)) > (uint64_t) f
->last_stat
.st_size
)
219 if (le64toh(f
->header
->tail_object_offset
) > (le64toh(f
->header
->header_size
) + le64toh(f
->header
->arena_size
)))
222 if (!VALID64(f
->header
->data_hash_table_offset
) ||
223 !VALID64(f
->header
->field_hash_table_offset
) ||
224 !VALID64(f
->header
->tail_object_offset
) ||
225 !VALID64(f
->header
->entry_array_offset
))
230 sd_id128_t machine_id
;
233 r
= sd_id128_get_machine(&machine_id
);
237 if (!sd_id128_equal(machine_id
, f
->header
->machine_id
))
240 state
= f
->header
->state
;
242 if (state
== STATE_ONLINE
) {
243 log_debug("Journal file %s is already online. Assuming unclean closing.", f
->path
);
245 } else if (state
== STATE_ARCHIVED
)
247 else if (state
!= STATE_OFFLINE
) {
248 log_debug("Journal file %s has unknown state %u.", f
->path
, state
);
253 f
->compress
= JOURNAL_HEADER_COMPRESSED(f
->header
);
256 f
->seal
= JOURNAL_HEADER_SEALED(f
->header
);
261 static int journal_file_allocate(JournalFile
*f
, uint64_t offset
, uint64_t size
) {
262 uint64_t old_size
, new_size
;
267 /* We assume that this file is not sparse, and we know that
268 * for sure, since we always call posix_fallocate()
272 le64toh(f
->header
->header_size
) +
273 le64toh(f
->header
->arena_size
);
275 new_size
= PAGE_ALIGN(offset
+ size
);
276 if (new_size
< le64toh(f
->header
->header_size
))
277 new_size
= le64toh(f
->header
->header_size
);
279 if (new_size
<= old_size
)
282 if (f
->metrics
.max_size
> 0 &&
283 new_size
> f
->metrics
.max_size
)
286 if (new_size
> f
->metrics
.min_size
&&
287 f
->metrics
.keep_free
> 0) {
290 if (fstatvfs(f
->fd
, &svfs
) >= 0) {
293 available
= svfs
.f_bfree
* svfs
.f_bsize
;
295 if (available
>= f
->metrics
.keep_free
)
296 available
-= f
->metrics
.keep_free
;
300 if (new_size
- old_size
> available
)
305 /* Note that the glibc fallocate() fallback is very
306 inefficient, hence we try to minimize the allocation area
308 r
= posix_fallocate(f
->fd
, old_size
, new_size
- old_size
);
312 mmap_cache_close_fd_range(f
->mmap
, f
->fd
, old_size
);
314 if (fstat(f
->fd
, &f
->last_stat
) < 0)
317 f
->header
->arena_size
= htole64(new_size
- le64toh(f
->header
->header_size
));
322 static int journal_file_move_to(JournalFile
*f
, int context
, uint64_t offset
, uint64_t size
, void **ret
) {
326 /* Avoid SIGBUS on invalid accesses */
327 if (offset
+ size
> (uint64_t) f
->last_stat
.st_size
) {
328 /* Hmm, out of range? Let's refresh the fstat() data
329 * first, before we trust that check. */
331 if (fstat(f
->fd
, &f
->last_stat
) < 0 ||
332 offset
+ size
> (uint64_t) f
->last_stat
.st_size
)
333 return -EADDRNOTAVAIL
;
336 return mmap_cache_get(f
->mmap
, f
->fd
, f
->prot
, context
, offset
, size
, ret
);
339 static uint64_t minimum_header_size(Object
*o
) {
341 static uint64_t table
[] = {
342 [OBJECT_DATA
] = sizeof(DataObject
),
343 [OBJECT_FIELD
] = sizeof(FieldObject
),
344 [OBJECT_ENTRY
] = sizeof(EntryObject
),
345 [OBJECT_DATA_HASH_TABLE
] = sizeof(HashTableObject
),
346 [OBJECT_FIELD_HASH_TABLE
] = sizeof(HashTableObject
),
347 [OBJECT_ENTRY_ARRAY
] = sizeof(EntryArrayObject
),
348 [OBJECT_TAG
] = sizeof(TagObject
),
351 if (o
->object
.type
>= ELEMENTSOF(table
) || table
[o
->object
.type
] <= 0)
352 return sizeof(ObjectHeader
);
354 return table
[o
->object
.type
];
357 int journal_file_move_to_object(JournalFile
*f
, int type
, uint64_t offset
, Object
**ret
) {
367 /* Objects may only be located at multiple of 64 bit */
368 if (!VALID64(offset
))
371 /* One context for each type, plus one catch-all for the rest */
372 context
= type
> 0 && type
< _OBJECT_TYPE_MAX
? type
: 0;
374 r
= journal_file_move_to(f
, context
, offset
, sizeof(ObjectHeader
), &t
);
379 s
= le64toh(o
->object
.size
);
381 if (s
< sizeof(ObjectHeader
))
384 if (o
->object
.type
<= OBJECT_UNUSED
)
387 if (s
< minimum_header_size(o
))
390 if (type
>= 0 && o
->object
.type
!= type
)
393 if (s
> sizeof(ObjectHeader
)) {
394 r
= journal_file_move_to(f
, o
->object
.type
, offset
, s
, &t
);
405 static uint64_t journal_file_entry_seqnum(JournalFile
*f
, uint64_t *seqnum
) {
410 r
= le64toh(f
->header
->tail_entry_seqnum
) + 1;
413 /* If an external seqnum counter was passed, we update
414 * both the local and the external one, and set it to
415 * the maximum of both */
423 f
->header
->tail_entry_seqnum
= htole64(r
);
425 if (f
->header
->head_entry_seqnum
== 0)
426 f
->header
->head_entry_seqnum
= htole64(r
);
431 int journal_file_append_object(JournalFile
*f
, int type
, uint64_t size
, Object
**ret
, uint64_t *offset
) {
438 assert(type
> 0 && type
< _OBJECT_TYPE_MAX
);
439 assert(size
>= sizeof(ObjectHeader
));
443 p
= le64toh(f
->header
->tail_object_offset
);
445 p
= le64toh(f
->header
->header_size
);
447 r
= journal_file_move_to_object(f
, -1, p
, &tail
);
451 p
+= ALIGN64(le64toh(tail
->object
.size
));
454 r
= journal_file_allocate(f
, p
, size
);
458 r
= journal_file_move_to(f
, type
, p
, size
, &t
);
465 o
->object
.type
= type
;
466 o
->object
.size
= htole64(size
);
468 f
->header
->tail_object_offset
= htole64(p
);
469 f
->header
->n_objects
= htole64(le64toh(f
->header
->n_objects
) + 1);
477 static int journal_file_setup_data_hash_table(JournalFile
*f
) {
484 /* We estimate that we need 1 hash table entry per 768 of
485 journal file and we want to make sure we never get beyond
486 75% fill level. Calculate the hash table size for the
487 maximum file size based on these metrics. */
489 s
= (f
->metrics
.max_size
* 4 / 768 / 3) * sizeof(HashItem
);
490 if (s
< DEFAULT_DATA_HASH_TABLE_SIZE
)
491 s
= DEFAULT_DATA_HASH_TABLE_SIZE
;
493 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s
/ sizeof(HashItem
)));
495 r
= journal_file_append_object(f
,
496 OBJECT_DATA_HASH_TABLE
,
497 offsetof(Object
, hash_table
.items
) + s
,
502 memset(o
->hash_table
.items
, 0, s
);
504 f
->header
->data_hash_table_offset
= htole64(p
+ offsetof(Object
, hash_table
.items
));
505 f
->header
->data_hash_table_size
= htole64(s
);
510 static int journal_file_setup_field_hash_table(JournalFile
*f
) {
517 s
= DEFAULT_FIELD_HASH_TABLE_SIZE
;
518 r
= journal_file_append_object(f
,
519 OBJECT_FIELD_HASH_TABLE
,
520 offsetof(Object
, hash_table
.items
) + s
,
525 memset(o
->hash_table
.items
, 0, s
);
527 f
->header
->field_hash_table_offset
= htole64(p
+ offsetof(Object
, hash_table
.items
));
528 f
->header
->field_hash_table_size
= htole64(s
);
533 static int journal_file_map_data_hash_table(JournalFile
*f
) {
540 p
= le64toh(f
->header
->data_hash_table_offset
);
541 s
= le64toh(f
->header
->data_hash_table_size
);
543 r
= journal_file_move_to(f
,
544 OBJECT_DATA_HASH_TABLE
,
550 f
->data_hash_table
= t
;
554 static int journal_file_map_field_hash_table(JournalFile
*f
) {
561 p
= le64toh(f
->header
->field_hash_table_offset
);
562 s
= le64toh(f
->header
->field_hash_table_size
);
564 r
= journal_file_move_to(f
,
565 OBJECT_FIELD_HASH_TABLE
,
571 f
->field_hash_table
= t
;
575 static int journal_file_link_data(JournalFile
*f
, Object
*o
, uint64_t offset
, uint64_t hash
) {
582 assert(o
->object
.type
== OBJECT_DATA
);
584 /* This might alter the window we are looking at */
586 o
->data
.next_hash_offset
= o
->data
.next_field_offset
= 0;
587 o
->data
.entry_offset
= o
->data
.entry_array_offset
= 0;
588 o
->data
.n_entries
= 0;
590 h
= hash
% (le64toh(f
->header
->data_hash_table_size
) / sizeof(HashItem
));
591 p
= le64toh(f
->data_hash_table
[h
].tail_hash_offset
);
593 /* Only entry in the hash table is easy */
594 f
->data_hash_table
[h
].head_hash_offset
= htole64(offset
);
596 /* Move back to the previous data object, to patch in
599 r
= journal_file_move_to_object(f
, OBJECT_DATA
, p
, &o
);
603 o
->data
.next_hash_offset
= htole64(offset
);
606 f
->data_hash_table
[h
].tail_hash_offset
= htole64(offset
);
608 if (JOURNAL_HEADER_CONTAINS(f
->header
, n_data
))
609 f
->header
->n_data
= htole64(le64toh(f
->header
->n_data
) + 1);
614 int journal_file_find_data_object_with_hash(
616 const void *data
, uint64_t size
, uint64_t hash
,
617 Object
**ret
, uint64_t *offset
) {
619 uint64_t p
, osize
, h
;
623 assert(data
|| size
== 0);
625 osize
= offsetof(Object
, data
.payload
) + size
;
627 if (f
->header
->data_hash_table_size
== 0)
630 h
= hash
% (le64toh(f
->header
->data_hash_table_size
) / sizeof(HashItem
));
631 p
= le64toh(f
->data_hash_table
[h
].head_hash_offset
);
636 r
= journal_file_move_to_object(f
, OBJECT_DATA
, p
, &o
);
640 if (le64toh(o
->data
.hash
) != hash
)
643 if (o
->object
.flags
& OBJECT_COMPRESSED
) {
647 l
= le64toh(o
->object
.size
);
648 if (l
<= offsetof(Object
, data
.payload
))
651 l
-= offsetof(Object
, data
.payload
);
653 if (!uncompress_blob(o
->data
.payload
, l
, &f
->compress_buffer
, &f
->compress_buffer_size
, &rsize
))
657 memcmp(f
->compress_buffer
, data
, size
) == 0) {
668 return -EPROTONOSUPPORT
;
671 } else if (le64toh(o
->object
.size
) == osize
&&
672 memcmp(o
->data
.payload
, data
, size
) == 0) {
684 p
= le64toh(o
->data
.next_hash_offset
);
690 int journal_file_find_data_object(
692 const void *data
, uint64_t size
,
693 Object
**ret
, uint64_t *offset
) {
698 assert(data
|| size
== 0);
700 hash
= hash64(data
, size
);
702 return journal_file_find_data_object_with_hash(f
,
707 static int journal_file_append_data(
709 const void *data
, uint64_t size
,
710 Object
**ret
, uint64_t *offset
) {
716 bool compressed
= false;
719 assert(data
|| size
== 0);
721 hash
= hash64(data
, size
);
723 r
= journal_file_find_data_object_with_hash(f
, data
, size
, hash
, &o
, &p
);
737 osize
= offsetof(Object
, data
.payload
) + size
;
738 r
= journal_file_append_object(f
, OBJECT_DATA
, osize
, &o
, &p
);
742 o
->data
.hash
= htole64(hash
);
746 size
>= COMPRESSION_SIZE_THRESHOLD
) {
749 compressed
= compress_blob(data
, size
, o
->data
.payload
, &rsize
);
752 o
->object
.size
= htole64(offsetof(Object
, data
.payload
) + rsize
);
753 o
->object
.flags
|= OBJECT_COMPRESSED
;
755 log_debug("Compressed data object %lu -> %lu", (unsigned long) size
, (unsigned long) rsize
);
760 if (!compressed
&& size
> 0)
761 memcpy(o
->data
.payload
, data
, size
);
763 r
= journal_file_link_data(f
, o
, p
, hash
);
767 r
= journal_file_hmac_put_object(f
, OBJECT_DATA
, p
);
771 /* The linking might have altered the window, so let's
772 * refresh our pointer */
773 r
= journal_file_move_to_object(f
, OBJECT_DATA
, p
, &o
);
786 uint64_t journal_file_entry_n_items(Object
*o
) {
788 assert(o
->object
.type
== OBJECT_ENTRY
);
790 return (le64toh(o
->object
.size
) - offsetof(Object
, entry
.items
)) / sizeof(EntryItem
);
793 uint64_t journal_file_entry_array_n_items(Object
*o
) {
795 assert(o
->object
.type
== OBJECT_ENTRY_ARRAY
);
797 return (le64toh(o
->object
.size
) - offsetof(Object
, entry_array
.items
)) / sizeof(uint64_t);
800 uint64_t journal_file_hash_table_n_items(Object
*o
) {
802 assert(o
->object
.type
== OBJECT_DATA_HASH_TABLE
||
803 o
->object
.type
== OBJECT_FIELD_HASH_TABLE
);
805 return (le64toh(o
->object
.size
) - offsetof(Object
, hash_table
.items
)) / sizeof(HashItem
);
808 static int link_entry_into_array(JournalFile
*f
,
813 uint64_t n
= 0, ap
= 0, q
, i
, a
, hidx
;
822 i
= hidx
= le64toh(*idx
);
825 r
= journal_file_move_to_object(f
, OBJECT_ENTRY_ARRAY
, a
, &o
);
829 n
= journal_file_entry_array_n_items(o
);
831 o
->entry_array
.items
[i
] = htole64(p
);
832 *idx
= htole64(hidx
+ 1);
838 a
= le64toh(o
->entry_array
.next_entry_array_offset
);
849 r
= journal_file_append_object(f
, OBJECT_ENTRY_ARRAY
,
850 offsetof(Object
, entry_array
.items
) + n
* sizeof(uint64_t),
855 r
= journal_file_hmac_put_object(f
, OBJECT_ENTRY_ARRAY
, q
);
859 o
->entry_array
.items
[i
] = htole64(p
);
864 r
= journal_file_move_to_object(f
, OBJECT_ENTRY_ARRAY
, ap
, &o
);
868 o
->entry_array
.next_entry_array_offset
= htole64(q
);
871 if (JOURNAL_HEADER_CONTAINS(f
->header
, n_entry_arrays
))
872 f
->header
->n_entry_arrays
= htole64(le64toh(f
->header
->n_entry_arrays
) + 1);
874 *idx
= htole64(hidx
+ 1);
879 static int link_entry_into_array_plus_one(JournalFile
*f
,
898 i
= htole64(le64toh(*idx
) - 1);
899 r
= link_entry_into_array(f
, first
, &i
, p
);
904 *idx
= htole64(le64toh(*idx
) + 1);
908 static int journal_file_link_entry_item(JournalFile
*f
, Object
*o
, uint64_t offset
, uint64_t i
) {
915 p
= le64toh(o
->entry
.items
[i
].object_offset
);
919 r
= journal_file_move_to_object(f
, OBJECT_DATA
, p
, &o
);
923 return link_entry_into_array_plus_one(f
,
924 &o
->data
.entry_offset
,
925 &o
->data
.entry_array_offset
,
930 static int journal_file_link_entry(JournalFile
*f
, Object
*o
, uint64_t offset
) {
937 assert(o
->object
.type
== OBJECT_ENTRY
);
939 __sync_synchronize();
941 /* Link up the entry itself */
942 r
= link_entry_into_array(f
,
943 &f
->header
->entry_array_offset
,
944 &f
->header
->n_entries
,
949 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
951 if (f
->header
->head_entry_realtime
== 0)
952 f
->header
->head_entry_realtime
= o
->entry
.realtime
;
954 f
->header
->tail_entry_realtime
= o
->entry
.realtime
;
955 f
->header
->tail_entry_monotonic
= o
->entry
.monotonic
;
957 f
->tail_entry_monotonic_valid
= true;
959 /* Link up the items */
960 n
= journal_file_entry_n_items(o
);
961 for (i
= 0; i
< n
; i
++) {
962 r
= journal_file_link_entry_item(f
, o
, offset
, i
);
970 static int journal_file_append_entry_internal(
972 const dual_timestamp
*ts
,
974 const EntryItem items
[], unsigned n_items
,
976 Object
**ret
, uint64_t *offset
) {
983 assert(items
|| n_items
== 0);
986 osize
= offsetof(Object
, entry
.items
) + (n_items
* sizeof(EntryItem
));
988 r
= journal_file_append_object(f
, OBJECT_ENTRY
, osize
, &o
, &np
);
992 o
->entry
.seqnum
= htole64(journal_file_entry_seqnum(f
, seqnum
));
993 memcpy(o
->entry
.items
, items
, n_items
* sizeof(EntryItem
));
994 o
->entry
.realtime
= htole64(ts
->realtime
);
995 o
->entry
.monotonic
= htole64(ts
->monotonic
);
996 o
->entry
.xor_hash
= htole64(xor_hash
);
997 o
->entry
.boot_id
= f
->header
->boot_id
;
999 r
= journal_file_hmac_put_object(f
, OBJECT_ENTRY
, np
);
1003 r
= journal_file_link_entry(f
, o
, np
);
1016 void journal_file_post_change(JournalFile
*f
) {
1019 /* inotify() does not receive IN_MODIFY events from file
1020 * accesses done via mmap(). After each access we hence
1021 * trigger IN_MODIFY by truncating the journal file to its
1022 * current size which triggers IN_MODIFY. */
1024 __sync_synchronize();
1026 if (ftruncate(f
->fd
, f
->last_stat
.st_size
) < 0)
1027 log_error("Failed to to truncate file to its own size: %m");
1030 int journal_file_append_entry(JournalFile
*f
, const dual_timestamp
*ts
, const struct iovec iovec
[], unsigned n_iovec
, uint64_t *seqnum
, Object
**ret
, uint64_t *offset
) {
1034 uint64_t xor_hash
= 0;
1035 struct dual_timestamp _ts
;
1038 assert(iovec
|| n_iovec
== 0);
1044 dual_timestamp_get(&_ts
);
1048 if (f
->tail_entry_monotonic_valid
&&
1049 ts
->monotonic
< le64toh(f
->header
->tail_entry_monotonic
))
1052 r
= journal_file_maybe_append_tag(f
, ts
->realtime
);
1056 /* alloca() can't take 0, hence let's allocate at least one */
1057 items
= alloca(sizeof(EntryItem
) * MAX(1, n_iovec
));
1059 for (i
= 0; i
< n_iovec
; i
++) {
1063 r
= journal_file_append_data(f
, iovec
[i
].iov_base
, iovec
[i
].iov_len
, &o
, &p
);
1067 xor_hash
^= le64toh(o
->data
.hash
);
1068 items
[i
].object_offset
= htole64(p
);
1069 items
[i
].hash
= o
->data
.hash
;
1072 r
= journal_file_append_entry_internal(f
, ts
, xor_hash
, items
, n_iovec
, seqnum
, ret
, offset
);
1074 journal_file_post_change(f
);
1079 static int generic_array_get(JournalFile
*f
,
1082 Object
**ret
, uint64_t *offset
) {
1094 r
= journal_file_move_to_object(f
, OBJECT_ENTRY_ARRAY
, a
, &o
);
1098 n
= journal_file_entry_array_n_items(o
);
1100 p
= le64toh(o
->entry_array
.items
[i
]);
1105 a
= le64toh(o
->entry_array
.next_entry_array_offset
);
1108 if (a
<= 0 || p
<= 0)
1111 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, p
, &o
);
1124 static int generic_array_get_plus_one(JournalFile
*f
,
1128 Object
**ret
, uint64_t *offset
) {
1137 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, extra
, &o
);
1150 return generic_array_get(f
, first
, i
-1, ret
, offset
);
1159 static int generic_array_bisect(JournalFile
*f
,
1163 int (*test_object
)(JournalFile
*f
, uint64_t p
, uint64_t needle
),
1164 direction_t direction
,
1169 uint64_t a
, p
, t
= 0, i
= 0, last_p
= 0;
1170 bool subtract_one
= false;
1171 Object
*o
, *array
= NULL
;
1175 assert(test_object
);
1179 uint64_t left
, right
, k
, lp
;
1181 r
= journal_file_move_to_object(f
, OBJECT_ENTRY_ARRAY
, a
, &array
);
1185 k
= journal_file_entry_array_n_items(array
);
1191 lp
= p
= le64toh(array
->entry_array
.items
[i
]);
1195 r
= test_object(f
, p
, needle
);
1199 if (r
== TEST_FOUND
)
1200 r
= direction
== DIRECTION_DOWN
? TEST_RIGHT
: TEST_LEFT
;
1202 if (r
== TEST_RIGHT
) {
1206 if (left
== right
) {
1207 if (direction
== DIRECTION_UP
)
1208 subtract_one
= true;
1214 assert(left
< right
);
1216 i
= (left
+ right
) / 2;
1217 p
= le64toh(array
->entry_array
.items
[i
]);
1221 r
= test_object(f
, p
, needle
);
1225 if (r
== TEST_FOUND
)
1226 r
= direction
== DIRECTION_DOWN
? TEST_RIGHT
: TEST_LEFT
;
1228 if (r
== TEST_RIGHT
)
1236 if (direction
== DIRECTION_UP
) {
1238 subtract_one
= true;
1249 a
= le64toh(array
->entry_array
.next_entry_array_offset
);
1255 if (subtract_one
&& t
== 0 && i
== 0)
1258 if (subtract_one
&& i
== 0)
1260 else if (subtract_one
)
1261 p
= le64toh(array
->entry_array
.items
[i
-1]);
1263 p
= le64toh(array
->entry_array
.items
[i
]);
1265 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, p
, &o
);
1276 *idx
= t
+ i
+ (subtract_one
? -1 : 0);
1281 static int generic_array_bisect_plus_one(JournalFile
*f
,
1286 int (*test_object
)(JournalFile
*f
, uint64_t p
, uint64_t needle
),
1287 direction_t direction
,
1293 bool step_back
= false;
1297 assert(test_object
);
1302 /* This bisects the array in object 'first', but first checks
1304 r
= test_object(f
, extra
, needle
);
1308 if (r
== TEST_FOUND
)
1309 r
= direction
== DIRECTION_DOWN
? TEST_RIGHT
: TEST_LEFT
;
1311 /* if we are looking with DIRECTION_UP then we need to first
1312 see if in the actual array there is a matching entry, and
1313 return the last one of that. But if there isn't any we need
1314 to return this one. Hence remember this, and return it
1317 step_back
= direction
== DIRECTION_UP
;
1319 if (r
== TEST_RIGHT
) {
1320 if (direction
== DIRECTION_DOWN
)
1326 r
= generic_array_bisect(f
, first
, n
-1, needle
, test_object
, direction
, ret
, offset
, idx
);
1328 if (r
== 0 && step_back
)
1337 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, extra
, &o
);
1353 static int test_object_offset(JournalFile
*f
, uint64_t p
, uint64_t needle
) {
1359 else if (p
< needle
)
1365 int journal_file_move_to_entry_by_offset(
1368 direction_t direction
,
1372 return generic_array_bisect(f
,
1373 le64toh(f
->header
->entry_array_offset
),
1374 le64toh(f
->header
->n_entries
),
1382 static int test_object_seqnum(JournalFile
*f
, uint64_t p
, uint64_t needle
) {
1389 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, p
, &o
);
1393 if (le64toh(o
->entry
.seqnum
) == needle
)
1395 else if (le64toh(o
->entry
.seqnum
) < needle
)
1401 int journal_file_move_to_entry_by_seqnum(
1404 direction_t direction
,
1408 return generic_array_bisect(f
,
1409 le64toh(f
->header
->entry_array_offset
),
1410 le64toh(f
->header
->n_entries
),
1417 static int test_object_realtime(JournalFile
*f
, uint64_t p
, uint64_t needle
) {
1424 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, p
, &o
);
1428 if (le64toh(o
->entry
.realtime
) == needle
)
1430 else if (le64toh(o
->entry
.realtime
) < needle
)
1436 int journal_file_move_to_entry_by_realtime(
1439 direction_t direction
,
1443 return generic_array_bisect(f
,
1444 le64toh(f
->header
->entry_array_offset
),
1445 le64toh(f
->header
->n_entries
),
1447 test_object_realtime
,
1452 static int test_object_monotonic(JournalFile
*f
, uint64_t p
, uint64_t needle
) {
1459 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, p
, &o
);
1463 if (le64toh(o
->entry
.monotonic
) == needle
)
1465 else if (le64toh(o
->entry
.monotonic
) < needle
)
1471 int journal_file_move_to_entry_by_monotonic(
1475 direction_t direction
,
1479 char t
[9+32+1] = "_BOOT_ID=";
1485 sd_id128_to_string(boot_id
, t
+ 9);
1486 r
= journal_file_find_data_object(f
, t
, strlen(t
), &o
, NULL
);
1492 return generic_array_bisect_plus_one(f
,
1493 le64toh(o
->data
.entry_offset
),
1494 le64toh(o
->data
.entry_array_offset
),
1495 le64toh(o
->data
.n_entries
),
1497 test_object_monotonic
,
1502 int journal_file_next_entry(
1504 Object
*o
, uint64_t p
,
1505 direction_t direction
,
1506 Object
**ret
, uint64_t *offset
) {
1512 assert(p
> 0 || !o
);
1514 n
= le64toh(f
->header
->n_entries
);
1519 i
= direction
== DIRECTION_DOWN
? 0 : n
- 1;
1521 if (o
->object
.type
!= OBJECT_ENTRY
)
1524 r
= generic_array_bisect(f
,
1525 le64toh(f
->header
->entry_array_offset
),
1526 le64toh(f
->header
->n_entries
),
1535 if (direction
== DIRECTION_DOWN
) {
1548 /* And jump to it */
1549 return generic_array_get(f
,
1550 le64toh(f
->header
->entry_array_offset
),
1555 int journal_file_skip_entry(
1557 Object
*o
, uint64_t p
,
1559 Object
**ret
, uint64_t *offset
) {
1568 if (o
->object
.type
!= OBJECT_ENTRY
)
1571 r
= generic_array_bisect(f
,
1572 le64toh(f
->header
->entry_array_offset
),
1573 le64toh(f
->header
->n_entries
),
1582 /* Calculate new index */
1584 if ((uint64_t) -skip
>= i
)
1587 i
= i
- (uint64_t) -skip
;
1589 i
+= (uint64_t) skip
;
1591 n
= le64toh(f
->header
->n_entries
);
1598 return generic_array_get(f
,
1599 le64toh(f
->header
->entry_array_offset
),
1604 int journal_file_next_entry_for_data(
1606 Object
*o
, uint64_t p
,
1607 uint64_t data_offset
,
1608 direction_t direction
,
1609 Object
**ret
, uint64_t *offset
) {
1616 assert(p
> 0 || !o
);
1618 r
= journal_file_move_to_object(f
, OBJECT_DATA
, data_offset
, &d
);
1622 n
= le64toh(d
->data
.n_entries
);
1627 i
= direction
== DIRECTION_DOWN
? 0 : n
- 1;
1629 if (o
->object
.type
!= OBJECT_ENTRY
)
1632 r
= generic_array_bisect_plus_one(f
,
1633 le64toh(d
->data
.entry_offset
),
1634 le64toh(d
->data
.entry_array_offset
),
1635 le64toh(d
->data
.n_entries
),
1645 if (direction
== DIRECTION_DOWN
) {
1659 return generic_array_get_plus_one(f
,
1660 le64toh(d
->data
.entry_offset
),
1661 le64toh(d
->data
.entry_array_offset
),
1666 int journal_file_move_to_entry_by_offset_for_data(
1668 uint64_t data_offset
,
1670 direction_t direction
,
1671 Object
**ret
, uint64_t *offset
) {
1678 r
= journal_file_move_to_object(f
, OBJECT_DATA
, data_offset
, &d
);
1682 return generic_array_bisect_plus_one(f
,
1683 le64toh(d
->data
.entry_offset
),
1684 le64toh(d
->data
.entry_array_offset
),
1685 le64toh(d
->data
.n_entries
),
1692 int journal_file_move_to_entry_by_monotonic_for_data(
1694 uint64_t data_offset
,
1697 direction_t direction
,
1698 Object
**ret
, uint64_t *offset
) {
1700 char t
[9+32+1] = "_BOOT_ID=";
1707 /* First, seek by time */
1708 sd_id128_to_string(boot_id
, t
+ 9);
1709 r
= journal_file_find_data_object(f
, t
, strlen(t
), &o
, &b
);
1715 r
= generic_array_bisect_plus_one(f
,
1716 le64toh(o
->data
.entry_offset
),
1717 le64toh(o
->data
.entry_array_offset
),
1718 le64toh(o
->data
.n_entries
),
1720 test_object_monotonic
,
1726 /* And now, continue seeking until we find an entry that
1727 * exists in both bisection arrays */
1733 r
= journal_file_move_to_object(f
, OBJECT_DATA
, data_offset
, &d
);
1737 r
= generic_array_bisect_plus_one(f
,
1738 le64toh(d
->data
.entry_offset
),
1739 le64toh(d
->data
.entry_array_offset
),
1740 le64toh(d
->data
.n_entries
),
1748 r
= journal_file_move_to_object(f
, OBJECT_DATA
, b
, &o
);
1752 r
= generic_array_bisect_plus_one(f
,
1753 le64toh(o
->data
.entry_offset
),
1754 le64toh(o
->data
.entry_array_offset
),
1755 le64toh(o
->data
.n_entries
),
1779 int journal_file_move_to_entry_by_seqnum_for_data(
1781 uint64_t data_offset
,
1783 direction_t direction
,
1784 Object
**ret
, uint64_t *offset
) {
1791 r
= journal_file_move_to_object(f
, OBJECT_DATA
, data_offset
, &d
);
1795 return generic_array_bisect_plus_one(f
,
1796 le64toh(d
->data
.entry_offset
),
1797 le64toh(d
->data
.entry_array_offset
),
1798 le64toh(d
->data
.n_entries
),
1805 int journal_file_move_to_entry_by_realtime_for_data(
1807 uint64_t data_offset
,
1809 direction_t direction
,
1810 Object
**ret
, uint64_t *offset
) {
1817 r
= journal_file_move_to_object(f
, OBJECT_DATA
, data_offset
, &d
);
1821 return generic_array_bisect_plus_one(f
,
1822 le64toh(d
->data
.entry_offset
),
1823 le64toh(d
->data
.entry_array_offset
),
1824 le64toh(d
->data
.n_entries
),
1826 test_object_realtime
,
1831 void journal_file_dump(JournalFile
*f
) {
1838 journal_file_print_header(f
);
1840 p
= le64toh(f
->header
->header_size
);
1842 r
= journal_file_move_to_object(f
, -1, p
, &o
);
1846 switch (o
->object
.type
) {
1849 printf("Type: OBJECT_UNUSED\n");
1853 printf("Type: OBJECT_DATA\n");
1857 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
1858 (unsigned long long) le64toh(o
->entry
.seqnum
),
1859 (unsigned long long) le64toh(o
->entry
.monotonic
),
1860 (unsigned long long) le64toh(o
->entry
.realtime
));
1863 case OBJECT_FIELD_HASH_TABLE
:
1864 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1867 case OBJECT_DATA_HASH_TABLE
:
1868 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1871 case OBJECT_ENTRY_ARRAY
:
1872 printf("Type: OBJECT_ENTRY_ARRAY\n");
1876 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1877 (unsigned long long) le64toh(o
->tag
.seqnum
),
1878 (unsigned long long) le64toh(o
->tag
.epoch
));
1882 if (o
->object
.flags
& OBJECT_COMPRESSED
)
1883 printf("Flags: COMPRESSED\n");
1885 if (p
== le64toh(f
->header
->tail_object_offset
))
1888 p
= p
+ ALIGN64(le64toh(o
->object
.size
));
1893 log_error("File corrupt");
1896 void journal_file_print_header(JournalFile
*f
) {
1897 char a
[33], b
[33], c
[33];
1898 char x
[FORMAT_TIMESTAMP_MAX
], y
[FORMAT_TIMESTAMP_MAX
];
1902 printf("File Path: %s\n"
1906 "Sequential Number ID: %s\n"
1908 "Compatible Flags:%s%s\n"
1909 "Incompatible Flags:%s%s\n"
1910 "Header size: %llu\n"
1911 "Arena size: %llu\n"
1912 "Data Hash Table Size: %llu\n"
1913 "Field Hash Table Size: %llu\n"
1914 "Rotate Suggested: %s\n"
1915 "Head Sequential Number: %llu\n"
1916 "Tail Sequential Number: %llu\n"
1917 "Head Realtime Timestamp: %s\n"
1918 "Tail Realtime Timestamp: %s\n"
1920 "Entry Objects: %llu\n",
1922 sd_id128_to_string(f
->header
->file_id
, a
),
1923 sd_id128_to_string(f
->header
->machine_id
, b
),
1924 sd_id128_to_string(f
->header
->boot_id
, c
),
1925 sd_id128_to_string(f
->header
->seqnum_id
, c
),
1926 f
->header
->state
== STATE_OFFLINE
? "OFFLINE" :
1927 f
->header
->state
== STATE_ONLINE
? "ONLINE" :
1928 f
->header
->state
== STATE_ARCHIVED
? "ARCHIVED" : "UNKNOWN",
1929 JOURNAL_HEADER_SEALED(f
->header
) ? " SEALED" : "",
1930 (le32toh(f
->header
->compatible_flags
) & ~HEADER_COMPATIBLE_SEALED
) ? " ???" : "",
1931 JOURNAL_HEADER_COMPRESSED(f
->header
) ? " COMPRESSED" : "",
1932 (le32toh(f
->header
->incompatible_flags
) & ~HEADER_INCOMPATIBLE_COMPRESSED
) ? " ???" : "",
1933 (unsigned long long) le64toh(f
->header
->header_size
),
1934 (unsigned long long) le64toh(f
->header
->arena_size
),
1935 (unsigned long long) le64toh(f
->header
->data_hash_table_size
) / sizeof(HashItem
),
1936 (unsigned long long) le64toh(f
->header
->field_hash_table_size
) / sizeof(HashItem
),
1937 yes_no(journal_file_rotate_suggested(f
)),
1938 (unsigned long long) le64toh(f
->header
->head_entry_seqnum
),
1939 (unsigned long long) le64toh(f
->header
->tail_entry_seqnum
),
1940 format_timestamp(x
, sizeof(x
), le64toh(f
->header
->head_entry_realtime
)),
1941 format_timestamp(y
, sizeof(y
), le64toh(f
->header
->tail_entry_realtime
)),
1942 (unsigned long long) le64toh(f
->header
->n_objects
),
1943 (unsigned long long) le64toh(f
->header
->n_entries
));
1945 if (JOURNAL_HEADER_CONTAINS(f
->header
, n_data
))
1946 printf("Data Objects: %llu\n"
1947 "Data Hash Table Fill: %.1f%%\n",
1948 (unsigned long long) le64toh(f
->header
->n_data
),
1949 100.0 * (double) le64toh(f
->header
->n_data
) / ((double) (le64toh(f
->header
->data_hash_table_size
) / sizeof(HashItem
))));
1951 if (JOURNAL_HEADER_CONTAINS(f
->header
, n_fields
))
1952 printf("Field Objects: %llu\n"
1953 "Field Hash Table Fill: %.1f%%\n",
1954 (unsigned long long) le64toh(f
->header
->n_fields
),
1955 100.0 * (double) le64toh(f
->header
->n_fields
) / ((double) (le64toh(f
->header
->field_hash_table_size
) / sizeof(HashItem
))));
1957 if (JOURNAL_HEADER_CONTAINS(f
->header
, n_tags
))
1958 printf("Tag Objects: %llu\n",
1959 (unsigned long long) le64toh(f
->header
->n_tags
));
1960 if (JOURNAL_HEADER_CONTAINS(f
->header
, n_entry_arrays
))
1961 printf("Entry Array Objects: %llu\n",
1962 (unsigned long long) le64toh(f
->header
->n_entry_arrays
));
1965 int journal_file_open(
1971 JournalMetrics
*metrics
,
1972 MMapCache
*mmap_cache
,
1973 JournalFile
*template,
1974 JournalFile
**ret
) {
1978 bool newly_created
= false;
1982 if ((flags
& O_ACCMODE
) != O_RDONLY
&&
1983 (flags
& O_ACCMODE
) != O_RDWR
)
1986 if (!endswith(fname
, ".journal") &&
1987 !endswith(fname
, ".journal~"))
1990 f
= new0(JournalFile
, 1);
1998 f
->prot
= prot_from_flags(flags
);
1999 f
->writable
= (flags
& O_ACCMODE
) != O_RDONLY
;
2000 f
->compress
= compress
;
2004 f
->mmap
= mmap_cache_ref(mmap_cache
);
2006 f
->mmap
= mmap_cache_new();
2013 f
->path
= strdup(fname
);
2019 f
->fd
= open(f
->path
, f
->flags
|O_CLOEXEC
, f
->mode
);
2025 if (fstat(f
->fd
, &f
->last_stat
) < 0) {
2030 if (f
->last_stat
.st_size
== 0 && f
->writable
) {
2031 newly_created
= true;
2033 /* Try to load the FSPRG state, and if we can't, then
2034 * just don't do sealing */
2035 r
= journal_file_fss_load(f
);
2039 r
= journal_file_init_header(f
, template);
2043 if (fstat(f
->fd
, &f
->last_stat
) < 0) {
2049 if (f
->last_stat
.st_size
< (off_t
) HEADER_SIZE_MIN
) {
2054 f
->header
= mmap(NULL
, PAGE_ALIGN(sizeof(Header
)), prot_from_flags(flags
), MAP_SHARED
, f
->fd
, 0);
2055 if (f
->header
== MAP_FAILED
) {
2061 if (!newly_created
) {
2062 r
= journal_file_verify_header(f
);
2067 if (!newly_created
&& f
->writable
) {
2068 r
= journal_file_fss_load(f
);
2075 journal_default_metrics(metrics
, f
->fd
);
2076 f
->metrics
= *metrics
;
2077 } else if (template)
2078 f
->metrics
= template->metrics
;
2080 r
= journal_file_refresh_header(f
);
2085 r
= journal_file_hmac_setup(f
);
2089 if (newly_created
) {
2090 r
= journal_file_setup_field_hash_table(f
);
2094 r
= journal_file_setup_data_hash_table(f
);
2098 r
= journal_file_append_first_tag(f
);
2103 r
= journal_file_map_field_hash_table(f
);
2107 r
= journal_file_map_data_hash_table(f
);
2117 journal_file_close(f
);
2122 int journal_file_rotate(JournalFile
**f
, bool compress
, bool seal
) {
2125 JournalFile
*old_file
, *new_file
= NULL
;
2133 if (!old_file
->writable
)
2136 if (!endswith(old_file
->path
, ".journal"))
2139 l
= strlen(old_file
->path
);
2141 p
= new(char, l
+ 1 + 32 + 1 + 16 + 1 + 16 + 1);
2145 memcpy(p
, old_file
->path
, l
- 8);
2147 sd_id128_to_string(old_file
->header
->seqnum_id
, p
+ l
- 8 + 1);
2148 snprintf(p
+ l
- 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2149 "-%016llx-%016llx.journal",
2150 (unsigned long long) le64toh((*f
)->header
->tail_entry_seqnum
),
2151 (unsigned long long) le64toh((*f
)->header
->tail_entry_realtime
));
2153 r
= rename(old_file
->path
, p
);
2159 old_file
->header
->state
= STATE_ARCHIVED
;
2161 r
= journal_file_open(old_file
->path
, old_file
->flags
, old_file
->mode
, compress
, seal
, NULL
, old_file
->mmap
, old_file
, &new_file
);
2162 journal_file_close(old_file
);
2168 int journal_file_open_reliably(
2174 JournalMetrics
*metrics
,
2175 MMapCache
*mmap_cache
,
2176 JournalFile
*template,
2177 JournalFile
**ret
) {
2183 r
= journal_file_open(fname
, flags
, mode
, compress
, seal
,
2184 metrics
, mmap_cache
, template, ret
);
2185 if (r
!= -EBADMSG
&& /* corrupted */
2186 r
!= -ENODATA
&& /* truncated */
2187 r
!= -EHOSTDOWN
&& /* other machine */
2188 r
!= -EPROTONOSUPPORT
&& /* incompatible feature */
2189 r
!= -EBUSY
&& /* unclean shutdown */
2190 r
!= -ESHUTDOWN
/* already archived */)
2193 if ((flags
& O_ACCMODE
) == O_RDONLY
)
2196 if (!(flags
& O_CREAT
))
2199 if (!endswith(fname
, ".journal"))
2202 /* The file is corrupted. Rotate it away and try it again (but only once) */
2205 if (asprintf(&p
, "%.*s@%016llx-%016llx.journal~",
2207 (unsigned long long) now(CLOCK_REALTIME
),
2211 r
= rename(fname
, p
);
2216 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname
);
2218 return journal_file_open(fname
, flags
, mode
, compress
, seal
,
2219 metrics
, mmap_cache
, template, ret
);
2223 int journal_file_copy_entry(JournalFile
*from
, JournalFile
*to
, Object
*o
, uint64_t p
, uint64_t *seqnum
, Object
**ret
, uint64_t *offset
) {
2225 uint64_t q
, xor_hash
= 0;
2238 ts
.monotonic
= le64toh(o
->entry
.monotonic
);
2239 ts
.realtime
= le64toh(o
->entry
.realtime
);
2241 if (to
->tail_entry_monotonic_valid
&&
2242 ts
.monotonic
< le64toh(to
->header
->tail_entry_monotonic
))
2245 n
= journal_file_entry_n_items(o
);
2246 items
= alloca(sizeof(EntryItem
) * n
);
2248 for (i
= 0; i
< n
; i
++) {
2255 q
= le64toh(o
->entry
.items
[i
].object_offset
);
2256 le_hash
= o
->entry
.items
[i
].hash
;
2258 r
= journal_file_move_to_object(from
, OBJECT_DATA
, q
, &o
);
2262 if (le_hash
!= o
->data
.hash
)
2265 l
= le64toh(o
->object
.size
) - offsetof(Object
, data
.payload
);
2268 /* We hit the limit on 32bit machines */
2269 if ((uint64_t) t
!= l
)
2272 if (o
->object
.flags
& OBJECT_COMPRESSED
) {
2276 if (!uncompress_blob(o
->data
.payload
, l
, &from
->compress_buffer
, &from
->compress_buffer_size
, &rsize
))
2279 data
= from
->compress_buffer
;
2282 return -EPROTONOSUPPORT
;
2285 data
= o
->data
.payload
;
2287 r
= journal_file_append_data(to
, data
, l
, &u
, &h
);
2291 xor_hash
^= le64toh(u
->data
.hash
);
2292 items
[i
].object_offset
= htole64(h
);
2293 items
[i
].hash
= u
->data
.hash
;
2295 r
= journal_file_move_to_object(from
, OBJECT_ENTRY
, p
, &o
);
2300 return journal_file_append_entry_internal(to
, &ts
, xor_hash
, items
, n
, seqnum
, ret
, offset
);
2303 void journal_default_metrics(JournalMetrics
*m
, int fd
) {
2304 uint64_t fs_size
= 0;
2306 char a
[FORMAT_BYTES_MAX
], b
[FORMAT_BYTES_MAX
], c
[FORMAT_BYTES_MAX
], d
[FORMAT_BYTES_MAX
];
2311 if (fstatvfs(fd
, &ss
) >= 0)
2312 fs_size
= ss
.f_frsize
* ss
.f_blocks
;
2314 if (m
->max_use
== (uint64_t) -1) {
2317 m
->max_use
= PAGE_ALIGN(fs_size
/ 10); /* 10% of file system size */
2319 if (m
->max_use
> DEFAULT_MAX_USE_UPPER
)
2320 m
->max_use
= DEFAULT_MAX_USE_UPPER
;
2322 if (m
->max_use
< DEFAULT_MAX_USE_LOWER
)
2323 m
->max_use
= DEFAULT_MAX_USE_LOWER
;
2325 m
->max_use
= DEFAULT_MAX_USE_LOWER
;
2327 m
->max_use
= PAGE_ALIGN(m
->max_use
);
2329 if (m
->max_use
< JOURNAL_FILE_SIZE_MIN
*2)
2330 m
->max_use
= JOURNAL_FILE_SIZE_MIN
*2;
2333 if (m
->max_size
== (uint64_t) -1) {
2334 m
->max_size
= PAGE_ALIGN(m
->max_use
/ 8); /* 8 chunks */
2336 if (m
->max_size
> DEFAULT_MAX_SIZE_UPPER
)
2337 m
->max_size
= DEFAULT_MAX_SIZE_UPPER
;
2339 m
->max_size
= PAGE_ALIGN(m
->max_size
);
2341 if (m
->max_size
< JOURNAL_FILE_SIZE_MIN
)
2342 m
->max_size
= JOURNAL_FILE_SIZE_MIN
;
2344 if (m
->max_size
*2 > m
->max_use
)
2345 m
->max_use
= m
->max_size
*2;
2347 if (m
->min_size
== (uint64_t) -1)
2348 m
->min_size
= JOURNAL_FILE_SIZE_MIN
;
2350 m
->min_size
= PAGE_ALIGN(m
->min_size
);
2352 if (m
->min_size
< JOURNAL_FILE_SIZE_MIN
)
2353 m
->min_size
= JOURNAL_FILE_SIZE_MIN
;
2355 if (m
->min_size
> m
->max_size
)
2356 m
->max_size
= m
->min_size
;
2359 if (m
->keep_free
== (uint64_t) -1) {
2362 m
->keep_free
= PAGE_ALIGN(fs_size
/ 20); /* 5% of file system size */
2364 if (m
->keep_free
> DEFAULT_KEEP_FREE_UPPER
)
2365 m
->keep_free
= DEFAULT_KEEP_FREE_UPPER
;
2368 m
->keep_free
= DEFAULT_KEEP_FREE
;
2371 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2372 format_bytes(a
, sizeof(a
), m
->max_use
),
2373 format_bytes(b
, sizeof(b
), m
->max_size
),
2374 format_bytes(c
, sizeof(c
), m
->min_size
),
2375 format_bytes(d
, sizeof(d
), m
->keep_free
));
2378 int journal_file_get_cutoff_realtime_usec(JournalFile
*f
, usec_t
*from
, usec_t
*to
) {
2383 if (f
->header
->head_entry_realtime
== 0)
2386 *from
= le64toh(f
->header
->head_entry_realtime
);
2390 if (f
->header
->tail_entry_realtime
== 0)
2393 *to
= le64toh(f
->header
->tail_entry_realtime
);
2399 int journal_file_get_cutoff_monotonic_usec(JournalFile
*f
, sd_id128_t boot_id
, usec_t
*from
, usec_t
*to
) {
2400 char t
[9+32+1] = "_BOOT_ID=";
2408 sd_id128_to_string(boot_id
, t
+ 9);
2410 r
= journal_file_find_data_object(f
, t
, strlen(t
), &o
, &p
);
2414 if (le64toh(o
->data
.n_entries
) <= 0)
2418 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, le64toh(o
->data
.entry_offset
), &o
);
2422 *from
= le64toh(o
->entry
.monotonic
);
2426 r
= journal_file_move_to_object(f
, OBJECT_DATA
, p
, &o
);
2430 r
= generic_array_get_plus_one(f
,
2431 le64toh(o
->data
.entry_offset
),
2432 le64toh(o
->data
.entry_array_offset
),
2433 le64toh(o
->data
.n_entries
)-1,
2438 *to
= le64toh(o
->entry
.monotonic
);
2444 bool journal_file_rotate_suggested(JournalFile
*f
) {
2447 /* If we gained new header fields we gained new features,
2448 * hence suggest a rotation */
2449 if (le64toh(f
->header
->header_size
) < sizeof(Header
)) {
2450 log_debug("%s uses an outdated header, suggesting rotation.", f
->path
);
2454 /* Let's check if the hash tables grew over a certain fill
2455 * level (75%, borrowing this value from Java's hash table
2456 * implementation), and if so suggest a rotation. To calculate
2457 * the fill level we need the n_data field, which only exists
2458 * in newer versions. */
2460 if (JOURNAL_HEADER_CONTAINS(f
->header
, n_data
))
2461 if (le64toh(f
->header
->n_data
) * 4ULL > (le64toh(f
->header
->data_hash_table_size
) / sizeof(HashItem
)) * 3ULL) {
2462 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2464 100.0 * (double) le64toh(f
->header
->n_data
) / ((double) (le64toh(f
->header
->data_hash_table_size
) / sizeof(HashItem
))),
2465 (unsigned long long) le64toh(f
->header
->n_data
),
2466 (unsigned long long) (le64toh(f
->header
->data_hash_table_size
) / sizeof(HashItem
)),
2467 (unsigned long long) (f
->last_stat
.st_size
),
2468 (unsigned long long) (f
->last_stat
.st_size
/ le64toh(f
->header
->n_data
)));
2472 if (JOURNAL_HEADER_CONTAINS(f
->header
, n_fields
))
2473 if (le64toh(f
->header
->n_fields
) * 4ULL > (le64toh(f
->header
->field_hash_table_size
) / sizeof(HashItem
)) * 3ULL) {
2474 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2476 100.0 * (double) le64toh(f
->header
->n_fields
) / ((double) (le64toh(f
->header
->field_hash_table_size
) / sizeof(HashItem
))),
2477 (unsigned long long) le64toh(f
->header
->n_fields
),
2478 (unsigned long long) (le64toh(f
->header
->field_hash_table_size
) / sizeof(HashItem
)));