]> git.ipfire.org Git - thirdparty/systemd.git/blame_incremental - src/journal/journal-file.c
Update TODO
[thirdparty/systemd.git] / src / journal / journal-file.c
... / ...
CommitLineData
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
32#include "journal-authenticate.h"
33#include "lookup3.h"
34#include "compress.h"
35#include "fsprg.h"
36
/* Hash table sizes in bytes: the data hash table is never made smaller than
 * the first value, the field hash table always uses the second one. */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL * sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL * sizeof(HashItem))

/* Data payloads of at least this many bytes are candidates for compression */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* Smallest size a journal file may have */
#define JOURNAL_FILE_SIZE_MIN (4ULL * 1024ULL * 1024ULL)                /* 4 MiB */

/* Lower and upper bounds applied when the max_use value is deduced
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL * 1024ULL * 1024ULL)                /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)      /* 4 GiB */

/* Upper bound applied when max_size is deduced from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL * 1024ULL * 1024ULL)             /* 128 MiB */

/* Upper bound applied when the keep_free value is deduced from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)    /* 4 GiB */

/* keep_free value used when the file system size cannot be determined */
#define DEFAULT_KEEP_FREE (1024ULL * 1024ULL)                           /* 1 MiB */

/* n_data was the first field added to the header after the initial file
 * format design, hence a valid header is at least this large */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))

/* Maximum number of entries kept in the entry array chain cache */
#define CHAIN_CACHE_MAX 20

/* Granularity by which the journal file is grown whenever new space has
 * to be allocated */
#define FILE_SIZE_INCREASE (8ULL * 1024ULL * 1024ULL)                   /* 8 MiB */

/* Re-run fstat() on the file at least this often, to detect deletion */
#define LAST_STAT_REFRESH_USEC (5 * USEC_PER_SEC)

/* mmap cache context used for the header: one above the last defined
 * object type */
#define CONTEXT_HEADER _OBJECT_TYPE_MAX
75
76static int journal_file_set_online(JournalFile *f) {
77 assert(f);
78
79 if (!f->writable)
80 return -EPERM;
81
82 if (!(f->fd >= 0 && f->header))
83 return -EINVAL;
84
85 if (mmap_cache_got_sigbus(f->mmap, f->fd))
86 return -EIO;
87
88 switch(f->header->state) {
89 case STATE_ONLINE:
90 return 0;
91
92 case STATE_OFFLINE:
93 f->header->state = STATE_ONLINE;
94 fsync(f->fd);
95 return 0;
96
97 default:
98 return -EINVAL;
99 }
100}
101
102int journal_file_set_offline(JournalFile *f) {
103 assert(f);
104
105 if (!f->writable)
106 return -EPERM;
107
108 if (!(f->fd >= 0 && f->header))
109 return -EINVAL;
110
111 if (f->header->state != STATE_ONLINE)
112 return 0;
113
114 fsync(f->fd);
115
116 if (mmap_cache_got_sigbus(f->mmap, f->fd))
117 return -EIO;
118
119 f->header->state = STATE_OFFLINE;
120
121 if (mmap_cache_got_sigbus(f->mmap, f->fd))
122 return -EIO;
123
124 fsync(f->fd);
125
126 return 0;
127}
128
129void journal_file_close(JournalFile *f) {
130 assert(f);
131
132#ifdef HAVE_GCRYPT
133 /* Write the final tag */
134 if (f->seal && f->writable)
135 journal_file_append_tag(f);
136#endif
137
138 journal_file_set_offline(f);
139
140 if (f->mmap && f->fd >= 0)
141 mmap_cache_close_fd(f->mmap, f->fd);
142
143 safe_close(f->fd);
144 free(f->path);
145
146 if (f->mmap)
147 mmap_cache_unref(f->mmap);
148
149 ordered_hashmap_free_free(f->chain_cache);
150
151#if defined(HAVE_XZ) || defined(HAVE_LZ4)
152 free(f->compress_buffer);
153#endif
154
155#ifdef HAVE_GCRYPT
156 if (f->fss_file)
157 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
158 else if (f->fsprg_state)
159 free(f->fsprg_state);
160
161 free(f->fsprg_seed);
162
163 if (f->hmac)
164 gcry_md_close(f->hmac);
165#endif
166
167 free(f);
168}
169
170static int journal_file_init_header(JournalFile *f, JournalFile *template) {
171 Header h = {};
172 ssize_t k;
173 int r;
174
175 assert(f);
176
177 memcpy(h.signature, HEADER_SIGNATURE, 8);
178 h.header_size = htole64(ALIGN64(sizeof(h)));
179
180 h.incompatible_flags |= htole32(
181 f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
182 f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
183
184 h.compatible_flags = htole32(
185 f->seal * HEADER_COMPATIBLE_SEALED);
186
187 r = sd_id128_randomize(&h.file_id);
188 if (r < 0)
189 return r;
190
191 if (template) {
192 h.seqnum_id = template->header->seqnum_id;
193 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
194 } else
195 h.seqnum_id = h.file_id;
196
197 k = pwrite(f->fd, &h, sizeof(h), 0);
198 if (k < 0)
199 return -errno;
200
201 if (k != sizeof(h))
202 return -EIO;
203
204 return 0;
205}
206
207static int journal_file_refresh_header(JournalFile *f) {
208 sd_id128_t boot_id;
209 int r;
210
211 assert(f);
212
213 r = sd_id128_get_machine(&f->header->machine_id);
214 if (r < 0)
215 return r;
216
217 r = sd_id128_get_boot(&boot_id);
218 if (r < 0)
219 return r;
220
221 if (sd_id128_equal(boot_id, f->header->boot_id))
222 f->tail_entry_monotonic_valid = true;
223
224 f->header->boot_id = boot_id;
225
226 r = journal_file_set_online(f);
227
228 /* Sync the online state to disk */
229 fsync(f->fd);
230
231 return r;
232}
233
234static int journal_file_verify_header(JournalFile *f) {
235 uint32_t flags;
236
237 assert(f);
238
239 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
240 return -EBADMSG;
241
242 /* In both read and write mode we refuse to open files with
243 * incompatible flags we don't know */
244 flags = le32toh(f->header->incompatible_flags);
245 if (flags & ~HEADER_INCOMPATIBLE_SUPPORTED) {
246 if (flags & ~HEADER_INCOMPATIBLE_ANY)
247 log_debug("Journal file %s has unknown incompatible flags %"PRIx32,
248 f->path, flags & ~HEADER_INCOMPATIBLE_ANY);
249 flags = (flags & HEADER_INCOMPATIBLE_ANY) & ~HEADER_INCOMPATIBLE_SUPPORTED;
250 if (flags)
251 log_debug("Journal file %s uses incompatible flags %"PRIx32
252 " disabled at compilation time.", f->path, flags);
253 return -EPROTONOSUPPORT;
254 }
255
256 /* When open for writing we refuse to open files with
257 * compatible flags, too */
258 flags = le32toh(f->header->compatible_flags);
259 if (f->writable && (flags & ~HEADER_COMPATIBLE_SUPPORTED)) {
260 if (flags & ~HEADER_COMPATIBLE_ANY)
261 log_debug("Journal file %s has unknown compatible flags %"PRIx32,
262 f->path, flags & ~HEADER_COMPATIBLE_ANY);
263 flags = (flags & HEADER_COMPATIBLE_ANY) & ~HEADER_COMPATIBLE_SUPPORTED;
264 if (flags)
265 log_debug("Journal file %s uses compatible flags %"PRIx32
266 " disabled at compilation time.", f->path, flags);
267 return -EPROTONOSUPPORT;
268 }
269
270 if (f->header->state >= _STATE_MAX)
271 return -EBADMSG;
272
273 /* The first addition was n_data, so check that we are at least this large */
274 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
275 return -EBADMSG;
276
277 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
278 return -EBADMSG;
279
280 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
281 return -ENODATA;
282
283 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
284 return -ENODATA;
285
286 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
287 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
288 !VALID64(le64toh(f->header->tail_object_offset)) ||
289 !VALID64(le64toh(f->header->entry_array_offset)))
290 return -ENODATA;
291
292 if (f->writable) {
293 uint8_t state;
294 sd_id128_t machine_id;
295 int r;
296
297 r = sd_id128_get_machine(&machine_id);
298 if (r < 0)
299 return r;
300
301 if (!sd_id128_equal(machine_id, f->header->machine_id))
302 return -EHOSTDOWN;
303
304 state = f->header->state;
305
306 if (state == STATE_ONLINE) {
307 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
308 return -EBUSY;
309 } else if (state == STATE_ARCHIVED)
310 return -ESHUTDOWN;
311 else if (state != STATE_OFFLINE) {
312 log_debug("Journal file %s has unknown state %u.", f->path, state);
313 return -EBUSY;
314 }
315 }
316
317 f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
318 f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
319
320 f->seal = JOURNAL_HEADER_SEALED(f->header);
321
322 return 0;
323}
324
325static int journal_file_fstat(JournalFile *f) {
326 assert(f);
327 assert(f->fd >= 0);
328
329 if (fstat(f->fd, &f->last_stat) < 0)
330 return -errno;
331
332 f->last_stat_usec = now(CLOCK_MONOTONIC);
333
334 /* Refuse appending to files that are already deleted */
335 if (f->last_stat.st_nlink <= 0)
336 return -EIDRM;
337
338 return 0;
339}
340
341static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
342 uint64_t old_size, new_size;
343 int r;
344
345 assert(f);
346
347 /* We assume that this file is not sparse, and we know that
348 * for sure, since we always call posix_fallocate()
349 * ourselves */
350
351 if (mmap_cache_got_sigbus(f->mmap, f->fd))
352 return -EIO;
353
354 old_size =
355 le64toh(f->header->header_size) +
356 le64toh(f->header->arena_size);
357
358 new_size = PAGE_ALIGN(offset + size);
359 if (new_size < le64toh(f->header->header_size))
360 new_size = le64toh(f->header->header_size);
361
362 if (new_size <= old_size) {
363
364 /* We already pre-allocated enough space, but before
365 * we write to it, let's check with fstat() if the
366 * file got deleted, in order make sure we don't throw
367 * away the data immediately. Don't check fstat() for
368 * all writes though, but only once ever 10s. */
369
370 if (f->last_stat_usec + LAST_STAT_REFRESH_USEC > now(CLOCK_MONOTONIC))
371 return 0;
372
373 return journal_file_fstat(f);
374 }
375
376 /* Allocate more space. */
377
378 if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
379 return -E2BIG;
380
381 if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
382 struct statvfs svfs;
383
384 if (fstatvfs(f->fd, &svfs) >= 0) {
385 uint64_t available;
386
387 available = svfs.f_bfree * svfs.f_bsize;
388
389 if (available >= f->metrics.keep_free)
390 available -= f->metrics.keep_free;
391 else
392 available = 0;
393
394 if (new_size - old_size > available)
395 return -E2BIG;
396 }
397 }
398
399 /* Increase by larger blocks at once */
400 new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
401 if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
402 new_size = f->metrics.max_size;
403
404 /* Note that the glibc fallocate() fallback is very
405 inefficient, hence we try to minimize the allocation area
406 as we can. */
407 r = posix_fallocate(f->fd, old_size, new_size - old_size);
408 if (r != 0)
409 return -r;
410
411 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
412
413 return journal_file_fstat(f);
414}
415
416static unsigned type_to_context(ObjectType type) {
417 /* One context for each type, plus one catch-all for the rest */
418 assert_cc(_OBJECT_TYPE_MAX <= MMAP_CACHE_MAX_CONTEXTS);
419 assert_cc(CONTEXT_HEADER < MMAP_CACHE_MAX_CONTEXTS);
420 return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;
421}
422
423static int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
424 int r;
425
426 assert(f);
427 assert(ret);
428
429 if (size <= 0)
430 return -EINVAL;
431
432 /* Avoid SIGBUS on invalid accesses */
433 if (offset + size > (uint64_t) f->last_stat.st_size) {
434 /* Hmm, out of range? Let's refresh the fstat() data
435 * first, before we trust that check. */
436
437 r = journal_file_fstat(f);
438 if (r < 0)
439 return r;
440
441 if (offset + size > (uint64_t) f->last_stat.st_size)
442 return -EADDRNOTAVAIL;
443 }
444
445 return mmap_cache_get(f->mmap, f->fd, f->prot, type_to_context(type), keep_always, offset, size, &f->last_stat, ret);
446}
447
448static uint64_t minimum_header_size(Object *o) {
449
450 static const uint64_t table[] = {
451 [OBJECT_DATA] = sizeof(DataObject),
452 [OBJECT_FIELD] = sizeof(FieldObject),
453 [OBJECT_ENTRY] = sizeof(EntryObject),
454 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
455 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
456 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
457 [OBJECT_TAG] = sizeof(TagObject),
458 };
459
460 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
461 return sizeof(ObjectHeader);
462
463 return table[o->object.type];
464}
465
466int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret) {
467 int r;
468 void *t;
469 Object *o;
470 uint64_t s;
471
472 assert(f);
473 assert(ret);
474
475 /* Objects may only be located at multiple of 64 bit */
476 if (!VALID64(offset))
477 return -EFAULT;
478
479 r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), &t);
480 if (r < 0)
481 return r;
482
483 o = (Object*) t;
484 s = le64toh(o->object.size);
485
486 if (s < sizeof(ObjectHeader))
487 return -EBADMSG;
488
489 if (o->object.type <= OBJECT_UNUSED)
490 return -EBADMSG;
491
492 if (s < minimum_header_size(o))
493 return -EBADMSG;
494
495 if (type > OBJECT_UNUSED && o->object.type != type)
496 return -EBADMSG;
497
498 if (s > sizeof(ObjectHeader)) {
499 r = journal_file_move_to(f, type, false, offset, s, &t);
500 if (r < 0)
501 return r;
502
503 o = (Object*) t;
504 }
505
506 *ret = o;
507 return 0;
508}
509
510static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
511 uint64_t r;
512
513 assert(f);
514
515 r = le64toh(f->header->tail_entry_seqnum) + 1;
516
517 if (seqnum) {
518 /* If an external seqnum counter was passed, we update
519 * both the local and the external one, and set it to
520 * the maximum of both */
521
522 if (*seqnum + 1 > r)
523 r = *seqnum + 1;
524
525 *seqnum = r;
526 }
527
528 f->header->tail_entry_seqnum = htole64(r);
529
530 if (f->header->head_entry_seqnum == 0)
531 f->header->head_entry_seqnum = htole64(r);
532
533 return r;
534}
535
536int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset) {
537 int r;
538 uint64_t p;
539 Object *tail, *o;
540 void *t;
541
542 assert(f);
543 assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
544 assert(size >= sizeof(ObjectHeader));
545 assert(offset);
546 assert(ret);
547
548 r = journal_file_set_online(f);
549 if (r < 0)
550 return r;
551
552 p = le64toh(f->header->tail_object_offset);
553 if (p == 0)
554 p = le64toh(f->header->header_size);
555 else {
556 r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);
557 if (r < 0)
558 return r;
559
560 p += ALIGN64(le64toh(tail->object.size));
561 }
562
563 r = journal_file_allocate(f, p, size);
564 if (r < 0)
565 return r;
566
567 r = journal_file_move_to(f, type, false, p, size, &t);
568 if (r < 0)
569 return r;
570
571 o = (Object*) t;
572
573 zero(o->object);
574 o->object.type = type;
575 o->object.size = htole64(size);
576
577 f->header->tail_object_offset = htole64(p);
578 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
579
580 *ret = o;
581 *offset = p;
582
583 return 0;
584}
585
586static int journal_file_setup_data_hash_table(JournalFile *f) {
587 uint64_t s, p;
588 Object *o;
589 int r;
590
591 assert(f);
592
593 /* We estimate that we need 1 hash table entry per 768 of
594 journal file and we want to make sure we never get beyond
595 75% fill level. Calculate the hash table size for the
596 maximum file size based on these metrics. */
597
598 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
599 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
600 s = DEFAULT_DATA_HASH_TABLE_SIZE;
601
602 log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
603
604 r = journal_file_append_object(f,
605 OBJECT_DATA_HASH_TABLE,
606 offsetof(Object, hash_table.items) + s,
607 &o, &p);
608 if (r < 0)
609 return r;
610
611 memzero(o->hash_table.items, s);
612
613 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
614 f->header->data_hash_table_size = htole64(s);
615
616 return 0;
617}
618
619static int journal_file_setup_field_hash_table(JournalFile *f) {
620 uint64_t s, p;
621 Object *o;
622 int r;
623
624 assert(f);
625
626 /* We use a fixed size hash table for the fields as this
627 * number should grow very slowly only */
628
629 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
630 r = journal_file_append_object(f,
631 OBJECT_FIELD_HASH_TABLE,
632 offsetof(Object, hash_table.items) + s,
633 &o, &p);
634 if (r < 0)
635 return r;
636
637 memzero(o->hash_table.items, s);
638
639 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
640 f->header->field_hash_table_size = htole64(s);
641
642 return 0;
643}
644
645static int journal_file_map_data_hash_table(JournalFile *f) {
646 uint64_t s, p;
647 void *t;
648 int r;
649
650 assert(f);
651
652 p = le64toh(f->header->data_hash_table_offset);
653 s = le64toh(f->header->data_hash_table_size);
654
655 r = journal_file_move_to(f,
656 OBJECT_DATA_HASH_TABLE,
657 true,
658 p, s,
659 &t);
660 if (r < 0)
661 return r;
662
663 f->data_hash_table = t;
664 return 0;
665}
666
667static int journal_file_map_field_hash_table(JournalFile *f) {
668 uint64_t s, p;
669 void *t;
670 int r;
671
672 assert(f);
673
674 p = le64toh(f->header->field_hash_table_offset);
675 s = le64toh(f->header->field_hash_table_size);
676
677 r = journal_file_move_to(f,
678 OBJECT_FIELD_HASH_TABLE,
679 true,
680 p, s,
681 &t);
682 if (r < 0)
683 return r;
684
685 f->field_hash_table = t;
686 return 0;
687}
688
689static int journal_file_link_field(
690 JournalFile *f,
691 Object *o,
692 uint64_t offset,
693 uint64_t hash) {
694
695 uint64_t p, h, m;
696 int r;
697
698 assert(f);
699 assert(o);
700 assert(offset > 0);
701
702 if (o->object.type != OBJECT_FIELD)
703 return -EINVAL;
704
705 m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
706 if (m <= 0)
707 return -EBADMSG;
708
709 /* This might alter the window we are looking at */
710 o->field.next_hash_offset = o->field.head_data_offset = 0;
711
712 h = hash % m;
713 p = le64toh(f->field_hash_table[h].tail_hash_offset);
714 if (p == 0)
715 f->field_hash_table[h].head_hash_offset = htole64(offset);
716 else {
717 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
718 if (r < 0)
719 return r;
720
721 o->field.next_hash_offset = htole64(offset);
722 }
723
724 f->field_hash_table[h].tail_hash_offset = htole64(offset);
725
726 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
727 f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
728
729 return 0;
730}
731
732static int journal_file_link_data(
733 JournalFile *f,
734 Object *o,
735 uint64_t offset,
736 uint64_t hash) {
737
738 uint64_t p, h, m;
739 int r;
740
741 assert(f);
742 assert(o);
743 assert(offset > 0);
744
745 if (o->object.type != OBJECT_DATA)
746 return -EINVAL;
747
748 m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
749 if (m <= 0)
750 return -EBADMSG;
751
752 /* This might alter the window we are looking at */
753 o->data.next_hash_offset = o->data.next_field_offset = 0;
754 o->data.entry_offset = o->data.entry_array_offset = 0;
755 o->data.n_entries = 0;
756
757 h = hash % m;
758 p = le64toh(f->data_hash_table[h].tail_hash_offset);
759 if (p == 0)
760 /* Only entry in the hash table is easy */
761 f->data_hash_table[h].head_hash_offset = htole64(offset);
762 else {
763 /* Move back to the previous data object, to patch in
764 * pointer */
765
766 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
767 if (r < 0)
768 return r;
769
770 o->data.next_hash_offset = htole64(offset);
771 }
772
773 f->data_hash_table[h].tail_hash_offset = htole64(offset);
774
775 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
776 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
777
778 return 0;
779}
780
781int journal_file_find_field_object_with_hash(
782 JournalFile *f,
783 const void *field, uint64_t size, uint64_t hash,
784 Object **ret, uint64_t *offset) {
785
786 uint64_t p, osize, h, m;
787 int r;
788
789 assert(f);
790 assert(field && size > 0);
791
792 osize = offsetof(Object, field.payload) + size;
793
794 m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
795
796 if (m <= 0)
797 return -EBADMSG;
798
799 h = hash % m;
800 p = le64toh(f->field_hash_table[h].head_hash_offset);
801
802 while (p > 0) {
803 Object *o;
804
805 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
806 if (r < 0)
807 return r;
808
809 if (le64toh(o->field.hash) == hash &&
810 le64toh(o->object.size) == osize &&
811 memcmp(o->field.payload, field, size) == 0) {
812
813 if (ret)
814 *ret = o;
815 if (offset)
816 *offset = p;
817
818 return 1;
819 }
820
821 p = le64toh(o->field.next_hash_offset);
822 }
823
824 return 0;
825}
826
827int journal_file_find_field_object(
828 JournalFile *f,
829 const void *field, uint64_t size,
830 Object **ret, uint64_t *offset) {
831
832 uint64_t hash;
833
834 assert(f);
835 assert(field && size > 0);
836
837 hash = hash64(field, size);
838
839 return journal_file_find_field_object_with_hash(f,
840 field, size, hash,
841 ret, offset);
842}
843
844int journal_file_find_data_object_with_hash(
845 JournalFile *f,
846 const void *data, uint64_t size, uint64_t hash,
847 Object **ret, uint64_t *offset) {
848
849 uint64_t p, osize, h, m;
850 int r;
851
852 assert(f);
853 assert(data || size == 0);
854
855 osize = offsetof(Object, data.payload) + size;
856
857 m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
858 if (m <= 0)
859 return -EBADMSG;
860
861 h = hash % m;
862 p = le64toh(f->data_hash_table[h].head_hash_offset);
863
864 while (p > 0) {
865 Object *o;
866
867 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
868 if (r < 0)
869 return r;
870
871 if (le64toh(o->data.hash) != hash)
872 goto next;
873
874 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
875#if defined(HAVE_XZ) || defined(HAVE_LZ4)
876 uint64_t l;
877 size_t rsize;
878
879 l = le64toh(o->object.size);
880 if (l <= offsetof(Object, data.payload))
881 return -EBADMSG;
882
883 l -= offsetof(Object, data.payload);
884
885 r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
886 o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0);
887 if (r < 0)
888 return r;
889
890 if (rsize == size &&
891 memcmp(f->compress_buffer, data, size) == 0) {
892
893 if (ret)
894 *ret = o;
895
896 if (offset)
897 *offset = p;
898
899 return 1;
900 }
901#else
902 return -EPROTONOSUPPORT;
903#endif
904 } else if (le64toh(o->object.size) == osize &&
905 memcmp(o->data.payload, data, size) == 0) {
906
907 if (ret)
908 *ret = o;
909
910 if (offset)
911 *offset = p;
912
913 return 1;
914 }
915
916 next:
917 p = le64toh(o->data.next_hash_offset);
918 }
919
920 return 0;
921}
922
923int journal_file_find_data_object(
924 JournalFile *f,
925 const void *data, uint64_t size,
926 Object **ret, uint64_t *offset) {
927
928 uint64_t hash;
929
930 assert(f);
931 assert(data || size == 0);
932
933 hash = hash64(data, size);
934
935 return journal_file_find_data_object_with_hash(f,
936 data, size, hash,
937 ret, offset);
938}
939
940static int journal_file_append_field(
941 JournalFile *f,
942 const void *field, uint64_t size,
943 Object **ret, uint64_t *offset) {
944
945 uint64_t hash, p;
946 uint64_t osize;
947 Object *o;
948 int r;
949
950 assert(f);
951 assert(field && size > 0);
952
953 hash = hash64(field, size);
954
955 r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
956 if (r < 0)
957 return r;
958 else if (r > 0) {
959
960 if (ret)
961 *ret = o;
962
963 if (offset)
964 *offset = p;
965
966 return 0;
967 }
968
969 osize = offsetof(Object, field.payload) + size;
970 r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
971 if (r < 0)
972 return r;
973
974 o->field.hash = htole64(hash);
975 memcpy(o->field.payload, field, size);
976
977 r = journal_file_link_field(f, o, p, hash);
978 if (r < 0)
979 return r;
980
981 /* The linking might have altered the window, so let's
982 * refresh our pointer */
983 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
984 if (r < 0)
985 return r;
986
987#ifdef HAVE_GCRYPT
988 r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
989 if (r < 0)
990 return r;
991#endif
992
993 if (ret)
994 *ret = o;
995
996 if (offset)
997 *offset = p;
998
999 return 0;
1000}
1001
1002static int journal_file_append_data(
1003 JournalFile *f,
1004 const void *data, uint64_t size,
1005 Object **ret, uint64_t *offset) {
1006
1007 uint64_t hash, p;
1008 uint64_t osize;
1009 Object *o;
1010 int r, compression = 0;
1011 const void *eq;
1012
1013 assert(f);
1014 assert(data || size == 0);
1015
1016 hash = hash64(data, size);
1017
1018 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
1019 if (r < 0)
1020 return r;
1021 else if (r > 0) {
1022
1023 if (ret)
1024 *ret = o;
1025
1026 if (offset)
1027 *offset = p;
1028
1029 return 0;
1030 }
1031
1032 osize = offsetof(Object, data.payload) + size;
1033 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
1034 if (r < 0)
1035 return r;
1036
1037 o->data.hash = htole64(hash);
1038
1039#if defined(HAVE_XZ) || defined(HAVE_LZ4)
1040 if (f->compress_xz &&
1041 size >= COMPRESSION_SIZE_THRESHOLD) {
1042 size_t rsize;
1043
1044 compression = compress_blob(data, size, o->data.payload, &rsize);
1045
1046 if (compression) {
1047 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
1048 o->object.flags |= compression;
1049
1050 log_debug("Compressed data object %"PRIu64" -> %zu using %s",
1051 size, rsize, object_compressed_to_string(compression));
1052 }
1053 }
1054#endif
1055
1056 if (!compression && size > 0)
1057 memcpy(o->data.payload, data, size);
1058
1059 r = journal_file_link_data(f, o, p, hash);
1060 if (r < 0)
1061 return r;
1062
1063 /* The linking might have altered the window, so let's
1064 * refresh our pointer */
1065 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1066 if (r < 0)
1067 return r;
1068
1069 if (!data)
1070 eq = NULL;
1071 else
1072 eq = memchr(data, '=', size);
1073 if (eq && eq > data) {
1074 Object *fo = NULL;
1075 uint64_t fp;
1076
1077 /* Create field object ... */
1078 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
1079 if (r < 0)
1080 return r;
1081
1082 /* ... and link it in. */
1083 o->data.next_field_offset = fo->field.head_data_offset;
1084 fo->field.head_data_offset = le64toh(p);
1085 }
1086
1087#ifdef HAVE_GCRYPT
1088 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
1089 if (r < 0)
1090 return r;
1091#endif
1092
1093 if (ret)
1094 *ret = o;
1095
1096 if (offset)
1097 *offset = p;
1098
1099 return 0;
1100}
1101
1102uint64_t journal_file_entry_n_items(Object *o) {
1103 assert(o);
1104
1105 if (o->object.type != OBJECT_ENTRY)
1106 return 0;
1107
1108 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
1109}
1110
1111uint64_t journal_file_entry_array_n_items(Object *o) {
1112 assert(o);
1113
1114 if (o->object.type != OBJECT_ENTRY_ARRAY)
1115 return 0;
1116
1117 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
1118}
1119
1120uint64_t journal_file_hash_table_n_items(Object *o) {
1121 assert(o);
1122
1123 if (o->object.type != OBJECT_DATA_HASH_TABLE &&
1124 o->object.type != OBJECT_FIELD_HASH_TABLE)
1125 return 0;
1126
1127 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
1128}
1129
1130static int link_entry_into_array(JournalFile *f,
1131 le64_t *first,
1132 le64_t *idx,
1133 uint64_t p) {
1134 int r;
1135 uint64_t n = 0, ap = 0, q, i, a, hidx;
1136 Object *o;
1137
1138 assert(f);
1139 assert(first);
1140 assert(idx);
1141 assert(p > 0);
1142
1143 a = le64toh(*first);
1144 i = hidx = le64toh(*idx);
1145 while (a > 0) {
1146
1147 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1148 if (r < 0)
1149 return r;
1150
1151 n = journal_file_entry_array_n_items(o);
1152 if (i < n) {
1153 o->entry_array.items[i] = htole64(p);
1154 *idx = htole64(hidx + 1);
1155 return 0;
1156 }
1157
1158 i -= n;
1159 ap = a;
1160 a = le64toh(o->entry_array.next_entry_array_offset);
1161 }
1162
1163 if (hidx > n)
1164 n = (hidx+1) * 2;
1165 else
1166 n = n * 2;
1167
1168 if (n < 4)
1169 n = 4;
1170
1171 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
1172 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
1173 &o, &q);
1174 if (r < 0)
1175 return r;
1176
1177#ifdef HAVE_GCRYPT
1178 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
1179 if (r < 0)
1180 return r;
1181#endif
1182
1183 o->entry_array.items[i] = htole64(p);
1184
1185 if (ap == 0)
1186 *first = htole64(q);
1187 else {
1188 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
1189 if (r < 0)
1190 return r;
1191
1192 o->entry_array.next_entry_array_offset = htole64(q);
1193 }
1194
1195 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1196 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
1197
1198 *idx = htole64(hidx + 1);
1199
1200 return 0;
1201}
1202
1203static int link_entry_into_array_plus_one(JournalFile *f,
1204 le64_t *extra,
1205 le64_t *first,
1206 le64_t *idx,
1207 uint64_t p) {
1208
1209 int r;
1210
1211 assert(f);
1212 assert(extra);
1213 assert(first);
1214 assert(idx);
1215 assert(p > 0);
1216
1217 if (*idx == 0)
1218 *extra = htole64(p);
1219 else {
1220 le64_t i;
1221
1222 i = htole64(le64toh(*idx) - 1);
1223 r = link_entry_into_array(f, first, &i, p);
1224 if (r < 0)
1225 return r;
1226 }
1227
1228 *idx = htole64(le64toh(*idx) + 1);
1229 return 0;
1230}
1231
1232static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
1233 uint64_t p;
1234 int r;
1235 assert(f);
1236 assert(o);
1237 assert(offset > 0);
1238
1239 p = le64toh(o->entry.items[i].object_offset);
1240 if (p == 0)
1241 return -EINVAL;
1242
1243 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1244 if (r < 0)
1245 return r;
1246
1247 return link_entry_into_array_plus_one(f,
1248 &o->data.entry_offset,
1249 &o->data.entry_array_offset,
1250 &o->data.n_entries,
1251 offset);
1252}
1253
1254static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
1255 uint64_t n, i;
1256 int r;
1257
1258 assert(f);
1259 assert(o);
1260 assert(offset > 0);
1261
1262 if (o->object.type != OBJECT_ENTRY)
1263 return -EINVAL;
1264
1265 __sync_synchronize();
1266
1267 /* Link up the entry itself */
1268 r = link_entry_into_array(f,
1269 &f->header->entry_array_offset,
1270 &f->header->n_entries,
1271 offset);
1272 if (r < 0)
1273 return r;
1274
1275 /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
1276
1277 if (f->header->head_entry_realtime == 0)
1278 f->header->head_entry_realtime = o->entry.realtime;
1279
1280 f->header->tail_entry_realtime = o->entry.realtime;
1281 f->header->tail_entry_monotonic = o->entry.monotonic;
1282
1283 f->tail_entry_monotonic_valid = true;
1284
1285 /* Link up the items */
1286 n = journal_file_entry_n_items(o);
1287 for (i = 0; i < n; i++) {
1288 r = journal_file_link_entry_item(f, o, offset, i);
1289 if (r < 0)
1290 return r;
1291 }
1292
1293 return 0;
1294}
1295
1296static int journal_file_append_entry_internal(
1297 JournalFile *f,
1298 const dual_timestamp *ts,
1299 uint64_t xor_hash,
1300 const EntryItem items[], unsigned n_items,
1301 uint64_t *seqnum,
1302 Object **ret, uint64_t *offset) {
1303 uint64_t np;
1304 uint64_t osize;
1305 Object *o;
1306 int r;
1307
1308 assert(f);
1309 assert(items || n_items == 0);
1310 assert(ts);
1311
1312 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1313
1314 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1315 if (r < 0)
1316 return r;
1317
1318 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1319 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1320 o->entry.realtime = htole64(ts->realtime);
1321 o->entry.monotonic = htole64(ts->monotonic);
1322 o->entry.xor_hash = htole64(xor_hash);
1323 o->entry.boot_id = f->header->boot_id;
1324
1325#ifdef HAVE_GCRYPT
1326 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
1327 if (r < 0)
1328 return r;
1329#endif
1330
1331 r = journal_file_link_entry(f, o, np);
1332 if (r < 0)
1333 return r;
1334
1335 if (ret)
1336 *ret = o;
1337
1338 if (offset)
1339 *offset = np;
1340
1341 return 0;
1342}
1343
1344void journal_file_post_change(JournalFile *f) {
1345 assert(f);
1346
1347 /* inotify() does not receive IN_MODIFY events from file
1348 * accesses done via mmap(). After each access we hence
1349 * trigger IN_MODIFY by truncating the journal file to its
1350 * current size which triggers IN_MODIFY. */
1351
1352 __sync_synchronize();
1353
1354 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1355 log_error_errno(errno, "Failed to truncate file to its own size: %m");
1356}
1357
1358static int entry_item_cmp(const void *_a, const void *_b) {
1359 const EntryItem *a = _a, *b = _b;
1360
1361 if (le64toh(a->object_offset) < le64toh(b->object_offset))
1362 return -1;
1363 if (le64toh(a->object_offset) > le64toh(b->object_offset))
1364 return 1;
1365 return 0;
1366}
1367
1368int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1369 unsigned i;
1370 EntryItem *items;
1371 int r;
1372 uint64_t xor_hash = 0;
1373 struct dual_timestamp _ts;
1374
1375 assert(f);
1376 assert(iovec || n_iovec == 0);
1377
1378 if (!ts) {
1379 dual_timestamp_get(&_ts);
1380 ts = &_ts;
1381 }
1382
1383 if (f->tail_entry_monotonic_valid &&
1384 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1385 return -EINVAL;
1386
1387#ifdef HAVE_GCRYPT
1388 r = journal_file_maybe_append_tag(f, ts->realtime);
1389 if (r < 0)
1390 return r;
1391#endif
1392
1393 /* alloca() can't take 0, hence let's allocate at least one */
1394 items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));
1395
1396 for (i = 0; i < n_iovec; i++) {
1397 uint64_t p;
1398 Object *o;
1399
1400 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1401 if (r < 0)
1402 return r;
1403
1404 xor_hash ^= le64toh(o->data.hash);
1405 items[i].object_offset = htole64(p);
1406 items[i].hash = o->data.hash;
1407 }
1408
1409 /* Order by the position on disk, in order to improve seek
1410 * times for rotating media. */
1411 qsort_safe(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1412
1413 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1414
1415 /* If the memory mapping triggered a SIGBUS then we return an
1416 * IO error and ignore the error code passed down to us, since
1417 * it is very likely just an effect of a nullified replacement
1418 * mapping page */
1419
1420 if (mmap_cache_got_sigbus(f->mmap, f->fd))
1421 r = -EIO;
1422
1423 journal_file_post_change(f);
1424
1425 return r;
1426}
1427
/* One cached position inside a chain of entry array objects, stored in
 * the chain cache keyed by 'first'. */
typedef struct ChainCacheItem ChainCacheItem;

struct ChainCacheItem {
        uint64_t first;      /* offset of the array the chain starts with */
        uint64_t array;      /* offset of the array that is cached */
        uint64_t begin;      /* first item stored in the cached array */
        uint64_t total;      /* number of items in all arrays of the chain preceding the cached one */
        uint64_t last_index; /* index looked at last, used to improve locality when bisecting */
};
1435
1436static void chain_cache_put(
1437 OrderedHashmap *h,
1438 ChainCacheItem *ci,
1439 uint64_t first,
1440 uint64_t array,
1441 uint64_t begin,
1442 uint64_t total,
1443 uint64_t last_index) {
1444
1445 if (!ci) {
1446 /* If the chain item to cache for this chain is the
1447 * first one it's not worth caching anything */
1448 if (array == first)
1449 return;
1450
1451 if (ordered_hashmap_size(h) >= CHAIN_CACHE_MAX) {
1452 ci = ordered_hashmap_steal_first(h);
1453 assert(ci);
1454 } else {
1455 ci = new(ChainCacheItem, 1);
1456 if (!ci)
1457 return;
1458 }
1459
1460 ci->first = first;
1461
1462 if (ordered_hashmap_put(h, &ci->first, ci) < 0) {
1463 free(ci);
1464 return;
1465 }
1466 } else
1467 assert(ci->first == first);
1468
1469 ci->array = array;
1470 ci->begin = begin;
1471 ci->total = total;
1472 ci->last_index = last_index;
1473}
1474
1475static int generic_array_get(
1476 JournalFile *f,
1477 uint64_t first,
1478 uint64_t i,
1479 Object **ret, uint64_t *offset) {
1480
1481 Object *o;
1482 uint64_t p = 0, a, t = 0;
1483 int r;
1484 ChainCacheItem *ci;
1485
1486 assert(f);
1487
1488 a = first;
1489
1490 /* Try the chain cache first */
1491 ci = ordered_hashmap_get(f->chain_cache, &first);
1492 if (ci && i > ci->total) {
1493 a = ci->array;
1494 i -= ci->total;
1495 t = ci->total;
1496 }
1497
1498 while (a > 0) {
1499 uint64_t k;
1500
1501 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1502 if (r < 0)
1503 return r;
1504
1505 k = journal_file_entry_array_n_items(o);
1506 if (i < k) {
1507 p = le64toh(o->entry_array.items[i]);
1508 goto found;
1509 }
1510
1511 i -= k;
1512 t += k;
1513 a = le64toh(o->entry_array.next_entry_array_offset);
1514 }
1515
1516 return 0;
1517
1518found:
1519 /* Let's cache this item for the next invocation */
1520 chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);
1521
1522 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1523 if (r < 0)
1524 return r;
1525
1526 if (ret)
1527 *ret = o;
1528
1529 if (offset)
1530 *offset = p;
1531
1532 return 1;
1533}
1534
1535static int generic_array_get_plus_one(
1536 JournalFile *f,
1537 uint64_t extra,
1538 uint64_t first,
1539 uint64_t i,
1540 Object **ret, uint64_t *offset) {
1541
1542 Object *o;
1543
1544 assert(f);
1545
1546 if (i == 0) {
1547 int r;
1548
1549 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1550 if (r < 0)
1551 return r;
1552
1553 if (ret)
1554 *ret = o;
1555
1556 if (offset)
1557 *offset = extra;
1558
1559 return 1;
1560 }
1561
1562 return generic_array_get(f, first, i-1, ret, offset);
1563}
1564
/* Results of the test_object() callbacks used by the bisection code:
 * the tested object matches the needle, or the needle is to be searched
 * for further right resp. further left of the tested object. */
enum {
        TEST_FOUND = 0,
        TEST_LEFT  = 1,
        TEST_RIGHT = 2
};
1570
1571static int generic_array_bisect(
1572 JournalFile *f,
1573 uint64_t first,
1574 uint64_t n,
1575 uint64_t needle,
1576 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1577 direction_t direction,
1578 Object **ret,
1579 uint64_t *offset,
1580 uint64_t *idx) {
1581
1582 uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
1583 bool subtract_one = false;
1584 Object *o, *array = NULL;
1585 int r;
1586 ChainCacheItem *ci;
1587
1588 assert(f);
1589 assert(test_object);
1590
1591 /* Start with the first array in the chain */
1592 a = first;
1593
1594 ci = ordered_hashmap_get(f->chain_cache, &first);
1595 if (ci && n > ci->total) {
1596 /* Ah, we have iterated this bisection array chain
1597 * previously! Let's see if we can skip ahead in the
1598 * chain, as far as the last time. But we can't jump
1599 * backwards in the chain, so let's check that
1600 * first. */
1601
1602 r = test_object(f, ci->begin, needle);
1603 if (r < 0)
1604 return r;
1605
1606 if (r == TEST_LEFT) {
1607 /* OK, what we are looking for is right of the
1608 * begin of this EntryArray, so let's jump
1609 * straight to previously cached array in the
1610 * chain */
1611
1612 a = ci->array;
1613 n -= ci->total;
1614 t = ci->total;
1615 last_index = ci->last_index;
1616 }
1617 }
1618
1619 while (a > 0) {
1620 uint64_t left, right, k, lp;
1621
1622 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1623 if (r < 0)
1624 return r;
1625
1626 k = journal_file_entry_array_n_items(array);
1627 right = MIN(k, n);
1628 if (right <= 0)
1629 return 0;
1630
1631 i = right - 1;
1632 lp = p = le64toh(array->entry_array.items[i]);
1633 if (p <= 0)
1634 return -EBADMSG;
1635
1636 r = test_object(f, p, needle);
1637 if (r < 0)
1638 return r;
1639
1640 if (r == TEST_FOUND)
1641 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1642
1643 if (r == TEST_RIGHT) {
1644 left = 0;
1645 right -= 1;
1646
1647 if (last_index != (uint64_t) -1) {
1648 assert(last_index <= right);
1649
1650 /* If we cached the last index we
1651 * looked at, let's try to not to jump
1652 * too wildly around and see if we can
1653 * limit the range to look at early to
1654 * the immediate neighbors of the last
1655 * index we looked at. */
1656
1657 if (last_index > 0) {
1658 uint64_t x = last_index - 1;
1659
1660 p = le64toh(array->entry_array.items[x]);
1661 if (p <= 0)
1662 return -EBADMSG;
1663
1664 r = test_object(f, p, needle);
1665 if (r < 0)
1666 return r;
1667
1668 if (r == TEST_FOUND)
1669 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1670
1671 if (r == TEST_RIGHT)
1672 right = x;
1673 else
1674 left = x + 1;
1675 }
1676
1677 if (last_index < right) {
1678 uint64_t y = last_index + 1;
1679
1680 p = le64toh(array->entry_array.items[y]);
1681 if (p <= 0)
1682 return -EBADMSG;
1683
1684 r = test_object(f, p, needle);
1685 if (r < 0)
1686 return r;
1687
1688 if (r == TEST_FOUND)
1689 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1690
1691 if (r == TEST_RIGHT)
1692 right = y;
1693 else
1694 left = y + 1;
1695 }
1696 }
1697
1698 for (;;) {
1699 if (left == right) {
1700 if (direction == DIRECTION_UP)
1701 subtract_one = true;
1702
1703 i = left;
1704 goto found;
1705 }
1706
1707 assert(left < right);
1708 i = (left + right) / 2;
1709
1710 p = le64toh(array->entry_array.items[i]);
1711 if (p <= 0)
1712 return -EBADMSG;
1713
1714 r = test_object(f, p, needle);
1715 if (r < 0)
1716 return r;
1717
1718 if (r == TEST_FOUND)
1719 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1720
1721 if (r == TEST_RIGHT)
1722 right = i;
1723 else
1724 left = i + 1;
1725 }
1726 }
1727
1728 if (k >= n) {
1729 if (direction == DIRECTION_UP) {
1730 i = n;
1731 subtract_one = true;
1732 goto found;
1733 }
1734
1735 return 0;
1736 }
1737
1738 last_p = lp;
1739
1740 n -= k;
1741 t += k;
1742 last_index = (uint64_t) -1;
1743 a = le64toh(array->entry_array.next_entry_array_offset);
1744 }
1745
1746 return 0;
1747
1748found:
1749 if (subtract_one && t == 0 && i == 0)
1750 return 0;
1751
1752 /* Let's cache this item for the next invocation */
1753 chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
1754
1755 if (subtract_one && i == 0)
1756 p = last_p;
1757 else if (subtract_one)
1758 p = le64toh(array->entry_array.items[i-1]);
1759 else
1760 p = le64toh(array->entry_array.items[i]);
1761
1762 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1763 if (r < 0)
1764 return r;
1765
1766 if (ret)
1767 *ret = o;
1768
1769 if (offset)
1770 *offset = p;
1771
1772 if (idx)
1773 *idx = t + i + (subtract_one ? -1 : 0);
1774
1775 return 1;
1776}
1777
1778static int generic_array_bisect_plus_one(
1779 JournalFile *f,
1780 uint64_t extra,
1781 uint64_t first,
1782 uint64_t n,
1783 uint64_t needle,
1784 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1785 direction_t direction,
1786 Object **ret,
1787 uint64_t *offset,
1788 uint64_t *idx) {
1789
1790 int r;
1791 bool step_back = false;
1792 Object *o;
1793
1794 assert(f);
1795 assert(test_object);
1796
1797 if (n <= 0)
1798 return 0;
1799
1800 /* This bisects the array in object 'first', but first checks
1801 * an extra */
1802 r = test_object(f, extra, needle);
1803 if (r < 0)
1804 return r;
1805
1806 if (r == TEST_FOUND)
1807 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1808
1809 /* if we are looking with DIRECTION_UP then we need to first
1810 see if in the actual array there is a matching entry, and
1811 return the last one of that. But if there isn't any we need
1812 to return this one. Hence remember this, and return it
1813 below. */
1814 if (r == TEST_LEFT)
1815 step_back = direction == DIRECTION_UP;
1816
1817 if (r == TEST_RIGHT) {
1818 if (direction == DIRECTION_DOWN)
1819 goto found;
1820 else
1821 return 0;
1822 }
1823
1824 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1825
1826 if (r == 0 && step_back)
1827 goto found;
1828
1829 if (r > 0 && idx)
1830 (*idx) ++;
1831
1832 return r;
1833
1834found:
1835 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1836 if (r < 0)
1837 return r;
1838
1839 if (ret)
1840 *ret = o;
1841
1842 if (offset)
1843 *offset = extra;
1844
1845 if (idx)
1846 *idx = 0;
1847
1848 return 1;
1849}
1850
1851_pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1852 assert(f);
1853 assert(p > 0);
1854
1855 if (p == needle)
1856 return TEST_FOUND;
1857 else if (p < needle)
1858 return TEST_LEFT;
1859 else
1860 return TEST_RIGHT;
1861}
1862
1863static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1864 Object *o;
1865 int r;
1866
1867 assert(f);
1868 assert(p > 0);
1869
1870 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1871 if (r < 0)
1872 return r;
1873
1874 if (le64toh(o->entry.seqnum) == needle)
1875 return TEST_FOUND;
1876 else if (le64toh(o->entry.seqnum) < needle)
1877 return TEST_LEFT;
1878 else
1879 return TEST_RIGHT;
1880}
1881
1882int journal_file_move_to_entry_by_seqnum(
1883 JournalFile *f,
1884 uint64_t seqnum,
1885 direction_t direction,
1886 Object **ret,
1887 uint64_t *offset) {
1888
1889 return generic_array_bisect(f,
1890 le64toh(f->header->entry_array_offset),
1891 le64toh(f->header->n_entries),
1892 seqnum,
1893 test_object_seqnum,
1894 direction,
1895 ret, offset, NULL);
1896}
1897
1898static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1899 Object *o;
1900 int r;
1901
1902 assert(f);
1903 assert(p > 0);
1904
1905 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1906 if (r < 0)
1907 return r;
1908
1909 if (le64toh(o->entry.realtime) == needle)
1910 return TEST_FOUND;
1911 else if (le64toh(o->entry.realtime) < needle)
1912 return TEST_LEFT;
1913 else
1914 return TEST_RIGHT;
1915}
1916
1917int journal_file_move_to_entry_by_realtime(
1918 JournalFile *f,
1919 uint64_t realtime,
1920 direction_t direction,
1921 Object **ret,
1922 uint64_t *offset) {
1923
1924 return generic_array_bisect(f,
1925 le64toh(f->header->entry_array_offset),
1926 le64toh(f->header->n_entries),
1927 realtime,
1928 test_object_realtime,
1929 direction,
1930 ret, offset, NULL);
1931}
1932
1933static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1934 Object *o;
1935 int r;
1936
1937 assert(f);
1938 assert(p > 0);
1939
1940 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1941 if (r < 0)
1942 return r;
1943
1944 if (le64toh(o->entry.monotonic) == needle)
1945 return TEST_FOUND;
1946 else if (le64toh(o->entry.monotonic) < needle)
1947 return TEST_LEFT;
1948 else
1949 return TEST_RIGHT;
1950}
1951
1952static inline int find_data_object_by_boot_id(
1953 JournalFile *f,
1954 sd_id128_t boot_id,
1955 Object **o,
1956 uint64_t *b) {
1957 char t[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
1958
1959 sd_id128_to_string(boot_id, t + 9);
1960 return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
1961}
1962
1963int journal_file_move_to_entry_by_monotonic(
1964 JournalFile *f,
1965 sd_id128_t boot_id,
1966 uint64_t monotonic,
1967 direction_t direction,
1968 Object **ret,
1969 uint64_t *offset) {
1970
1971 Object *o;
1972 int r;
1973
1974 assert(f);
1975
1976 r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
1977 if (r < 0)
1978 return r;
1979 if (r == 0)
1980 return -ENOENT;
1981
1982 return generic_array_bisect_plus_one(f,
1983 le64toh(o->data.entry_offset),
1984 le64toh(o->data.entry_array_offset),
1985 le64toh(o->data.n_entries),
1986 monotonic,
1987 test_object_monotonic,
1988 direction,
1989 ret, offset, NULL);
1990}
1991
1992void journal_file_reset_location(JournalFile *f) {
1993 f->location_type = LOCATION_HEAD;
1994 f->current_offset = 0;
1995 f->current_seqnum = 0;
1996 f->current_realtime = 0;
1997 f->current_monotonic = 0;
1998 zero(f->current_boot_id);
1999 f->current_xor_hash = 0;
2000}
2001
2002void journal_file_save_location(JournalFile *f, direction_t direction, Object *o, uint64_t offset) {
2003 f->last_direction = direction;
2004 f->location_type = LOCATION_SEEK;
2005 f->current_offset = offset;
2006 f->current_seqnum = le64toh(o->entry.seqnum);
2007 f->current_realtime = le64toh(o->entry.realtime);
2008 f->current_monotonic = le64toh(o->entry.monotonic);
2009 f->current_boot_id = o->entry.boot_id;
2010 f->current_xor_hash = le64toh(o->entry.xor_hash);
2011}
2012
2013int journal_file_compare_locations(JournalFile *af, JournalFile *bf) {
2014 assert(af);
2015 assert(bf);
2016 assert(af->location_type == LOCATION_SEEK);
2017 assert(bf->location_type == LOCATION_SEEK);
2018
2019 /* If contents and timestamps match, these entries are
2020 * identical, even if the seqnum does not match */
2021 if (sd_id128_equal(af->current_boot_id, bf->current_boot_id) &&
2022 af->current_monotonic == bf->current_monotonic &&
2023 af->current_realtime == bf->current_realtime &&
2024 af->current_xor_hash == bf->current_xor_hash)
2025 return 0;
2026
2027 if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
2028
2029 /* If this is from the same seqnum source, compare
2030 * seqnums */
2031 if (af->current_seqnum < bf->current_seqnum)
2032 return -1;
2033 if (af->current_seqnum > bf->current_seqnum)
2034 return 1;
2035
2036 /* Wow! This is weird, different data but the same
2037 * seqnums? Something is borked, but let's make the
2038 * best of it and compare by time. */
2039 }
2040
2041 if (sd_id128_equal(af->current_boot_id, bf->current_boot_id)) {
2042
2043 /* If the boot id matches, compare monotonic time */
2044 if (af->current_monotonic < bf->current_monotonic)
2045 return -1;
2046 if (af->current_monotonic > bf->current_monotonic)
2047 return 1;
2048 }
2049
2050 /* Otherwise, compare UTC time */
2051 if (af->current_realtime < bf->current_realtime)
2052 return -1;
2053 if (af->current_realtime > bf->current_realtime)
2054 return 1;
2055
2056 /* Finally, compare by contents */
2057 if (af->current_xor_hash < bf->current_xor_hash)
2058 return -1;
2059 if (af->current_xor_hash > bf->current_xor_hash)
2060 return 1;
2061
2062 return 0;
2063}
2064
2065int journal_file_next_entry(
2066 JournalFile *f,
2067 uint64_t p,
2068 direction_t direction,
2069 Object **ret, uint64_t *offset) {
2070
2071 uint64_t i, n, ofs;
2072 int r;
2073
2074 assert(f);
2075
2076 n = le64toh(f->header->n_entries);
2077 if (n <= 0)
2078 return 0;
2079
2080 if (p == 0)
2081 i = direction == DIRECTION_DOWN ? 0 : n - 1;
2082 else {
2083 r = generic_array_bisect(f,
2084 le64toh(f->header->entry_array_offset),
2085 le64toh(f->header->n_entries),
2086 p,
2087 test_object_offset,
2088 DIRECTION_DOWN,
2089 NULL, NULL,
2090 &i);
2091 if (r <= 0)
2092 return r;
2093
2094 if (direction == DIRECTION_DOWN) {
2095 if (i >= n - 1)
2096 return 0;
2097
2098 i++;
2099 } else {
2100 if (i <= 0)
2101 return 0;
2102
2103 i--;
2104 }
2105 }
2106
2107 /* And jump to it */
2108 r = generic_array_get(f,
2109 le64toh(f->header->entry_array_offset),
2110 i,
2111 ret, &ofs);
2112 if (r <= 0)
2113 return r;
2114
2115 if (p > 0 &&
2116 (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) {
2117 log_debug("%s: entry array corrupted at entry %"PRIu64,
2118 f->path, i);
2119 return -EBADMSG;
2120 }
2121
2122 if (offset)
2123 *offset = ofs;
2124
2125 return 1;
2126}
2127
2128int journal_file_next_entry_for_data(
2129 JournalFile *f,
2130 Object *o, uint64_t p,
2131 uint64_t data_offset,
2132 direction_t direction,
2133 Object **ret, uint64_t *offset) {
2134
2135 uint64_t n, i;
2136 int r;
2137 Object *d;
2138
2139 assert(f);
2140 assert(p > 0 || !o);
2141
2142 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2143 if (r < 0)
2144 return r;
2145
2146 n = le64toh(d->data.n_entries);
2147 if (n <= 0)
2148 return n;
2149
2150 if (!o)
2151 i = direction == DIRECTION_DOWN ? 0 : n - 1;
2152 else {
2153 if (o->object.type != OBJECT_ENTRY)
2154 return -EINVAL;
2155
2156 r = generic_array_bisect_plus_one(f,
2157 le64toh(d->data.entry_offset),
2158 le64toh(d->data.entry_array_offset),
2159 le64toh(d->data.n_entries),
2160 p,
2161 test_object_offset,
2162 DIRECTION_DOWN,
2163 NULL, NULL,
2164 &i);
2165
2166 if (r <= 0)
2167 return r;
2168
2169 if (direction == DIRECTION_DOWN) {
2170 if (i >= n - 1)
2171 return 0;
2172
2173 i++;
2174 } else {
2175 if (i <= 0)
2176 return 0;
2177
2178 i--;
2179 }
2180
2181 }
2182
2183 return generic_array_get_plus_one(f,
2184 le64toh(d->data.entry_offset),
2185 le64toh(d->data.entry_array_offset),
2186 i,
2187 ret, offset);
2188}
2189
2190int journal_file_move_to_entry_by_offset_for_data(
2191 JournalFile *f,
2192 uint64_t data_offset,
2193 uint64_t p,
2194 direction_t direction,
2195 Object **ret, uint64_t *offset) {
2196
2197 int r;
2198 Object *d;
2199
2200 assert(f);
2201
2202 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2203 if (r < 0)
2204 return r;
2205
2206 return generic_array_bisect_plus_one(f,
2207 le64toh(d->data.entry_offset),
2208 le64toh(d->data.entry_array_offset),
2209 le64toh(d->data.n_entries),
2210 p,
2211 test_object_offset,
2212 direction,
2213 ret, offset, NULL);
2214}
2215
2216int journal_file_move_to_entry_by_monotonic_for_data(
2217 JournalFile *f,
2218 uint64_t data_offset,
2219 sd_id128_t boot_id,
2220 uint64_t monotonic,
2221 direction_t direction,
2222 Object **ret, uint64_t *offset) {
2223
2224 Object *o, *d;
2225 int r;
2226 uint64_t b, z;
2227
2228 assert(f);
2229
2230 /* First, seek by time */
2231 r = find_data_object_by_boot_id(f, boot_id, &o, &b);
2232 if (r < 0)
2233 return r;
2234 if (r == 0)
2235 return -ENOENT;
2236
2237 r = generic_array_bisect_plus_one(f,
2238 le64toh(o->data.entry_offset),
2239 le64toh(o->data.entry_array_offset),
2240 le64toh(o->data.n_entries),
2241 monotonic,
2242 test_object_monotonic,
2243 direction,
2244 NULL, &z, NULL);
2245 if (r <= 0)
2246 return r;
2247
2248 /* And now, continue seeking until we find an entry that
2249 * exists in both bisection arrays */
2250
2251 for (;;) {
2252 Object *qo;
2253 uint64_t p, q;
2254
2255 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2256 if (r < 0)
2257 return r;
2258
2259 r = generic_array_bisect_plus_one(f,
2260 le64toh(d->data.entry_offset),
2261 le64toh(d->data.entry_array_offset),
2262 le64toh(d->data.n_entries),
2263 z,
2264 test_object_offset,
2265 direction,
2266 NULL, &p, NULL);
2267 if (r <= 0)
2268 return r;
2269
2270 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
2271 if (r < 0)
2272 return r;
2273
2274 r = generic_array_bisect_plus_one(f,
2275 le64toh(o->data.entry_offset),
2276 le64toh(o->data.entry_array_offset),
2277 le64toh(o->data.n_entries),
2278 p,
2279 test_object_offset,
2280 direction,
2281 &qo, &q, NULL);
2282
2283 if (r <= 0)
2284 return r;
2285
2286 if (p == q) {
2287 if (ret)
2288 *ret = qo;
2289 if (offset)
2290 *offset = q;
2291
2292 return 1;
2293 }
2294
2295 z = q;
2296 }
2297}
2298
2299int journal_file_move_to_entry_by_seqnum_for_data(
2300 JournalFile *f,
2301 uint64_t data_offset,
2302 uint64_t seqnum,
2303 direction_t direction,
2304 Object **ret, uint64_t *offset) {
2305
2306 Object *d;
2307 int r;
2308
2309 assert(f);
2310
2311 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2312 if (r < 0)
2313 return r;
2314
2315 return generic_array_bisect_plus_one(f,
2316 le64toh(d->data.entry_offset),
2317 le64toh(d->data.entry_array_offset),
2318 le64toh(d->data.n_entries),
2319 seqnum,
2320 test_object_seqnum,
2321 direction,
2322 ret, offset, NULL);
2323}
2324
2325int journal_file_move_to_entry_by_realtime_for_data(
2326 JournalFile *f,
2327 uint64_t data_offset,
2328 uint64_t realtime,
2329 direction_t direction,
2330 Object **ret, uint64_t *offset) {
2331
2332 Object *d;
2333 int r;
2334
2335 assert(f);
2336
2337 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2338 if (r < 0)
2339 return r;
2340
2341 return generic_array_bisect_plus_one(f,
2342 le64toh(d->data.entry_offset),
2343 le64toh(d->data.entry_array_offset),
2344 le64toh(d->data.n_entries),
2345 realtime,
2346 test_object_realtime,
2347 direction,
2348 ret, offset, NULL);
2349}
2350
2351void journal_file_dump(JournalFile *f) {
2352 Object *o;
2353 int r;
2354 uint64_t p;
2355
2356 assert(f);
2357
2358 journal_file_print_header(f);
2359
2360 p = le64toh(f->header->header_size);
2361 while (p != 0) {
2362 r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
2363 if (r < 0)
2364 goto fail;
2365
2366 switch (o->object.type) {
2367
2368 case OBJECT_UNUSED:
2369 printf("Type: OBJECT_UNUSED\n");
2370 break;
2371
2372 case OBJECT_DATA:
2373 printf("Type: OBJECT_DATA\n");
2374 break;
2375
2376 case OBJECT_FIELD:
2377 printf("Type: OBJECT_FIELD\n");
2378 break;
2379
2380 case OBJECT_ENTRY:
2381 printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
2382 le64toh(o->entry.seqnum),
2383 le64toh(o->entry.monotonic),
2384 le64toh(o->entry.realtime));
2385 break;
2386
2387 case OBJECT_FIELD_HASH_TABLE:
2388 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2389 break;
2390
2391 case OBJECT_DATA_HASH_TABLE:
2392 printf("Type: OBJECT_DATA_HASH_TABLE\n");
2393 break;
2394
2395 case OBJECT_ENTRY_ARRAY:
2396 printf("Type: OBJECT_ENTRY_ARRAY\n");
2397 break;
2398
2399 case OBJECT_TAG:
2400 printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
2401 le64toh(o->tag.seqnum),
2402 le64toh(o->tag.epoch));
2403 break;
2404
2405 default:
2406 printf("Type: unknown (%u)\n", o->object.type);
2407 break;
2408 }
2409
2410 if (o->object.flags & OBJECT_COMPRESSION_MASK)
2411 printf("Flags: %s\n",
2412 object_compressed_to_string(o->object.flags & OBJECT_COMPRESSION_MASK));
2413
2414 if (p == le64toh(f->header->tail_object_offset))
2415 p = 0;
2416 else
2417 p = p + ALIGN64(le64toh(o->object.size));
2418 }
2419
2420 return;
2421fail:
2422 log_error("File corrupt");
2423}
2424
2425static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
2426 const char *x;
2427
2428 x = format_timestamp(buf, l, t);
2429 if (x)
2430 return x;
2431 return " --- ";
2432}
2433
2434void journal_file_print_header(JournalFile *f) {
2435 char a[33], b[33], c[33], d[33];
2436 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
2437 struct stat st;
2438 char bytes[FORMAT_BYTES_MAX];
2439
2440 assert(f);
2441
2442 printf("File Path: %s\n"
2443 "File ID: %s\n"
2444 "Machine ID: %s\n"
2445 "Boot ID: %s\n"
2446 "Sequential Number ID: %s\n"
2447 "State: %s\n"
2448 "Compatible Flags:%s%s\n"
2449 "Incompatible Flags:%s%s%s\n"
2450 "Header size: %"PRIu64"\n"
2451 "Arena size: %"PRIu64"\n"
2452 "Data Hash Table Size: %"PRIu64"\n"
2453 "Field Hash Table Size: %"PRIu64"\n"
2454 "Rotate Suggested: %s\n"
2455 "Head Sequential Number: %"PRIu64"\n"
2456 "Tail Sequential Number: %"PRIu64"\n"
2457 "Head Realtime Timestamp: %s\n"
2458 "Tail Realtime Timestamp: %s\n"
2459 "Tail Monotonic Timestamp: %s\n"
2460 "Objects: %"PRIu64"\n"
2461 "Entry Objects: %"PRIu64"\n",
2462 f->path,
2463 sd_id128_to_string(f->header->file_id, a),
2464 sd_id128_to_string(f->header->machine_id, b),
2465 sd_id128_to_string(f->header->boot_id, c),
2466 sd_id128_to_string(f->header->seqnum_id, d),
2467 f->header->state == STATE_OFFLINE ? "OFFLINE" :
2468 f->header->state == STATE_ONLINE ? "ONLINE" :
2469 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
2470 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
2471 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
2472 JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
2473 JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
2474 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
2475 le64toh(f->header->header_size),
2476 le64toh(f->header->arena_size),
2477 le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2478 le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2479 yes_no(journal_file_rotate_suggested(f, 0)),
2480 le64toh(f->header->head_entry_seqnum),
2481 le64toh(f->header->tail_entry_seqnum),
2482 format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2483 format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
2484 format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC),
2485 le64toh(f->header->n_objects),
2486 le64toh(f->header->n_entries));
2487
2488 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2489 printf("Data Objects: %"PRIu64"\n"
2490 "Data Hash Table Fill: %.1f%%\n",
2491 le64toh(f->header->n_data),
2492 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2493
2494 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2495 printf("Field Objects: %"PRIu64"\n"
2496 "Field Hash Table Fill: %.1f%%\n",
2497 le64toh(f->header->n_fields),
2498 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2499
2500 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
2501 printf("Tag Objects: %"PRIu64"\n",
2502 le64toh(f->header->n_tags));
2503 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
2504 printf("Entry Array Objects: %"PRIu64"\n",
2505 le64toh(f->header->n_entry_arrays));
2506
2507 if (fstat(f->fd, &st) >= 0)
2508 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
2509}
2510
2511int journal_file_open(
2512 const char *fname,
2513 int flags,
2514 mode_t mode,
2515 bool compress,
2516 bool seal,
2517 JournalMetrics *metrics,
2518 MMapCache *mmap_cache,
2519 JournalFile *template,
2520 JournalFile **ret) {
2521
2522 bool newly_created = false;
2523 JournalFile *f;
2524 void *h;
2525 int r;
2526
2527 assert(fname);
2528 assert(ret);
2529
2530 if ((flags & O_ACCMODE) != O_RDONLY &&
2531 (flags & O_ACCMODE) != O_RDWR)
2532 return -EINVAL;
2533
2534 if (!endswith(fname, ".journal") &&
2535 !endswith(fname, ".journal~"))
2536 return -EINVAL;
2537
2538 f = new0(JournalFile, 1);
2539 if (!f)
2540 return -ENOMEM;
2541
2542 f->fd = -1;
2543 f->mode = mode;
2544
2545 f->flags = flags;
2546 f->prot = prot_from_flags(flags);
2547 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2548#if defined(HAVE_LZ4)
2549 f->compress_lz4 = compress;
2550#elif defined(HAVE_XZ)
2551 f->compress_xz = compress;
2552#endif
2553#ifdef HAVE_GCRYPT
2554 f->seal = seal;
2555#endif
2556
2557 if (mmap_cache)
2558 f->mmap = mmap_cache_ref(mmap_cache);
2559 else {
2560 f->mmap = mmap_cache_new();
2561 if (!f->mmap) {
2562 r = -ENOMEM;
2563 goto fail;
2564 }
2565 }
2566
2567 f->path = strdup(fname);
2568 if (!f->path) {
2569 r = -ENOMEM;
2570 goto fail;
2571 }
2572
2573 f->chain_cache = ordered_hashmap_new(&uint64_hash_ops);
2574 if (!f->chain_cache) {
2575 r = -ENOMEM;
2576 goto fail;
2577 }
2578
2579 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2580 if (f->fd < 0) {
2581 r = -errno;
2582 goto fail;
2583 }
2584
2585 r = journal_file_fstat(f);
2586 if (r < 0)
2587 goto fail;
2588
2589 if (f->last_stat.st_size == 0 && f->writable) {
2590 /* Let's attach the creation time to the journal file,
2591 * so that the vacuuming code knows the age of this
2592 * file even if the file might end up corrupted one
2593 * day... Ideally we'd just use the creation time many
2594 * file systems maintain for each file, but there is
2595 * currently no usable API to query this, hence let's
2596 * emulate this via extended attributes. If extended
2597 * attributes are not supported we'll just skip this,
2598 * and rely solely on mtime/atime/ctime of the file. */
2599
2600 fd_setcrtime(f->fd, now(CLOCK_REALTIME));
2601
2602#ifdef HAVE_GCRYPT
2603 /* Try to load the FSPRG state, and if we can't, then
2604 * just don't do sealing */
2605 if (f->seal) {
2606 r = journal_file_fss_load(f);
2607 if (r < 0)
2608 f->seal = false;
2609 }
2610#endif
2611
2612 r = journal_file_init_header(f, template);
2613 if (r < 0)
2614 goto fail;
2615
2616 r = journal_file_fstat(f);
2617 if (r < 0)
2618 goto fail;
2619
2620 newly_created = true;
2621 }
2622
2623 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2624 r = -EIO;
2625 goto fail;
2626 }
2627
2628 r = mmap_cache_get(f->mmap, f->fd, f->prot, CONTEXT_HEADER, true, 0, PAGE_ALIGN(sizeof(Header)), &f->last_stat, &h);
2629 if (r < 0) {
2630 r = -errno;
2631 goto fail;
2632 }
2633
2634 f->header = h;
2635
2636 if (!newly_created) {
2637 r = journal_file_verify_header(f);
2638 if (r < 0)
2639 goto fail;
2640 }
2641
2642#ifdef HAVE_GCRYPT
2643 if (!newly_created && f->writable) {
2644 r = journal_file_fss_load(f);
2645 if (r < 0)
2646 goto fail;
2647 }
2648#endif
2649
2650 if (f->writable) {
2651 if (metrics) {
2652 journal_default_metrics(metrics, f->fd);
2653 f->metrics = *metrics;
2654 } else if (template)
2655 f->metrics = template->metrics;
2656
2657 r = journal_file_refresh_header(f);
2658 if (r < 0)
2659 goto fail;
2660 }
2661
2662#ifdef HAVE_GCRYPT
2663 r = journal_file_hmac_setup(f);
2664 if (r < 0)
2665 goto fail;
2666#endif
2667
2668 if (newly_created) {
2669 r = journal_file_setup_field_hash_table(f);
2670 if (r < 0)
2671 goto fail;
2672
2673 r = journal_file_setup_data_hash_table(f);
2674 if (r < 0)
2675 goto fail;
2676
2677#ifdef HAVE_GCRYPT
2678 r = journal_file_append_first_tag(f);
2679 if (r < 0)
2680 goto fail;
2681#endif
2682 }
2683
2684 r = journal_file_map_field_hash_table(f);
2685 if (r < 0)
2686 goto fail;
2687
2688 r = journal_file_map_data_hash_table(f);
2689 if (r < 0)
2690 goto fail;
2691
2692 if (mmap_cache_got_sigbus(f->mmap, f->fd)) {
2693 r = -EIO;
2694 goto fail;
2695 }
2696
2697 *ret = f;
2698 return 0;
2699
2700fail:
2701 if (f->fd >= 0 && mmap_cache_got_sigbus(f->mmap, f->fd))
2702 r = -EIO;
2703
2704 journal_file_close(f);
2705
2706 return r;
2707}
2708
2709int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2710 _cleanup_free_ char *p = NULL;
2711 size_t l;
2712 JournalFile *old_file, *new_file = NULL;
2713 int r;
2714
2715 assert(f);
2716 assert(*f);
2717
2718 old_file = *f;
2719
2720 if (!old_file->writable)
2721 return -EINVAL;
2722
2723 if (!endswith(old_file->path, ".journal"))
2724 return -EINVAL;
2725
2726 l = strlen(old_file->path);
2727 r = asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
2728 (int) l - 8, old_file->path,
2729 SD_ID128_FORMAT_VAL(old_file->header->seqnum_id),
2730 le64toh((*f)->header->head_entry_seqnum),
2731 le64toh((*f)->header->head_entry_realtime));
2732 if (r < 0)
2733 return -ENOMEM;
2734
2735 /* Try to rename the file to the archived version. If the file
2736 * already was deleted, we'll get ENOENT, let's ignore that
2737 * case. */
2738 r = rename(old_file->path, p);
2739 if (r < 0 && errno != ENOENT)
2740 return -errno;
2741
2742 old_file->header->state = STATE_ARCHIVED;
2743
2744 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2745 journal_file_close(old_file);
2746
2747 *f = new_file;
2748 return r;
2749}
2750
2751int journal_file_open_reliably(
2752 const char *fname,
2753 int flags,
2754 mode_t mode,
2755 bool compress,
2756 bool seal,
2757 JournalMetrics *metrics,
2758 MMapCache *mmap_cache,
2759 JournalFile *template,
2760 JournalFile **ret) {
2761
2762 int r;
2763 size_t l;
2764 _cleanup_free_ char *p = NULL;
2765
2766 r = journal_file_open(fname, flags, mode, compress, seal,
2767 metrics, mmap_cache, template, ret);
2768 if (r != -EBADMSG && /* corrupted */
2769 r != -ENODATA && /* truncated */
2770 r != -EHOSTDOWN && /* other machine */
2771 r != -EPROTONOSUPPORT && /* incompatible feature */
2772 r != -EBUSY && /* unclean shutdown */
2773 r != -ESHUTDOWN && /* already archived */
2774 r != -EIO /* IO error, including SIGBUS on mmap */)
2775 return r;
2776
2777 if ((flags & O_ACCMODE) == O_RDONLY)
2778 return r;
2779
2780 if (!(flags & O_CREAT))
2781 return r;
2782
2783 if (!endswith(fname, ".journal"))
2784 return r;
2785
2786 /* The file is corrupted. Rotate it away and try it again (but only once) */
2787
2788 l = strlen(fname);
2789 if (asprintf(&p, "%.*s@%016llx-%016" PRIx64 ".journal~",
2790 (int) l - 8, fname,
2791 (unsigned long long) now(CLOCK_REALTIME),
2792 random_u64()) < 0)
2793 return -ENOMEM;
2794
2795 r = rename(fname, p);
2796 if (r < 0)
2797 return -errno;
2798
2799 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2800
2801 return journal_file_open(fname, flags, mode, compress, seal,
2802 metrics, mmap_cache, template, ret);
2803}
2804
2805int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2806 uint64_t i, n;
2807 uint64_t q, xor_hash = 0;
2808 int r;
2809 EntryItem *items;
2810 dual_timestamp ts;
2811
2812 assert(from);
2813 assert(to);
2814 assert(o);
2815 assert(p);
2816
2817 if (!to->writable)
2818 return -EPERM;
2819
2820 ts.monotonic = le64toh(o->entry.monotonic);
2821 ts.realtime = le64toh(o->entry.realtime);
2822
2823 n = journal_file_entry_n_items(o);
2824 /* alloca() can't take 0, hence let's allocate at least one */
2825 items = alloca(sizeof(EntryItem) * MAX(1u, n));
2826
2827 for (i = 0; i < n; i++) {
2828 uint64_t l, h;
2829 le64_t le_hash;
2830 size_t t;
2831 void *data;
2832 Object *u;
2833
2834 q = le64toh(o->entry.items[i].object_offset);
2835 le_hash = o->entry.items[i].hash;
2836
2837 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2838 if (r < 0)
2839 return r;
2840
2841 if (le_hash != o->data.hash)
2842 return -EBADMSG;
2843
2844 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2845 t = (size_t) l;
2846
2847 /* We hit the limit on 32bit machines */
2848 if ((uint64_t) t != l)
2849 return -E2BIG;
2850
2851 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
2852#if defined(HAVE_XZ) || defined(HAVE_LZ4)
2853 size_t rsize;
2854
2855 r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
2856 o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0);
2857 if (r < 0)
2858 return r;
2859
2860 data = from->compress_buffer;
2861 l = rsize;
2862#else
2863 return -EPROTONOSUPPORT;
2864#endif
2865 } else
2866 data = o->data.payload;
2867
2868 r = journal_file_append_data(to, data, l, &u, &h);
2869 if (r < 0)
2870 return r;
2871
2872 xor_hash ^= le64toh(u->data.hash);
2873 items[i].object_offset = htole64(h);
2874 items[i].hash = u->data.hash;
2875
2876 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2877 if (r < 0)
2878 return r;
2879 }
2880
2881 r = journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2882
2883 if (mmap_cache_got_sigbus(to->mmap, to->fd))
2884 return -EIO;
2885
2886 return r;
2887}
2888
2889void journal_default_metrics(JournalMetrics *m, int fd) {
2890 uint64_t fs_size = 0;
2891 struct statvfs ss;
2892 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2893
2894 assert(m);
2895 assert(fd >= 0);
2896
2897 if (fstatvfs(fd, &ss) >= 0)
2898 fs_size = ss.f_frsize * ss.f_blocks;
2899
2900 if (m->max_use == (uint64_t) -1) {
2901
2902 if (fs_size > 0) {
2903 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2904
2905 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2906 m->max_use = DEFAULT_MAX_USE_UPPER;
2907
2908 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2909 m->max_use = DEFAULT_MAX_USE_LOWER;
2910 } else
2911 m->max_use = DEFAULT_MAX_USE_LOWER;
2912 } else {
2913 m->max_use = PAGE_ALIGN(m->max_use);
2914
2915 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2916 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2917 }
2918
2919 if (m->max_size == (uint64_t) -1) {
2920 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2921
2922 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2923 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2924 } else
2925 m->max_size = PAGE_ALIGN(m->max_size);
2926
2927 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2928 m->max_size = JOURNAL_FILE_SIZE_MIN;
2929
2930 if (m->max_size*2 > m->max_use)
2931 m->max_use = m->max_size*2;
2932
2933 if (m->min_size == (uint64_t) -1)
2934 m->min_size = JOURNAL_FILE_SIZE_MIN;
2935 else {
2936 m->min_size = PAGE_ALIGN(m->min_size);
2937
2938 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2939 m->min_size = JOURNAL_FILE_SIZE_MIN;
2940
2941 if (m->min_size > m->max_size)
2942 m->max_size = m->min_size;
2943 }
2944
2945 if (m->keep_free == (uint64_t) -1) {
2946
2947 if (fs_size > 0) {
2948 m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
2949
2950 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2951 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2952
2953 } else
2954 m->keep_free = DEFAULT_KEEP_FREE;
2955 }
2956
2957 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2958 format_bytes(a, sizeof(a), m->max_use),
2959 format_bytes(b, sizeof(b), m->max_size),
2960 format_bytes(c, sizeof(c), m->min_size),
2961 format_bytes(d, sizeof(d), m->keep_free));
2962}
2963
2964int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2965 assert(f);
2966 assert(from || to);
2967
2968 if (from) {
2969 if (f->header->head_entry_realtime == 0)
2970 return -ENOENT;
2971
2972 *from = le64toh(f->header->head_entry_realtime);
2973 }
2974
2975 if (to) {
2976 if (f->header->tail_entry_realtime == 0)
2977 return -ENOENT;
2978
2979 *to = le64toh(f->header->tail_entry_realtime);
2980 }
2981
2982 return 1;
2983}
2984
2985int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2986 Object *o;
2987 uint64_t p;
2988 int r;
2989
2990 assert(f);
2991 assert(from || to);
2992
2993 r = find_data_object_by_boot_id(f, boot_id, &o, &p);
2994 if (r <= 0)
2995 return r;
2996
2997 if (le64toh(o->data.n_entries) <= 0)
2998 return 0;
2999
3000 if (from) {
3001 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
3002 if (r < 0)
3003 return r;
3004
3005 *from = le64toh(o->entry.monotonic);
3006 }
3007
3008 if (to) {
3009 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
3010 if (r < 0)
3011 return r;
3012
3013 r = generic_array_get_plus_one(f,
3014 le64toh(o->data.entry_offset),
3015 le64toh(o->data.entry_array_offset),
3016 le64toh(o->data.n_entries)-1,
3017 &o, NULL);
3018 if (r <= 0)
3019 return r;
3020
3021 *to = le64toh(o->entry.monotonic);
3022 }
3023
3024 return 1;
3025}
3026
3027bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
3028 assert(f);
3029
3030 /* If we gained new header fields we gained new features,
3031 * hence suggest a rotation */
3032 if (le64toh(f->header->header_size) < sizeof(Header)) {
3033 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
3034 return true;
3035 }
3036
3037 /* Let's check if the hash tables grew over a certain fill
3038 * level (75%, borrowing this value from Java's hash table
3039 * implementation), and if so suggest a rotation. To calculate
3040 * the fill level we need the n_data field, which only exists
3041 * in newer versions. */
3042
3043 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
3044 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3045 log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
3046 f->path,
3047 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
3048 le64toh(f->header->n_data),
3049 le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
3050 (unsigned long long) f->last_stat.st_size,
3051 f->last_stat.st_size / le64toh(f->header->n_data));
3052 return true;
3053 }
3054
3055 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
3056 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3057 log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
3058 f->path,
3059 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
3060 le64toh(f->header->n_fields),
3061 le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
3062 return true;
3063 }
3064
3065 /* Are the data objects properly indexed by field objects? */
3066 if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
3067 JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
3068 le64toh(f->header->n_data) > 0 &&
3069 le64toh(f->header->n_fields) == 0)
3070 return true;
3071
3072 if (max_file_usec > 0) {
3073 usec_t t, h;
3074
3075 h = le64toh(f->header->head_entry_realtime);
3076 t = now(CLOCK_REALTIME);
3077
3078 if (h > 0 && t > h + max_file_usec)
3079 return true;
3080 }
3081
3082 return false;
3083}