]> git.ipfire.org Git - thirdparty/systemd.git/blame_incremental - src/journal/journal-file.c
journal: fix bad memory access
[thirdparty/systemd.git] / src / journal / journal-file.c
... / ...
CommitLineData
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
32#include "lookup3.h"
33#include "compress.h"
34
/* Sizes, in bytes, used for the two hash table objects of a freshly
 * created journal file */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL * 16ULL)
#define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL * 16ULL)

/* Preferred size of a single mmap() window into the file (8 MiB) */
#define DEFAULT_WINDOW_SIZE (8ULL * 1024ULL * 1024ULL)

/* Payloads shorter than this many bytes are never handed to the
 * compressor */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* Smallest size a journal file may have */
#define JOURNAL_FILE_SIZE_MIN (64ULL * 1024ULL)                       /* 64 KiB */

/* Lower and upper clamp applied when max_use is derived from the
 * size of the file system */
#define DEFAULT_MAX_USE_LOWER (1ULL * 1024ULL * 1024ULL)              /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)    /* 4 GiB */

/* Upper clamp applied when max_size is derived from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL * 1024ULL * 1024ULL)           /* 128 MiB */

/* Upper clamp applied when keep_free is derived from the size of
 * the file system */
#define DEFAULT_KEEP_FREE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)  /* 4 GiB */

/* keep_free value used when the file system size cannot be
 * determined */
#define DEFAULT_KEEP_FREE (1024ULL * 1024ULL)                         /* 1 MiB */

/* The eight magic bytes every journal file starts with */
static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };

/* Rounds x up to the next multiple of 8 */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
64
65void journal_file_close(JournalFile *f) {
66 int t;
67
68 assert(f);
69
70 if (f->header) {
71 if (f->writable)
72 f->header->state = STATE_OFFLINE;
73
74 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
75 }
76
77 for (t = 0; t < _WINDOW_MAX; t++)
78 if (f->windows[t].ptr)
79 munmap(f->windows[t].ptr, f->windows[t].size);
80
81 if (f->fd >= 0)
82 close_nointr_nofail(f->fd);
83
84 free(f->path);
85
86#ifdef HAVE_XZ
87 free(f->compress_buffer);
88#endif
89
90 free(f);
91}
92
93static int journal_file_init_header(JournalFile *f, JournalFile *template) {
94 Header h;
95 ssize_t k;
96 int r;
97
98 assert(f);
99
100 zero(h);
101 memcpy(h.signature, signature, 8);
102 h.header_size = htole64(ALIGN64(sizeof(h)));
103
104 r = sd_id128_randomize(&h.file_id);
105 if (r < 0)
106 return r;
107
108 if (template) {
109 h.seqnum_id = template->header->seqnum_id;
110 h.seqnum = template->header->seqnum;
111 } else
112 h.seqnum_id = h.file_id;
113
114 k = pwrite(f->fd, &h, sizeof(h), 0);
115 if (k < 0)
116 return -errno;
117
118 if (k != sizeof(h))
119 return -EIO;
120
121 return 0;
122}
123
124static int journal_file_refresh_header(JournalFile *f) {
125 int r;
126 sd_id128_t boot_id;
127
128 assert(f);
129
130 r = sd_id128_get_machine(&f->header->machine_id);
131 if (r < 0)
132 return r;
133
134 r = sd_id128_get_boot(&boot_id);
135 if (r < 0)
136 return r;
137
138 if (sd_id128_equal(boot_id, f->header->boot_id))
139 f->tail_entry_monotonic_valid = true;
140
141 f->header->boot_id = boot_id;
142
143 f->header->state = STATE_ONLINE;
144
145 __sync_synchronize();
146
147 return 0;
148}
149
150static int journal_file_verify_header(JournalFile *f) {
151 assert(f);
152
153 if (memcmp(f->header, signature, 8))
154 return -EBADMSG;
155
156#ifdef HAVE_XZ
157 if ((le64toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
158 return -EPROTONOSUPPORT;
159#else
160 if (f->header->incompatible_flags != 0)
161 return -EPROTONOSUPPORT;
162#endif
163
164 if (f->header->header_size != htole64(ALIGN64(sizeof(*(f->header)))))
165 return -EBADMSG;
166
167 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
168 return -ENODATA;
169
170 if (f->writable) {
171 uint8_t state;
172 sd_id128_t machine_id;
173 int r;
174
175 r = sd_id128_get_machine(&machine_id);
176 if (r < 0)
177 return r;
178
179 if (!sd_id128_equal(machine_id, f->header->machine_id))
180 return -EHOSTDOWN;
181
182 state = f->header->state;
183
184 if (state == STATE_ONLINE)
185 log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path);
186 /* FIXME: immediately rotate */
187 else if (state == STATE_ARCHIVED)
188 return -ESHUTDOWN;
189 else if (state != STATE_OFFLINE)
190 log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state);
191 }
192
193 return 0;
194}
195
196static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
197 uint64_t old_size, new_size;
198 int r;
199
200 assert(f);
201
202 /* We assume that this file is not sparse, and we know that
203 * for sure, since we always call posix_fallocate()
204 * ourselves */
205
206 old_size =
207 le64toh(f->header->header_size) +
208 le64toh(f->header->arena_size);
209
210 new_size = PAGE_ALIGN(offset + size);
211 if (new_size < le64toh(f->header->header_size))
212 new_size = le64toh(f->header->header_size);
213
214 if (new_size <= old_size)
215 return 0;
216
217 if (f->metrics.max_size > 0 &&
218 new_size > f->metrics.max_size)
219 return -E2BIG;
220
221 if (new_size > f->metrics.min_size &&
222 f->metrics.keep_free > 0) {
223 struct statvfs svfs;
224
225 if (fstatvfs(f->fd, &svfs) >= 0) {
226 uint64_t available;
227
228 available = svfs.f_bfree * svfs.f_bsize;
229
230 if (available >= f->metrics.keep_free)
231 available -= f->metrics.keep_free;
232 else
233 available = 0;
234
235 if (new_size - old_size > available)
236 return -E2BIG;
237 }
238 }
239
240 /* Note that the glibc fallocate() fallback is very
241 inefficient, hence we try to minimize the allocation area
242 as we can. */
243 r = posix_fallocate(f->fd, old_size, new_size - old_size);
244 if (r != 0)
245 return -r;
246
247 if (fstat(f->fd, &f->last_stat) < 0)
248 return -errno;
249
250 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
251
252 return 0;
253}
254
255static int journal_file_map(
256 JournalFile *f,
257 uint64_t offset,
258 uint64_t size,
259 void **_window,
260 uint64_t *_woffset,
261 uint64_t *_wsize,
262 void **ret) {
263
264 uint64_t woffset, wsize;
265 void *window;
266
267 assert(f);
268 assert(size > 0);
269 assert(ret);
270
271 woffset = offset & ~((uint64_t) page_size() - 1ULL);
272 wsize = size + (offset - woffset);
273 wsize = PAGE_ALIGN(wsize);
274
275 /* Avoid SIGBUS on invalid accesses */
276 if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
277 return -EADDRNOTAVAIL;
278
279 window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
280 if (window == MAP_FAILED)
281 return -errno;
282
283 if (_window)
284 *_window = window;
285
286 if (_woffset)
287 *_woffset = woffset;
288
289 if (_wsize)
290 *_wsize = wsize;
291
292 *ret = (uint8_t*) window + (offset - woffset);
293
294 return 0;
295}
296
297static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
298 void *p = NULL;
299 uint64_t delta;
300 int r;
301 Window *w;
302
303 assert(f);
304 assert(ret);
305 assert(wt >= 0);
306 assert(wt < _WINDOW_MAX);
307
308 if (offset + size > (uint64_t) f->last_stat.st_size) {
309 /* Hmm, out of range? Let's refresh the fstat() data
310 * first, before we trust that check. */
311
312 if (fstat(f->fd, &f->last_stat) < 0 ||
313 offset + size > (uint64_t) f->last_stat.st_size)
314 return -EADDRNOTAVAIL;
315 }
316
317 w = f->windows + wt;
318
319 if (_likely_(w->ptr &&
320 w->offset <= offset &&
321 w->offset + w->size >= offset + size)) {
322
323 *ret = (uint8_t*) w->ptr + (offset - w->offset);
324 return 0;
325 }
326
327 if (w->ptr) {
328 if (munmap(w->ptr, w->size) < 0)
329 return -errno;
330
331 w->ptr = NULL;
332 w->size = w->offset = 0;
333 }
334
335 if (size < DEFAULT_WINDOW_SIZE) {
336 /* If the default window size is larger then what was
337 * asked for extend the mapping a bit in the hope to
338 * minimize needed remappings later on. We add half
339 * the window space before and half behind the
340 * requested mapping */
341
342 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
343
344 if (delta > offset)
345 delta = offset;
346
347 offset -= delta;
348 size = DEFAULT_WINDOW_SIZE;
349 } else
350 delta = 0;
351
352 if (offset + size > (uint64_t) f->last_stat.st_size)
353 size = (uint64_t) f->last_stat.st_size - offset;
354
355 if (size <= 0)
356 return -EADDRNOTAVAIL;
357
358 r = journal_file_map(f,
359 offset, size,
360 &w->ptr, &w->offset, &w->size,
361 &p);
362
363 if (r < 0)
364 return r;
365
366 *ret = (uint8_t*) p + delta;
367 return 0;
368}
369
370static bool verify_hash(Object *o) {
371 uint64_t h1, h2;
372
373 assert(o);
374
375 if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
376 h1 = le64toh(o->data.hash);
377 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
378 } else if (o->object.type == OBJECT_FIELD) {
379 h1 = le64toh(o->field.hash);
380 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
381 } else
382 return true;
383
384 return h1 == h2;
385}
386
387int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
388 int r;
389 void *t;
390 Object *o;
391 uint64_t s;
392
393 assert(f);
394 assert(ret);
395 assert(type < _OBJECT_TYPE_MAX);
396
397 r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
398 if (r < 0)
399 return r;
400
401 o = (Object*) t;
402 s = le64toh(o->object.size);
403
404 if (s < sizeof(ObjectHeader))
405 return -EBADMSG;
406
407 if (type >= 0 && o->object.type != type)
408 return -EBADMSG;
409
410 if (s > sizeof(ObjectHeader)) {
411 r = journal_file_move_to(f, o->object.type, offset, s, &t);
412 if (r < 0)
413 return r;
414
415 o = (Object*) t;
416 }
417
418 if (!verify_hash(o))
419 return -EBADMSG;
420
421 *ret = o;
422 return 0;
423}
424
425static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
426 uint64_t r;
427
428 assert(f);
429
430 r = le64toh(f->header->seqnum) + 1;
431
432 if (seqnum) {
433 /* If an external seqnum counter was passed, we update
434 * both the local and the external one, and set it to
435 * the maximum of both */
436
437 if (*seqnum + 1 > r)
438 r = *seqnum + 1;
439
440 *seqnum = r;
441 }
442
443 f->header->seqnum = htole64(r);
444
445 if (f->header->first_seqnum == 0)
446 f->header->first_seqnum = htole64(r);
447
448 return r;
449}
450
451static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
452 int r;
453 uint64_t p;
454 Object *tail, *o;
455 void *t;
456
457 assert(f);
458 assert(size >= sizeof(ObjectHeader));
459 assert(offset);
460 assert(ret);
461
462 p = le64toh(f->header->tail_object_offset);
463 if (p == 0)
464 p = le64toh(f->header->header_size);
465 else {
466 r = journal_file_move_to_object(f, -1, p, &tail);
467 if (r < 0)
468 return r;
469
470 p += ALIGN64(le64toh(tail->object.size));
471 }
472
473 r = journal_file_allocate(f, p, size);
474 if (r < 0)
475 return r;
476
477 r = journal_file_move_to(f, type, p, size, &t);
478 if (r < 0)
479 return r;
480
481 o = (Object*) t;
482
483 zero(o->object);
484 o->object.type = type;
485 o->object.size = htole64(size);
486
487 f->header->tail_object_offset = htole64(p);
488 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
489
490 *ret = o;
491 *offset = p;
492
493 return 0;
494}
495
496static int journal_file_setup_data_hash_table(JournalFile *f) {
497 uint64_t s, p;
498 Object *o;
499 int r;
500
501 assert(f);
502
503 s = DEFAULT_DATA_HASH_TABLE_SIZE;
504 r = journal_file_append_object(f,
505 OBJECT_DATA_HASH_TABLE,
506 offsetof(Object, hash_table.items) + s,
507 &o, &p);
508 if (r < 0)
509 return r;
510
511 memset(o->hash_table.items, 0, s);
512
513 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
514 f->header->data_hash_table_size = htole64(s);
515
516 return 0;
517}
518
519static int journal_file_setup_field_hash_table(JournalFile *f) {
520 uint64_t s, p;
521 Object *o;
522 int r;
523
524 assert(f);
525
526 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
527 r = journal_file_append_object(f,
528 OBJECT_FIELD_HASH_TABLE,
529 offsetof(Object, hash_table.items) + s,
530 &o, &p);
531 if (r < 0)
532 return r;
533
534 memset(o->hash_table.items, 0, s);
535
536 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
537 f->header->field_hash_table_size = htole64(s);
538
539 return 0;
540}
541
542static int journal_file_map_data_hash_table(JournalFile *f) {
543 uint64_t s, p;
544 void *t;
545 int r;
546
547 assert(f);
548
549 p = le64toh(f->header->data_hash_table_offset);
550 s = le64toh(f->header->data_hash_table_size);
551
552 r = journal_file_move_to(f,
553 WINDOW_DATA_HASH_TABLE,
554 p, s,
555 &t);
556 if (r < 0)
557 return r;
558
559 f->data_hash_table = t;
560 return 0;
561}
562
563static int journal_file_map_field_hash_table(JournalFile *f) {
564 uint64_t s, p;
565 void *t;
566 int r;
567
568 assert(f);
569
570 p = le64toh(f->header->field_hash_table_offset);
571 s = le64toh(f->header->field_hash_table_size);
572
573 r = journal_file_move_to(f,
574 WINDOW_FIELD_HASH_TABLE,
575 p, s,
576 &t);
577 if (r < 0)
578 return r;
579
580 f->field_hash_table = t;
581 return 0;
582}
583
584static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
585 uint64_t p, h;
586 int r;
587
588 assert(f);
589 assert(o);
590 assert(offset > 0);
591 assert(o->object.type == OBJECT_DATA);
592
593 /* This might alter the window we are looking at */
594
595 o->data.next_hash_offset = o->data.next_field_offset = 0;
596 o->data.entry_offset = o->data.entry_array_offset = 0;
597 o->data.n_entries = 0;
598
599 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
600 p = le64toh(f->data_hash_table[h].tail_hash_offset);
601 if (p == 0) {
602 /* Only entry in the hash table is easy */
603 f->data_hash_table[h].head_hash_offset = htole64(offset);
604 } else {
605 /* Move back to the previous data object, to patch in
606 * pointer */
607
608 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
609 if (r < 0)
610 return r;
611
612 o->data.next_hash_offset = htole64(offset);
613 }
614
615 f->data_hash_table[h].tail_hash_offset = htole64(offset);
616
617 return 0;
618}
619
620int journal_file_find_data_object_with_hash(
621 JournalFile *f,
622 const void *data, uint64_t size, uint64_t hash,
623 Object **ret, uint64_t *offset) {
624
625 uint64_t p, osize, h;
626 int r;
627
628 assert(f);
629 assert(data || size == 0);
630
631 osize = offsetof(Object, data.payload) + size;
632
633 if (f->header->data_hash_table_size == 0)
634 return -EBADMSG;
635
636 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
637 p = le64toh(f->data_hash_table[h].head_hash_offset);
638
639 while (p > 0) {
640 Object *o;
641
642 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
643 if (r < 0)
644 return r;
645
646 if (le64toh(o->data.hash) != hash)
647 goto next;
648
649 if (o->object.flags & OBJECT_COMPRESSED) {
650#ifdef HAVE_XZ
651 uint64_t l, rsize;
652
653 l = le64toh(o->object.size);
654 if (l <= offsetof(Object, data.payload))
655 return -EBADMSG;
656
657 l -= offsetof(Object, data.payload);
658
659 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
660 return -EBADMSG;
661
662 if (rsize == size &&
663 memcmp(f->compress_buffer, data, size) == 0) {
664
665 if (ret)
666 *ret = o;
667
668 if (offset)
669 *offset = p;
670
671 return 1;
672 }
673#else
674 return -EPROTONOSUPPORT;
675#endif
676
677 } else if (le64toh(o->object.size) == osize &&
678 memcmp(o->data.payload, data, size) == 0) {
679
680 if (ret)
681 *ret = o;
682
683 if (offset)
684 *offset = p;
685
686 return 1;
687 }
688
689 next:
690 p = le64toh(o->data.next_hash_offset);
691 }
692
693 return 0;
694}
695
696int journal_file_find_data_object(
697 JournalFile *f,
698 const void *data, uint64_t size,
699 Object **ret, uint64_t *offset) {
700
701 uint64_t hash;
702
703 assert(f);
704 assert(data || size == 0);
705
706 hash = hash64(data, size);
707
708 return journal_file_find_data_object_with_hash(f,
709 data, size, hash,
710 ret, offset);
711}
712
713static int journal_file_append_data(
714 JournalFile *f,
715 const void *data, uint64_t size,
716 Object **ret, uint64_t *offset) {
717
718 uint64_t hash, p;
719 uint64_t osize;
720 Object *o;
721 int r;
722 bool compressed = false;
723
724 assert(f);
725 assert(data || size == 0);
726
727 hash = hash64(data, size);
728
729 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
730 if (r < 0)
731 return r;
732 else if (r > 0) {
733
734 if (ret)
735 *ret = o;
736
737 if (offset)
738 *offset = p;
739
740 return 0;
741 }
742
743 osize = offsetof(Object, data.payload) + size;
744 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
745 if (r < 0)
746 return r;
747
748 o->data.hash = htole64(hash);
749
750#ifdef HAVE_XZ
751 if (f->compress &&
752 size >= COMPRESSION_SIZE_THRESHOLD) {
753 uint64_t rsize;
754
755 compressed = compress_blob(data, size, o->data.payload, &rsize);
756
757 if (compressed) {
758 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
759 o->object.flags |= OBJECT_COMPRESSED;
760
761 f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED);
762
763 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
764 }
765 }
766#endif
767
768 if (!compressed)
769 memcpy(o->data.payload, data, size);
770
771 r = journal_file_link_data(f, o, p, hash);
772 if (r < 0)
773 return r;
774
775 /* The linking might have altered the window, so let's
776 * refresh our pointer */
777 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
778 if (r < 0)
779 return r;
780
781 if (ret)
782 *ret = o;
783
784 if (offset)
785 *offset = p;
786
787 return 0;
788}
789
790uint64_t journal_file_entry_n_items(Object *o) {
791 assert(o);
792 assert(o->object.type == OBJECT_ENTRY);
793
794 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
795}
796
797static uint64_t journal_file_entry_array_n_items(Object *o) {
798 assert(o);
799 assert(o->object.type == OBJECT_ENTRY_ARRAY);
800
801 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
802}
803
804static int link_entry_into_array(JournalFile *f,
805 le64_t *first,
806 le64_t *idx,
807 uint64_t p) {
808 int r;
809 uint64_t n = 0, ap = 0, q, i, a, hidx;
810 Object *o;
811
812 assert(f);
813 assert(first);
814 assert(idx);
815 assert(p > 0);
816
817 a = le64toh(*first);
818 i = hidx = le64toh(*idx);
819 while (a > 0) {
820
821 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
822 if (r < 0)
823 return r;
824
825 n = journal_file_entry_array_n_items(o);
826 if (i < n) {
827 o->entry_array.items[i] = htole64(p);
828 *idx = htole64(hidx + 1);
829 return 0;
830 }
831
832 i -= n;
833 ap = a;
834 a = le64toh(o->entry_array.next_entry_array_offset);
835 }
836
837 if (hidx > n)
838 n = (hidx+1) * 2;
839 else
840 n = n * 2;
841
842 if (n < 4)
843 n = 4;
844
845 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
846 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
847 &o, &q);
848 if (r < 0)
849 return r;
850
851 o->entry_array.items[i] = htole64(p);
852
853 if (ap == 0)
854 *first = htole64(q);
855 else {
856 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
857 if (r < 0)
858 return r;
859
860 o->entry_array.next_entry_array_offset = htole64(q);
861 }
862
863 *idx = htole64(hidx + 1);
864
865 return 0;
866}
867
868static int link_entry_into_array_plus_one(JournalFile *f,
869 le64_t *extra,
870 le64_t *first,
871 le64_t *idx,
872 uint64_t p) {
873
874 int r;
875
876 assert(f);
877 assert(extra);
878 assert(first);
879 assert(idx);
880 assert(p > 0);
881
882 if (*idx == 0)
883 *extra = htole64(p);
884 else {
885 le64_t i;
886
887 i = htole64(le64toh(*idx) - 1);
888 r = link_entry_into_array(f, first, &i, p);
889 if (r < 0)
890 return r;
891 }
892
893 *idx = htole64(le64toh(*idx) + 1);
894 return 0;
895}
896
897static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
898 uint64_t p;
899 int r;
900 assert(f);
901 assert(o);
902 assert(offset > 0);
903
904 p = le64toh(o->entry.items[i].object_offset);
905 if (p == 0)
906 return -EINVAL;
907
908 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
909 if (r < 0)
910 return r;
911
912 return link_entry_into_array_plus_one(f,
913 &o->data.entry_offset,
914 &o->data.entry_array_offset,
915 &o->data.n_entries,
916 offset);
917}
918
919static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
920 uint64_t n, i;
921 int r;
922
923 assert(f);
924 assert(o);
925 assert(offset > 0);
926 assert(o->object.type == OBJECT_ENTRY);
927
928 __sync_synchronize();
929
930 /* Link up the entry itself */
931 r = link_entry_into_array(f,
932 &f->header->entry_array_offset,
933 &f->header->n_entries,
934 offset);
935 if (r < 0)
936 return r;
937
938 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
939
940 if (f->header->head_entry_realtime == 0)
941 f->header->head_entry_realtime = o->entry.realtime;
942
943 f->header->tail_entry_realtime = o->entry.realtime;
944 f->header->tail_entry_monotonic = o->entry.monotonic;
945
946 f->tail_entry_monotonic_valid = true;
947
948 /* Link up the items */
949 n = journal_file_entry_n_items(o);
950 for (i = 0; i < n; i++) {
951 r = journal_file_link_entry_item(f, o, offset, i);
952 if (r < 0)
953 return r;
954 }
955
956 return 0;
957}
958
959static int journal_file_append_entry_internal(
960 JournalFile *f,
961 const dual_timestamp *ts,
962 uint64_t xor_hash,
963 const EntryItem items[], unsigned n_items,
964 uint64_t *seqnum,
965 Object **ret, uint64_t *offset) {
966 uint64_t np;
967 uint64_t osize;
968 Object *o;
969 int r;
970
971 assert(f);
972 assert(items || n_items == 0);
973 assert(ts);
974
975 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
976
977 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
978 if (r < 0)
979 return r;
980
981 o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
982 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
983 o->entry.realtime = htole64(ts->realtime);
984 o->entry.monotonic = htole64(ts->monotonic);
985 o->entry.xor_hash = htole64(xor_hash);
986 o->entry.boot_id = f->header->boot_id;
987
988 r = journal_file_link_entry(f, o, np);
989 if (r < 0)
990 return r;
991
992 if (ret)
993 *ret = o;
994
995 if (offset)
996 *offset = np;
997
998 return 0;
999}
1000
1001void journal_file_post_change(JournalFile *f) {
1002 assert(f);
1003
1004 /* inotify() does not receive IN_MODIFY events from file
1005 * accesses done via mmap(). After each access we hence
1006 * trigger IN_MODIFY by truncating the journal file to its
1007 * current size which triggers IN_MODIFY. */
1008
1009 __sync_synchronize();
1010
1011 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1012 log_error("Failed to to truncate file to its own size: %m");
1013}
1014
1015int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1016 unsigned i;
1017 EntryItem *items;
1018 int r;
1019 uint64_t xor_hash = 0;
1020 struct dual_timestamp _ts;
1021
1022 assert(f);
1023 assert(iovec || n_iovec == 0);
1024
1025 if (!f->writable)
1026 return -EPERM;
1027
1028 if (!ts) {
1029 dual_timestamp_get(&_ts);
1030 ts = &_ts;
1031 }
1032
1033 if (f->tail_entry_monotonic_valid &&
1034 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1035 return -EINVAL;
1036
1037 items = alloca(sizeof(EntryItem) * n_iovec);
1038
1039 for (i = 0; i < n_iovec; i++) {
1040 uint64_t p;
1041 Object *o;
1042
1043 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1044 if (r < 0)
1045 return r;
1046
1047 xor_hash ^= le64toh(o->data.hash);
1048 items[i].object_offset = htole64(p);
1049 items[i].hash = o->data.hash;
1050 }
1051
1052 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1053
1054 journal_file_post_change(f);
1055
1056 return r;
1057}
1058
1059static int generic_array_get(JournalFile *f,
1060 uint64_t first,
1061 uint64_t i,
1062 Object **ret, uint64_t *offset) {
1063
1064 Object *o;
1065 uint64_t p = 0, a;
1066 int r;
1067
1068 assert(f);
1069
1070 a = first;
1071 while (a > 0) {
1072 uint64_t n;
1073
1074 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1075 if (r < 0)
1076 return r;
1077
1078 n = journal_file_entry_array_n_items(o);
1079 if (i < n) {
1080 p = le64toh(o->entry_array.items[i]);
1081 break;
1082 }
1083
1084 i -= n;
1085 a = le64toh(o->entry_array.next_entry_array_offset);
1086 }
1087
1088 if (a <= 0 || p <= 0)
1089 return 0;
1090
1091 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1092 if (r < 0)
1093 return r;
1094
1095 if (ret)
1096 *ret = o;
1097
1098 if (offset)
1099 *offset = p;
1100
1101 return 1;
1102}
1103
1104static int generic_array_get_plus_one(JournalFile *f,
1105 uint64_t extra,
1106 uint64_t first,
1107 uint64_t i,
1108 Object **ret, uint64_t *offset) {
1109
1110 Object *o;
1111
1112 assert(f);
1113
1114 if (i == 0) {
1115 int r;
1116
1117 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1118 if (r < 0)
1119 return r;
1120
1121 if (ret)
1122 *ret = o;
1123
1124 if (offset)
1125 *offset = extra;
1126
1127 return 1;
1128 }
1129
1130 return generic_array_get(f, first, i-1, ret, offset);
1131}
1132
/* Results of the test_object callbacks used for bisection */
enum {
        TEST_FOUND = 0, /* the tested entry matches the needle */
        TEST_LEFT  = 1, /* the tested entry sorts before the needle */
        TEST_RIGHT = 2  /* the tested entry sorts after the needle */
};
1138
1139static int generic_array_bisect(JournalFile *f,
1140 uint64_t first,
1141 uint64_t n,
1142 uint64_t needle,
1143 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1144 direction_t direction,
1145 Object **ret,
1146 uint64_t *offset,
1147 uint64_t *idx) {
1148
1149 uint64_t a, p, t = 0, i = 0, last_p = 0;
1150 bool subtract_one = false;
1151 Object *o, *array = NULL;
1152 int r;
1153
1154 assert(f);
1155 assert(test_object);
1156
1157 a = first;
1158 while (a > 0) {
1159 uint64_t left, right, k, lp;
1160
1161 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1162 if (r < 0)
1163 return r;
1164
1165 k = journal_file_entry_array_n_items(array);
1166 right = MIN(k, n);
1167 if (right <= 0)
1168 return 0;
1169
1170 i = right - 1;
1171 lp = p = le64toh(array->entry_array.items[i]);
1172 if (p <= 0)
1173 return -EBADMSG;
1174
1175 r = test_object(f, p, needle);
1176 if (r < 0)
1177 return r;
1178
1179 if (r == TEST_FOUND)
1180 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1181
1182 if (r == TEST_RIGHT) {
1183 left = 0;
1184 right -= 1;
1185 for (;;) {
1186 if (left == right) {
1187 if (direction == DIRECTION_UP)
1188 subtract_one = true;
1189
1190 i = left;
1191 goto found;
1192 }
1193
1194 assert(left < right);
1195
1196 i = (left + right) / 2;
1197 p = le64toh(array->entry_array.items[i]);
1198 if (p <= 0)
1199 return -EBADMSG;
1200
1201 r = test_object(f, p, needle);
1202 if (r < 0)
1203 return r;
1204
1205 if (r == TEST_FOUND)
1206 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1207
1208 if (r == TEST_RIGHT)
1209 right = i;
1210 else
1211 left = i + 1;
1212 }
1213 }
1214
1215 if (k > n)
1216 return 0;
1217
1218 last_p = lp;
1219
1220 n -= k;
1221 t += k;
1222 a = le64toh(array->entry_array.next_entry_array_offset);
1223 }
1224
1225 return 0;
1226
1227found:
1228 if (subtract_one && t == 0 && i == 0)
1229 return 0;
1230
1231 if (subtract_one && i == 0)
1232 p = last_p;
1233 else if (subtract_one)
1234 p = le64toh(array->entry_array.items[i-1]);
1235 else
1236 p = le64toh(array->entry_array.items[i]);
1237
1238 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1239 if (r < 0)
1240 return r;
1241
1242 if (ret)
1243 *ret = o;
1244
1245 if (offset)
1246 *offset = p;
1247
1248 if (idx)
1249 *idx = t + i - (subtract_one ? 1 : 0);
1250
1251 return 1;
1252}
1253
1254static int generic_array_bisect_plus_one(JournalFile *f,
1255 uint64_t extra,
1256 uint64_t first,
1257 uint64_t n,
1258 uint64_t needle,
1259 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1260 direction_t direction,
1261 Object **ret,
1262 uint64_t *offset,
1263 uint64_t *idx) {
1264
1265 int r;
1266
1267 assert(f);
1268 assert(test_object);
1269
1270 if (n <= 0)
1271 return 0;
1272
1273 /* This bisects the array in object 'first', but first checks
1274 * an extra */
1275 r = test_object(f, extra, needle);
1276 if (r < 0)
1277 return r;
1278
1279 if (r == TEST_FOUND)
1280 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1281
1282 if (r == TEST_RIGHT) {
1283 Object *o;
1284
1285 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1286 if (r < 0)
1287 return r;
1288
1289 if (ret)
1290 *ret = o;
1291
1292 if (offset)
1293 *offset = extra;
1294
1295 if (idx)
1296 *idx = 0;
1297
1298 return 1;
1299 }
1300
1301 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1302
1303 if (r > 0 && idx)
1304 (*idx) ++;
1305
1306 return r;
1307}
1308
1309static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1310 Object *o;
1311 int r;
1312
1313 assert(f);
1314 assert(p > 0);
1315
1316 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1317 if (r < 0)
1318 return r;
1319
1320 if (le64toh(o->entry.seqnum) == needle)
1321 return TEST_FOUND;
1322 else if (le64toh(o->entry.seqnum) < needle)
1323 return TEST_LEFT;
1324 else
1325 return TEST_RIGHT;
1326}
1327
1328int journal_file_move_to_entry_by_seqnum(
1329 JournalFile *f,
1330 uint64_t seqnum,
1331 direction_t direction,
1332 Object **ret,
1333 uint64_t *offset) {
1334
1335 return generic_array_bisect(f,
1336 le64toh(f->header->entry_array_offset),
1337 le64toh(f->header->n_entries),
1338 seqnum,
1339 test_object_seqnum,
1340 direction,
1341 ret, offset, NULL);
1342}
1343
1344static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1345 Object *o;
1346 int r;
1347
1348 assert(f);
1349 assert(p > 0);
1350
1351 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1352 if (r < 0)
1353 return r;
1354
1355 if (le64toh(o->entry.realtime) == needle)
1356 return TEST_FOUND;
1357 else if (le64toh(o->entry.realtime) < needle)
1358 return TEST_LEFT;
1359 else
1360 return TEST_RIGHT;
1361}
1362
1363int journal_file_move_to_entry_by_realtime(
1364 JournalFile *f,
1365 uint64_t realtime,
1366 direction_t direction,
1367 Object **ret,
1368 uint64_t *offset) {
1369
1370 return generic_array_bisect(f,
1371 le64toh(f->header->entry_array_offset),
1372 le64toh(f->header->n_entries),
1373 realtime,
1374 test_object_realtime,
1375 direction,
1376 ret, offset, NULL);
1377}
1378
1379static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1380 Object *o;
1381 int r;
1382
1383 assert(f);
1384 assert(p > 0);
1385
1386 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1387 if (r < 0)
1388 return r;
1389
1390 if (le64toh(o->entry.monotonic) == needle)
1391 return TEST_FOUND;
1392 else if (le64toh(o->entry.monotonic) < needle)
1393 return TEST_LEFT;
1394 else
1395 return TEST_RIGHT;
1396}
1397
1398int journal_file_move_to_entry_by_monotonic(
1399 JournalFile *f,
1400 sd_id128_t boot_id,
1401 uint64_t monotonic,
1402 direction_t direction,
1403 Object **ret,
1404 uint64_t *offset) {
1405
1406 char t[9+32+1] = "_BOOT_ID=";
1407 Object *o;
1408 int r;
1409
1410 sd_id128_to_string(boot_id, t + 9);
1411
1412 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1413 if (r < 0)
1414 return r;
1415 else if (r == 0)
1416 return -ENOENT;
1417
1418 return generic_array_bisect_plus_one(f,
1419 le64toh(o->data.entry_offset),
1420 le64toh(o->data.entry_array_offset),
1421 le64toh(o->data.n_entries),
1422 monotonic,
1423 test_object_monotonic,
1424 direction,
1425 ret, offset, NULL);
1426}
1427
1428static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1429 assert(f);
1430 assert(p > 0);
1431
1432 if (p == needle)
1433 return TEST_FOUND;
1434 else if (p < needle)
1435 return TEST_LEFT;
1436 else
1437 return TEST_RIGHT;
1438}
1439
1440int journal_file_next_entry(
1441 JournalFile *f,
1442 Object *o, uint64_t p,
1443 direction_t direction,
1444 Object **ret, uint64_t *offset) {
1445
1446 uint64_t i, n;
1447 int r;
1448
1449 assert(f);
1450 assert(p > 0 || !o);
1451
1452 n = le64toh(f->header->n_entries);
1453 if (n <= 0)
1454 return 0;
1455
1456 if (!o)
1457 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1458 else {
1459 if (o->object.type != OBJECT_ENTRY)
1460 return -EINVAL;
1461
1462 r = generic_array_bisect(f,
1463 le64toh(f->header->entry_array_offset),
1464 le64toh(f->header->n_entries),
1465 p,
1466 test_object_offset,
1467 DIRECTION_DOWN,
1468 NULL, NULL,
1469 &i);
1470 if (r <= 0)
1471 return r;
1472
1473 if (direction == DIRECTION_DOWN) {
1474 if (i >= n - 1)
1475 return 0;
1476
1477 i++;
1478 } else {
1479 if (i <= 0)
1480 return 0;
1481
1482 i--;
1483 }
1484 }
1485
1486 /* And jump to it */
1487 return generic_array_get(f,
1488 le64toh(f->header->entry_array_offset),
1489 i,
1490 ret, offset);
1491}
1492
1493int journal_file_skip_entry(
1494 JournalFile *f,
1495 Object *o, uint64_t p,
1496 int64_t skip,
1497 Object **ret, uint64_t *offset) {
1498
1499 uint64_t i, n;
1500 int r;
1501
1502 assert(f);
1503 assert(o);
1504 assert(p > 0);
1505
1506 if (o->object.type != OBJECT_ENTRY)
1507 return -EINVAL;
1508
1509 r = generic_array_bisect(f,
1510 le64toh(f->header->entry_array_offset),
1511 le64toh(f->header->n_entries),
1512 p,
1513 test_object_offset,
1514 DIRECTION_DOWN,
1515 NULL, NULL,
1516 &i);
1517 if (r <= 0)
1518 return r;
1519
1520 /* Calculate new index */
1521 if (skip < 0) {
1522 if ((uint64_t) -skip >= i)
1523 i = 0;
1524 else
1525 i = i - (uint64_t) -skip;
1526 } else
1527 i += (uint64_t) skip;
1528
1529 n = le64toh(f->header->n_entries);
1530 if (n <= 0)
1531 return -EBADMSG;
1532
1533 if (i >= n)
1534 i = n-1;
1535
1536 return generic_array_get(f,
1537 le64toh(f->header->entry_array_offset),
1538 i,
1539 ret, offset);
1540}
1541
1542int journal_file_next_entry_for_data(
1543 JournalFile *f,
1544 Object *o, uint64_t p,
1545 uint64_t data_offset,
1546 direction_t direction,
1547 Object **ret, uint64_t *offset) {
1548
1549 uint64_t n, i;
1550 int r;
1551 Object *d;
1552
1553 assert(f);
1554 assert(p > 0 || !o);
1555
1556 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1557 if (r < 0)
1558 return r;
1559
1560 n = le64toh(d->data.n_entries);
1561 if (n <= 0)
1562 return n;
1563
1564 if (!o)
1565 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1566 else {
1567 if (o->object.type != OBJECT_ENTRY)
1568 return -EINVAL;
1569
1570 r = generic_array_bisect_plus_one(f,
1571 le64toh(d->data.entry_offset),
1572 le64toh(d->data.entry_array_offset),
1573 le64toh(d->data.n_entries),
1574 p,
1575 test_object_offset,
1576 DIRECTION_DOWN,
1577 NULL, NULL,
1578 &i);
1579
1580 if (r <= 0)
1581 return r;
1582
1583 if (direction == DIRECTION_DOWN) {
1584 if (i >= n - 1)
1585 return 0;
1586
1587 i++;
1588 } else {
1589 if (i <= 0)
1590 return 0;
1591
1592 i--;
1593 }
1594
1595 }
1596
1597 return generic_array_get_plus_one(f,
1598 le64toh(d->data.entry_offset),
1599 le64toh(d->data.entry_array_offset),
1600 i,
1601 ret, offset);
1602}
1603
1604int journal_file_move_to_entry_by_seqnum_for_data(
1605 JournalFile *f,
1606 uint64_t data_offset,
1607 uint64_t seqnum,
1608 direction_t direction,
1609 Object **ret, uint64_t *offset) {
1610
1611 Object *d;
1612 int r;
1613
1614 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1615 if (r <= 0)
1616 return r;
1617
1618 return generic_array_bisect_plus_one(f,
1619 le64toh(d->data.entry_offset),
1620 le64toh(d->data.entry_array_offset),
1621 le64toh(d->data.n_entries),
1622 seqnum,
1623 test_object_seqnum,
1624 direction,
1625 ret, offset, NULL);
1626}
1627
1628int journal_file_move_to_entry_by_realtime_for_data(
1629 JournalFile *f,
1630 uint64_t data_offset,
1631 uint64_t realtime,
1632 direction_t direction,
1633 Object **ret, uint64_t *offset) {
1634
1635 Object *d;
1636 int r;
1637
1638 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1639 if (r <= 0)
1640 return r;
1641
1642 return generic_array_bisect_plus_one(f,
1643 le64toh(d->data.entry_offset),
1644 le64toh(d->data.entry_array_offset),
1645 le64toh(d->data.n_entries),
1646 realtime,
1647 test_object_realtime,
1648 direction,
1649 ret, offset, NULL);
1650}
1651
1652void journal_file_dump(JournalFile *f) {
1653 char a[33], b[33], c[33];
1654 Object *o;
1655 int r;
1656 uint64_t p;
1657
1658 assert(f);
1659
1660 printf("File Path: %s\n"
1661 "File ID: %s\n"
1662 "Machine ID: %s\n"
1663 "Boot ID: %s\n"
1664 "Arena size: %llu\n"
1665 "Objects: %lu\n"
1666 "Entries: %lu\n",
1667 f->path,
1668 sd_id128_to_string(f->header->file_id, a),
1669 sd_id128_to_string(f->header->machine_id, b),
1670 sd_id128_to_string(f->header->boot_id, c),
1671 (unsigned long long) le64toh(f->header->arena_size),
1672 (unsigned long) le64toh(f->header->n_objects),
1673 (unsigned long) le64toh(f->header->n_entries));
1674
1675 p = le64toh(f->header->header_size);
1676 while (p != 0) {
1677 r = journal_file_move_to_object(f, -1, p, &o);
1678 if (r < 0)
1679 goto fail;
1680
1681 switch (o->object.type) {
1682
1683 case OBJECT_UNUSED:
1684 printf("Type: OBJECT_UNUSED\n");
1685 break;
1686
1687 case OBJECT_DATA:
1688 printf("Type: OBJECT_DATA\n");
1689 break;
1690
1691 case OBJECT_ENTRY:
1692 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1693 (unsigned long long) le64toh(o->entry.seqnum),
1694 (unsigned long long) le64toh(o->entry.monotonic),
1695 (unsigned long long) le64toh(o->entry.realtime));
1696 break;
1697
1698 case OBJECT_FIELD_HASH_TABLE:
1699 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1700 break;
1701
1702 case OBJECT_DATA_HASH_TABLE:
1703 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1704 break;
1705
1706 case OBJECT_ENTRY_ARRAY:
1707 printf("Type: OBJECT_ENTRY_ARRAY\n");
1708 break;
1709
1710 case OBJECT_SIGNATURE:
1711 printf("Type: OBJECT_SIGNATURE\n");
1712 break;
1713 }
1714
1715 if (o->object.flags & OBJECT_COMPRESSED)
1716 printf("Flags: COMPRESSED\n");
1717
1718 if (p == le64toh(f->header->tail_object_offset))
1719 p = 0;
1720 else
1721 p = p + ALIGN64(le64toh(o->object.size));
1722 }
1723
1724 return;
1725fail:
1726 log_error("File corrupt");
1727}
1728
1729int journal_file_open(
1730 const char *fname,
1731 int flags,
1732 mode_t mode,
1733 JournalFile *template,
1734 JournalFile **ret) {
1735
1736 JournalFile *f;
1737 int r;
1738 bool newly_created = false;
1739
1740 assert(fname);
1741
1742 if ((flags & O_ACCMODE) != O_RDONLY &&
1743 (flags & O_ACCMODE) != O_RDWR)
1744 return -EINVAL;
1745
1746 if (!endswith(fname, ".journal"))
1747 return -EINVAL;
1748
1749 f = new0(JournalFile, 1);
1750 if (!f)
1751 return -ENOMEM;
1752
1753 f->fd = -1;
1754 f->flags = flags;
1755 f->mode = mode;
1756 f->writable = (flags & O_ACCMODE) != O_RDONLY;
1757 f->prot = prot_from_flags(flags);
1758
1759 if (template) {
1760 f->metrics = template->metrics;
1761 f->compress = template->compress;
1762 }
1763
1764 f->path = strdup(fname);
1765 if (!f->path) {
1766 r = -ENOMEM;
1767 goto fail;
1768 }
1769
1770 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1771 if (f->fd < 0) {
1772 r = -errno;
1773 goto fail;
1774 }
1775
1776 if (fstat(f->fd, &f->last_stat) < 0) {
1777 r = -errno;
1778 goto fail;
1779 }
1780
1781 if (f->last_stat.st_size == 0 && f->writable) {
1782 newly_created = true;
1783
1784 r = journal_file_init_header(f, template);
1785 if (r < 0)
1786 goto fail;
1787
1788 if (fstat(f->fd, &f->last_stat) < 0) {
1789 r = -errno;
1790 goto fail;
1791 }
1792 }
1793
1794 if (f->last_stat.st_size < (off_t) sizeof(Header)) {
1795 r = -EIO;
1796 goto fail;
1797 }
1798
1799 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
1800 if (f->header == MAP_FAILED) {
1801 f->header = NULL;
1802 r = -errno;
1803 goto fail;
1804 }
1805
1806 if (!newly_created) {
1807 r = journal_file_verify_header(f);
1808 if (r < 0)
1809 goto fail;
1810 }
1811
1812 if (f->writable) {
1813 r = journal_file_refresh_header(f);
1814 if (r < 0)
1815 goto fail;
1816 }
1817
1818 if (newly_created) {
1819
1820 r = journal_file_setup_field_hash_table(f);
1821 if (r < 0)
1822 goto fail;
1823
1824 r = journal_file_setup_data_hash_table(f);
1825 if (r < 0)
1826 goto fail;
1827 }
1828
1829 r = journal_file_map_field_hash_table(f);
1830 if (r < 0)
1831 goto fail;
1832
1833 r = journal_file_map_data_hash_table(f);
1834 if (r < 0)
1835 goto fail;
1836
1837 if (ret)
1838 *ret = f;
1839
1840 return 0;
1841
1842fail:
1843 journal_file_close(f);
1844
1845 return r;
1846}
1847
1848int journal_file_rotate(JournalFile **f) {
1849 char *p;
1850 size_t l;
1851 JournalFile *old_file, *new_file = NULL;
1852 int r;
1853
1854 assert(f);
1855 assert(*f);
1856
1857 old_file = *f;
1858
1859 if (!old_file->writable)
1860 return -EINVAL;
1861
1862 if (!endswith(old_file->path, ".journal"))
1863 return -EINVAL;
1864
1865 l = strlen(old_file->path);
1866
1867 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
1868 if (!p)
1869 return -ENOMEM;
1870
1871 memcpy(p, old_file->path, l - 8);
1872 p[l-8] = '@';
1873 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
1874 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
1875 "-%016llx-%016llx.journal",
1876 (unsigned long long) le64toh((*f)->header->seqnum),
1877 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
1878
1879 r = rename(old_file->path, p);
1880 free(p);
1881
1882 if (r < 0)
1883 return -errno;
1884
1885 old_file->header->state = STATE_ARCHIVED;
1886
1887 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file);
1888 journal_file_close(old_file);
1889
1890 *f = new_file;
1891 return r;
1892}
1893
1894int journal_file_open_reliably(
1895 const char *fname,
1896 int flags,
1897 mode_t mode,
1898 JournalFile *template,
1899 JournalFile **ret) {
1900
1901 int r;
1902 size_t l;
1903 char *p;
1904
1905 r = journal_file_open(fname, flags, mode, template, ret);
1906 if (r != -EBADMSG && /* corrupted */
1907 r != -ENODATA && /* truncated */
1908 r != -EHOSTDOWN && /* other machine */
1909 r != -EPROTONOSUPPORT) /* incompatible feature */
1910 return r;
1911
1912 if ((flags & O_ACCMODE) == O_RDONLY)
1913 return r;
1914
1915 if (!(flags & O_CREAT))
1916 return r;
1917
1918 /* The file is corrupted. Rotate it away and try it again (but only once) */
1919
1920 l = strlen(fname);
1921 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
1922 (int) (l-8), fname,
1923 (unsigned long long) now(CLOCK_REALTIME),
1924 random_ull()) < 0)
1925 return -ENOMEM;
1926
1927 r = rename(fname, p);
1928 free(p);
1929 if (r < 0)
1930 return -errno;
1931
1932 log_warning("File %s corrupted, renaming and replacing.", fname);
1933
1934 return journal_file_open(fname, flags, mode, template, ret);
1935}
1936
1937struct vacuum_info {
1938 off_t usage;
1939 char *filename;
1940
1941 uint64_t realtime;
1942 sd_id128_t seqnum_id;
1943 uint64_t seqnum;
1944
1945 bool have_seqnum;
1946};
1947
1948static int vacuum_compare(const void *_a, const void *_b) {
1949 const struct vacuum_info *a, *b;
1950
1951 a = _a;
1952 b = _b;
1953
1954 if (a->have_seqnum && b->have_seqnum &&
1955 sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
1956 if (a->seqnum < b->seqnum)
1957 return -1;
1958 else if (a->seqnum > b->seqnum)
1959 return 1;
1960 else
1961 return 0;
1962 }
1963
1964 if (a->realtime < b->realtime)
1965 return -1;
1966 else if (a->realtime > b->realtime)
1967 return 1;
1968 else if (a->have_seqnum && b->have_seqnum)
1969 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
1970 else
1971 return strcmp(a->filename, b->filename);
1972}
1973
1974int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
1975 DIR *d;
1976 int r = 0;
1977 struct vacuum_info *list = NULL;
1978 unsigned n_list = 0, n_allocated = 0, i;
1979 uint64_t sum = 0;
1980
1981 assert(directory);
1982
1983 if (max_use <= 0)
1984 return 0;
1985
1986 d = opendir(directory);
1987 if (!d)
1988 return -errno;
1989
1990 for (;;) {
1991 int k;
1992 struct dirent buf, *de;
1993 size_t q;
1994 struct stat st;
1995 char *p;
1996 unsigned long long seqnum = 0, realtime;
1997 sd_id128_t seqnum_id;
1998 bool have_seqnum;
1999
2000 k = readdir_r(d, &buf, &de);
2001 if (k != 0) {
2002 r = -k;
2003 goto finish;
2004 }
2005
2006 if (!de)
2007 break;
2008
2009 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2010 continue;
2011
2012 if (!S_ISREG(st.st_mode))
2013 continue;
2014
2015 q = strlen(de->d_name);
2016
2017 if (endswith(de->d_name, ".journal")) {
2018
2019 /* Vacuum archived files */
2020
2021 if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2022 continue;
2023
2024 if (de->d_name[q-8-16-1] != '-' ||
2025 de->d_name[q-8-16-1-16-1] != '-' ||
2026 de->d_name[q-8-16-1-16-1-32-1] != '@')
2027 continue;
2028
2029 p = strdup(de->d_name);
2030 if (!p) {
2031 r = -ENOMEM;
2032 goto finish;
2033 }
2034
2035 de->d_name[q-8-16-1-16-1] = 0;
2036 if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2037 free(p);
2038 continue;
2039 }
2040
2041 if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2042 free(p);
2043 continue;
2044 }
2045
2046 have_seqnum = true;
2047
2048 } else if (endswith(de->d_name, ".journal~")) {
2049 unsigned long long tmp;
2050
2051 /* Vacuum corrupted files */
2052
2053 if (q < 1 + 16 + 1 + 16 + 8 + 1)
2054 continue;
2055
2056 if (de->d_name[q-1-8-16-1] != '-' ||
2057 de->d_name[q-1-8-16-1-16-1] != '@')
2058 continue;
2059
2060 p = strdup(de->d_name);
2061 if (!p) {
2062 r = -ENOMEM;
2063 goto finish;
2064 }
2065
2066 if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2067 free(p);
2068 continue;
2069 }
2070
2071 have_seqnum = false;
2072 } else
2073 continue;
2074
2075 if (n_list >= n_allocated) {
2076 struct vacuum_info *j;
2077
2078 n_allocated = MAX(n_allocated * 2U, 8U);
2079 j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2080 if (!j) {
2081 free(p);
2082 r = -ENOMEM;
2083 goto finish;
2084 }
2085
2086 list = j;
2087 }
2088
2089 list[n_list].filename = p;
2090 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
2091 list[n_list].seqnum = seqnum;
2092 list[n_list].realtime = realtime;
2093 list[n_list].seqnum_id = seqnum_id;
2094 list[n_list].have_seqnum = have_seqnum;
2095
2096 sum += list[n_list].usage;
2097
2098 n_list ++;
2099 }
2100
2101 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
2102
2103 for(i = 0; i < n_list; i++) {
2104 struct statvfs ss;
2105
2106 if (fstatvfs(dirfd(d), &ss) < 0) {
2107 r = -errno;
2108 goto finish;
2109 }
2110
2111 if (sum <= max_use &&
2112 (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2113 break;
2114
2115 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
2116 log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
2117 sum -= list[i].usage;
2118 } else if (errno != ENOENT)
2119 log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2120 }
2121
2122finish:
2123 for (i = 0; i < n_list; i++)
2124 free(list[i].filename);
2125
2126 free(list);
2127
2128 if (d)
2129 closedir(d);
2130
2131 return r;
2132}
2133
2134int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2135 uint64_t i, n;
2136 uint64_t q, xor_hash = 0;
2137 int r;
2138 EntryItem *items;
2139 dual_timestamp ts;
2140
2141 assert(from);
2142 assert(to);
2143 assert(o);
2144 assert(p);
2145
2146 if (!to->writable)
2147 return -EPERM;
2148
2149 ts.monotonic = le64toh(o->entry.monotonic);
2150 ts.realtime = le64toh(o->entry.realtime);
2151
2152 if (to->tail_entry_monotonic_valid &&
2153 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2154 return -EINVAL;
2155
2156 if (ts.realtime < le64toh(to->header->tail_entry_realtime))
2157 return -EINVAL;
2158
2159 n = journal_file_entry_n_items(o);
2160 items = alloca(sizeof(EntryItem) * n);
2161
2162 for (i = 0; i < n; i++) {
2163 uint64_t l, h;
2164 le64_t le_hash;
2165 size_t t;
2166 void *data;
2167 Object *u;
2168
2169 q = le64toh(o->entry.items[i].object_offset);
2170 le_hash = o->entry.items[i].hash;
2171
2172 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2173 if (r < 0)
2174 return r;
2175
2176 if (le_hash != o->data.hash)
2177 return -EBADMSG;
2178
2179 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2180 t = (size_t) l;
2181
2182 /* We hit the limit on 32bit machines */
2183 if ((uint64_t) t != l)
2184 return -E2BIG;
2185
2186 if (o->object.flags & OBJECT_COMPRESSED) {
2187#ifdef HAVE_XZ
2188 uint64_t rsize;
2189
2190 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2191 return -EBADMSG;
2192
2193 data = from->compress_buffer;
2194 l = rsize;
2195#else
2196 return -EPROTONOSUPPORT;
2197#endif
2198 } else
2199 data = o->data.payload;
2200
2201 r = journal_file_append_data(to, data, l, &u, &h);
2202 if (r < 0)
2203 return r;
2204
2205 xor_hash ^= le64toh(u->data.hash);
2206 items[i].object_offset = htole64(h);
2207 items[i].hash = u->data.hash;
2208
2209 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2210 if (r < 0)
2211 return r;
2212 }
2213
2214 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2215}
2216
2217void journal_default_metrics(JournalMetrics *m, int fd) {
2218 uint64_t fs_size = 0;
2219 struct statvfs ss;
2220 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2221
2222 assert(m);
2223 assert(fd >= 0);
2224
2225 if (fstatvfs(fd, &ss) >= 0)
2226 fs_size = ss.f_frsize * ss.f_blocks;
2227
2228 if (m->max_use == (uint64_t) -1) {
2229
2230 if (fs_size > 0) {
2231 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2232
2233 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2234 m->max_use = DEFAULT_MAX_USE_UPPER;
2235
2236 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2237 m->max_use = DEFAULT_MAX_USE_LOWER;
2238 } else
2239 m->max_use = DEFAULT_MAX_USE_LOWER;
2240 } else {
2241 m->max_use = PAGE_ALIGN(m->max_use);
2242
2243 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2244 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2245 }
2246
2247 if (m->max_size == (uint64_t) -1) {
2248 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2249
2250 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2251 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2252 } else
2253 m->max_size = PAGE_ALIGN(m->max_size);
2254
2255 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2256 m->max_size = JOURNAL_FILE_SIZE_MIN;
2257
2258 if (m->max_size*2 > m->max_use)
2259 m->max_use = m->max_size*2;
2260
2261 if (m->min_size == (uint64_t) -1)
2262 m->min_size = JOURNAL_FILE_SIZE_MIN;
2263 else {
2264 m->min_size = PAGE_ALIGN(m->min_size);
2265
2266 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2267 m->min_size = JOURNAL_FILE_SIZE_MIN;
2268
2269 if (m->min_size > m->max_size)
2270 m->max_size = m->min_size;
2271 }
2272
2273 if (m->keep_free == (uint64_t) -1) {
2274
2275 if (fs_size > 0) {
2276 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2277
2278 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2279 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2280
2281 } else
2282 m->keep_free = DEFAULT_KEEP_FREE;
2283 }
2284
2285 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2286 format_bytes(a, sizeof(a), m->max_use),
2287 format_bytes(b, sizeof(b), m->max_size),
2288 format_bytes(c, sizeof(c), m->min_size),
2289 format_bytes(d, sizeof(d), m->keep_free));
2290}
2291
2292int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2293 Object *o;
2294 int r;
2295
2296 assert(f);
2297 assert(from || to);
2298
2299 if (from) {
2300 r = journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, &o, NULL);
2301 if (r <= 0)
2302 return r;
2303
2304 *from = le64toh(o->entry.realtime);
2305 }
2306
2307 if (to) {
2308 r = journal_file_next_entry(f, NULL, 0, DIRECTION_UP, &o, NULL);
2309 if (r <= 0)
2310 return r;
2311
2312 *to = le64toh(o->entry.realtime);
2313 }
2314
2315 return 1;
2316}
2317
2318int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2319 char t[9+32+1] = "_BOOT_ID=";
2320 Object *o;
2321 uint64_t p;
2322 int r;
2323
2324 assert(f);
2325 assert(from || to);
2326
2327 sd_id128_to_string(boot_id, t + 9);
2328
2329 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2330 if (r <= 0)
2331 return r;
2332
2333 if (le64toh(o->data.n_entries) <= 0)
2334 return 0;
2335
2336 if (from) {
2337 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2338 if (r < 0)
2339 return r;
2340
2341 *from = le64toh(o->entry.monotonic);
2342 }
2343
2344 if (to) {
2345 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2346 if (r < 0)
2347 return r;
2348
2349 r = generic_array_get_plus_one(f,
2350 le64toh(o->data.entry_offset),
2351 le64toh(o->data.entry_array_offset),
2352 le64toh(o->data.n_entries)-1,
2353 &o, NULL);
2354 if (r <= 0)
2355 return r;
2356
2357 *to = le64toh(o->entry.monotonic);
2358 }
2359
2360 return 1;
2361}