]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journal: fix field retrieval by name
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
32#include "lookup3.h"
33
/* Default arena limits written into the header of a freshly created
 * journal file (see journal_file_init_header()), in bytes. */
#define DEFAULT_ARENA_MAX_SIZE (16ULL << 30)
#define DEFAULT_ARENA_MIN_SIZE (256ULL << 10)
#define DEFAULT_ARENA_KEEP_FREE (1ULL << 20)

/* Disk usage limit used by journal_directory_vacuum() when the caller
 * passes a max_use of 0. */
#define DEFAULT_MAX_USE (256ULL << 20)

/* Byte size of the hash table object created for new files. */
#define DEFAULT_HASH_TABLE_SIZE (16ULL * 2047ULL)

/* Byte size of the bisect table object created for new files: one
 * 8 byte slot for every 64 KiB of the maximum arena size. */
#define DEFAULT_BISECT_TABLE_SIZE ((DEFAULT_ARENA_MAX_SIZE >> 16) * 8ULL)

/* Target size of the sliding mmap() window used by journal_file_move_to(). */
#define DEFAULT_WINDOW_SIZE (128ULL << 20)

/* The 8 magic bytes found at the very beginning of every journal file. */
static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };

/* Rounds x up to the next multiple of 8. */
#define ALIGN64(x) (((x) + 7ULL) & ~(8ULL - 1ULL))
48
49void journal_file_close(JournalFile *f) {
50 assert(f);
51
0ac38b70
LP
52 if (f->header) {
53 if (f->writable && f->header->state == htole32(STATE_ONLINE))
54 f->header->state = htole32(STATE_OFFLINE);
cec736d2 55
cec736d2 56 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
0ac38b70 57 }
cec736d2
LP
58
59 if (f->hash_table_window)
60 munmap(f->hash_table_window, f->hash_table_window_size);
61
62 if (f->bisect_table_window)
63 munmap(f->bisect_table_window, f->bisect_table_window_size);
64
65 if (f->window)
66 munmap(f->window, f->window_size);
67
0ac38b70
LP
68 if (f->fd >= 0)
69 close_nointr_nofail(f->fd);
70
cec736d2
LP
71 free(f->path);
72 free(f);
73}
74
0ac38b70 75static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
76 Header h;
77 ssize_t k;
78 int r;
79
80 assert(f);
81
82 zero(h);
83 memcpy(h.signature, signature, 8);
84 h.arena_offset = htole64(ALIGN64(sizeof(h)));
85 h.arena_max_size = htole64(DEFAULT_ARENA_MAX_SIZE);
86 h.arena_min_size = htole64(DEFAULT_ARENA_MIN_SIZE);
87 h.arena_keep_free = htole64(DEFAULT_ARENA_KEEP_FREE);
88
89 r = sd_id128_randomize(&h.file_id);
90 if (r < 0)
91 return r;
92
0ac38b70
LP
93 if (template) {
94 h.seqnum_id = template->header->seqnum_id;
95 h.seqnum = template->header->seqnum;
96 } else
97 h.seqnum_id = h.file_id;
cec736d2
LP
98
99 k = pwrite(f->fd, &h, sizeof(h), 0);
100 if (k < 0)
101 return -errno;
102
103 if (k != sizeof(h))
104 return -EIO;
105
106 return 0;
107}
108
109static int journal_file_refresh_header(JournalFile *f) {
110 int r;
111
112 assert(f);
113
114 r = sd_id128_get_machine(&f->header->machine_id);
115 if (r < 0)
116 return r;
117
118 r = sd_id128_get_boot(&f->header->boot_id);
119 if (r < 0)
120 return r;
121
122 f->header->state = htole32(STATE_ONLINE);
123 return 0;
124}
125
126static int journal_file_verify_header(JournalFile *f) {
127 assert(f);
128
129 if (memcmp(f->header, signature, 8))
130 return -EBADMSG;
131
132 if (f->header->incompatible_flags != 0)
133 return -EPROTONOSUPPORT;
134
135 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->arena_offset) + le64toh(f->header->arena_size)))
136 return -ENODATA;
137
138 if (f->writable) {
139 uint32_t state;
140 sd_id128_t machine_id;
141 int r;
142
143 r = sd_id128_get_machine(&machine_id);
144 if (r < 0)
145 return r;
146
147 if (!sd_id128_equal(machine_id, f->header->machine_id))
148 return -EHOSTDOWN;
149
150 state = le32toh(f->header->state);
151
152 if (state == STATE_ONLINE)
153 log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path);
154 else if (state == STATE_ARCHIVED)
155 return -ESHUTDOWN;
156 else if (state != STATE_OFFLINE)
157 log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state);
158 }
159
160 return 0;
161}
162
163static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
164 uint64_t asize;
165 uint64_t old_size, new_size;
166
167 assert(f);
168
169 if (offset < le64toh(f->header->arena_offset))
170 return -EINVAL;
171
172 new_size = PAGE_ALIGN(offset + size);
173
174 /* We assume that this file is not sparse, and we know that
38ac38b2 175 * for sure, since we always call posix_fallocate()
cec736d2
LP
176 * ourselves */
177
178 old_size =
179 le64toh(f->header->arena_offset) +
180 le64toh(f->header->arena_size);
181
182 if (old_size >= new_size)
183 return 0;
184
185 asize = new_size - le64toh(f->header->arena_offset);
186
187 if (asize > le64toh(f->header->arena_min_size)) {
188 struct statvfs svfs;
189
190 if (fstatvfs(f->fd, &svfs) >= 0) {
191 uint64_t available;
192
193 available = svfs.f_bfree * svfs.f_bsize;
194
195 if (available >= f->header->arena_keep_free)
196 available -= f->header->arena_keep_free;
197 else
198 available = 0;
199
200 if (new_size - old_size > available)
201 return -E2BIG;
202 }
203 }
204
205 if (asize > le64toh(f->header->arena_max_size))
206 return -E2BIG;
207
38ac38b2 208 if (posix_fallocate(f->fd, old_size, new_size - old_size) < 0)
cec736d2
LP
209 return -errno;
210
211 if (fstat(f->fd, &f->last_stat) < 0)
212 return -errno;
213
214 f->header->arena_size = htole64(asize);
215
216 return 0;
217}
218
219static int journal_file_map(
220 JournalFile *f,
221 uint64_t offset,
222 uint64_t size,
223 void **_window,
224 uint64_t *_woffset,
225 uint64_t *_wsize,
226 void **ret) {
227
228 uint64_t woffset, wsize;
229 void *window;
230
231 assert(f);
232 assert(size > 0);
233 assert(ret);
234
235 woffset = offset & ~((uint64_t) page_size() - 1ULL);
236 wsize = size + (offset - woffset);
237 wsize = PAGE_ALIGN(wsize);
238
239 window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
240 if (window == MAP_FAILED)
241 return -errno;
242
243 if (_window)
244 *_window = window;
245
246 if (_woffset)
247 *_woffset = woffset;
248
249 if (_wsize)
250 *_wsize = wsize;
251
252 *ret = (uint8_t*) window + (offset - woffset);
253
254 return 0;
255}
256
257static int journal_file_move_to(JournalFile *f, uint64_t offset, uint64_t size, void **ret) {
258 void *p;
259 uint64_t delta;
260 int r;
261
262 assert(f);
263 assert(ret);
264
265 if (_likely_(f->window &&
266 f->window_offset <= offset &&
267 f->window_offset+f->window_size >= offset + size)) {
268
269 *ret = (uint8_t*) f->window + (offset - f->window_offset);
270 return 0;
271 }
272
273 if (f->window) {
274 if (munmap(f->window, f->window_size) < 0)
275 return -errno;
276
277 f->window = NULL;
278 f->window_size = f->window_offset = 0;
279 }
280
281 if (size < DEFAULT_WINDOW_SIZE) {
282 /* If the default window size is larger then what was
283 * asked for extend the mapping a bit in the hope to
284 * minimize needed remappings later on. We add half
285 * the window space before and half behind the
286 * requested mapping */
287
288 delta = PAGE_ALIGN((DEFAULT_WINDOW_SIZE - size) / 2);
289
290 if (offset < delta)
291 delta = offset;
292
293 offset -= delta;
294 size += (DEFAULT_WINDOW_SIZE - delta);
295 } else
296 delta = 0;
297
298 r = journal_file_map(f,
299 offset, size,
300 &f->window, &f->window_offset, &f->window_size,
301 & p);
302
303 if (r < 0)
304 return r;
305
306 *ret = (uint8_t*) p + delta;
307 return 0;
308}
309
310static bool verify_hash(Object *o) {
311 uint64_t t;
312
313 assert(o);
314
315 t = le64toh(o->object.type);
316 if (t == OBJECT_DATA) {
317 uint64_t s, h1, h2;
318
319 s = le64toh(o->object.size);
320
321 h1 = le64toh(o->data.hash);
322 h2 = hash64(o->data.payload, s - offsetof(Object, data.payload));
323
324 return h1 == h2;
325 }
326
327 return true;
328}
329
330int journal_file_move_to_object(JournalFile *f, uint64_t offset, int type, Object **ret) {
331 int r;
332 void *t;
333 Object *o;
334 uint64_t s;
335
336 assert(f);
337 assert(ret);
338
339 r = journal_file_move_to(f, offset, sizeof(ObjectHeader), &t);
340 if (r < 0)
341 return r;
342
343 o = (Object*) t;
344 s = le64toh(o->object.size);
345
346 if (s < sizeof(ObjectHeader))
347 return -EBADMSG;
348
349 if (type >= 0 && le64toh(o->object.type) != type)
350 return -EBADMSG;
351
352 if (s > sizeof(ObjectHeader)) {
353 r = journal_file_move_to(f, offset, s, &t);
354 if (r < 0)
355 return r;
356
357 o = (Object*) t;
358 }
359
360 if (!verify_hash(o))
361 return -EBADMSG;
362
363 *ret = o;
364 return 0;
365}
366
367static uint64_t journal_file_seqnum(JournalFile *f) {
368 uint64_t r;
369
370 assert(f);
371
372 r = le64toh(f->header->seqnum) + 1;
373 f->header->seqnum = htole64(r);
374
375 return r;
376}
377
378static int journal_file_append_object(JournalFile *f, uint64_t size, Object **ret, uint64_t *offset) {
379 int r;
380 uint64_t p;
381 Object *tail, *o;
382 void *t;
383
384 assert(f);
385 assert(size >= sizeof(ObjectHeader));
386 assert(offset);
387 assert(ret);
388
389 p = le64toh(f->header->tail_object_offset);
390
391 if (p == 0)
392 p = le64toh(f->header->arena_offset);
393 else {
394 r = journal_file_move_to_object(f, p, -1, &tail);
395 if (r < 0)
396 return r;
397
398 p += ALIGN64(le64toh(tail->object.size));
399 }
400
401 r = journal_file_allocate(f, p, size);
402 if (r < 0)
403 return r;
404
405 r = journal_file_move_to(f, p, size, &t);
406 if (r < 0)
407 return r;
408
409 o = (Object*) t;
410
411 zero(o->object);
412 o->object.type = htole64(OBJECT_UNUSED);
413 zero(o->object.reserved);
414 o->object.size = htole64(size);
415
416 f->header->tail_object_offset = htole64(p);
417 if (f->header->head_object_offset == 0)
418 f->header->head_object_offset = htole64(p);
419
420 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
421
422 *ret = o;
423 *offset = p;
424
425 return 0;
426}
427
428static int journal_file_setup_hash_table(JournalFile *f) {
429 uint64_t s, p;
430 Object *o;
431 int r;
432
433 assert(f);
434
435 s = DEFAULT_HASH_TABLE_SIZE;
436 r = journal_file_append_object(f, offsetof(Object, hash_table.table) + s, &o, &p);
437 if (r < 0)
438 return r;
439
440 o->object.type = htole64(OBJECT_HASH_TABLE);
441 memset(o->hash_table.table, 0, s);
442
443 f->header->hash_table_offset = htole64(p + offsetof(Object, hash_table.table));
444 f->header->hash_table_size = htole64(s);
445
446 return 0;
447}
448
449static int journal_file_setup_bisect_table(JournalFile *f) {
450 uint64_t s, p;
451 Object *o;
452 int r;
453
454 assert(f);
455
456 s = DEFAULT_BISECT_TABLE_SIZE;
457 r = journal_file_append_object(f, offsetof(Object, bisect_table.table) + s, &o, &p);
458 if (r < 0)
459 return r;
460
461 o->object.type = htole64(OBJECT_BISECT_TABLE);
462 memset(o->bisect_table.table, 0, s);
463
464 f->header->bisect_table_offset = htole64(p + offsetof(Object, bisect_table.table));
465 f->header->bisect_table_size = htole64(s);
466
467 return 0;
468}
469
470static int journal_file_map_hash_table(JournalFile *f) {
471 uint64_t s, p;
472 void *t;
473 int r;
474
475 assert(f);
476
477 p = le64toh(f->header->hash_table_offset);
478 s = le64toh(f->header->hash_table_size);
479
480 r = journal_file_map(f,
481 p, s,
482 &f->hash_table_window, NULL, &f->hash_table_window_size,
483 &t);
484 if (r < 0)
485 return r;
486
487 f->hash_table = t;
488 return 0;
489}
490
491static int journal_file_map_bisect_table(JournalFile *f) {
492 uint64_t s, p;
493 void *t;
494 int r;
495
496 assert(f);
497
498 p = le64toh(f->header->bisect_table_offset);
499 s = le64toh(f->header->bisect_table_size);
500
501 r = journal_file_map(f,
502 p, s,
503 &f->bisect_table_window, NULL, &f->bisect_table_window_size,
504 &t);
505
506 if (r < 0)
507 return r;
508
509 f->bisect_table = t;
510 return 0;
511}
512
513static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash_index) {
514 uint64_t p;
515 int r;
516
517 assert(f);
518 assert(o);
519 assert(offset > 0);
520 assert(o->object.type == htole64(OBJECT_DATA));
521
522 o->data.head_entry_offset = o->data.tail_entry_offset = 0;
523 o->data.next_hash_offset = 0;
524
525 p = le64toh(f->hash_table[hash_index].tail_hash_offset);
526 if (p == 0) {
527 /* Only entry in the hash table is easy */
528
529 o->data.prev_hash_offset = 0;
530 f->hash_table[hash_index].head_hash_offset = htole64(offset);
531 } else {
532 o->data.prev_hash_offset = htole64(p);
533
534 /* Temporarily move back to the previous data object,
535 * to patch in pointer */
536
537 r = journal_file_move_to_object(f, p, OBJECT_DATA, &o);
538 if (r < 0)
539 return r;
540
541 o->data.next_hash_offset = offset;
542
543 r = journal_file_move_to_object(f, offset, OBJECT_DATA, &o);
544 if (r < 0)
545 return r;
546 }
547
548 f->hash_table[hash_index].tail_hash_offset = htole64(offset);
549
550 return 0;
551}
552
553static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) {
554 uint64_t hash, h, p, np;
555 uint64_t osize;
556 Object *o;
557 int r;
558
559 assert(f);
560 assert(data || size == 0);
561
562 osize = offsetof(Object, data.payload) + size;
563
564 hash = hash64(data, size);
565 h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem));
566 p = le64toh(f->hash_table[h].head_hash_offset);
567
568 while (p != 0) {
569 /* Look for this data object in the hash table */
570
571 r = journal_file_move_to_object(f, p, OBJECT_DATA, &o);
572 if (r < 0)
573 return r;
574
575 if (le64toh(o->object.size) == osize &&
576 memcmp(o->data.payload, data, size) == 0) {
577
578 if (le64toh(o->data.hash) != hash)
579 return -EBADMSG;
580
581 if (ret)
582 *ret = o;
583
584 if (offset)
585 *offset = p;
586
587 return 0;
588 }
589
590 p = le64toh(o->data.next_hash_offset);
591 }
592
593 r = journal_file_append_object(f, osize, &o, &np);
594 if (r < 0)
595 return r;
596
597 o->object.type = htole64(OBJECT_DATA);
598 o->data.hash = htole64(hash);
599 memcpy(o->data.payload, data, size);
600
601 r = journal_file_link_data(f, o, np, h);
602 if (r < 0)
603 return r;
604
605 if (ret)
606 *ret = o;
607
608 if (offset)
609 *offset = np;
610
611 return 0;
612}
613
614uint64_t journal_file_entry_n_items(Object *o) {
615 assert(o);
616 assert(o->object.type == htole64(OBJECT_ENTRY));
617
618 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
619}
620
621static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
622 uint64_t p, q;
623 int r;
624 assert(f);
625 assert(o);
626 assert(offset > 0);
627
628 p = le64toh(o->entry.items[i].object_offset);
629 if (p == 0)
630 return -EINVAL;
631
632 o->entry.items[i].next_entry_offset = 0;
633
634 /* Move to the data object */
635 r = journal_file_move_to_object(f, p, OBJECT_DATA, &o);
636 if (r < 0)
637 return r;
638
639 q = le64toh(o->data.tail_entry_offset);
640 o->data.tail_entry_offset = htole64(offset);
641
642 if (q == 0)
643 o->data.head_entry_offset = htole64(offset);
644 else {
645 uint64_t n, j;
646
647 /* Move to previous entry */
648 r = journal_file_move_to_object(f, q, OBJECT_ENTRY, &o);
649 if (r < 0)
650 return r;
651
652 n = journal_file_entry_n_items(o);
653 for (j = 0; j < n; j++)
654 if (le64toh(o->entry.items[j].object_offset) == p)
655 break;
656
657 if (j >= n)
658 return -EBADMSG;
659
660 o->entry.items[j].next_entry_offset = offset;
661 }
662
663 /* Move back to original entry */
664 r = journal_file_move_to_object(f, offset, OBJECT_ENTRY, &o);
665 if (r < 0)
666 return r;
667
668 o->entry.items[i].prev_entry_offset = q;
669 return 0;
670}
671
672static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
673 uint64_t p, i, n, k, a, b;
674 int r;
675
676 assert(f);
677 assert(o);
678 assert(offset > 0);
679 assert(o->object.type == htole64(OBJECT_ENTRY));
680
681 /* Link up the entry itself */
682 p = le64toh(f->header->tail_entry_offset);
683
684 o->entry.prev_entry_offset = f->header->tail_entry_offset;
685 o->entry.next_entry_offset = 0;
686
0ac38b70 687 if (p == 0) {
cec736d2 688 f->header->head_entry_offset = htole64(offset);
0ac38b70
LP
689 f->header->head_entry_realtime = o->entry.realtime;
690 } else {
cec736d2
LP
691 /* Temporarily move back to the previous entry, to
692 * patch in pointer */
693
694 r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o);
695 if (r < 0)
696 return r;
697
698 o->entry.next_entry_offset = htole64(offset);
699
700 r = journal_file_move_to_object(f, offset, OBJECT_ENTRY, &o);
701 if (r < 0)
702 return r;
703 }
704
705 f->header->tail_entry_offset = htole64(offset);
0ac38b70 706 f->header->tail_entry_realtime = o->entry.realtime;
cec736d2
LP
707
708 /* Link up the items */
709 n = journal_file_entry_n_items(o);
710 for (i = 0; i < n; i++) {
711 r = journal_file_link_entry_item(f, o, offset, i);
712 if (r < 0)
713 return r;
714 }
715
716 /* Link up the entry in the bisect table */
717 n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t);
718 k = le64toh(f->header->arena_max_size) / n;
719
720 a = (le64toh(f->header->last_bisect_offset) + k - 1) / k;
721 b = offset / k;
722
723 for (; a <= b; a++)
724 f->bisect_table[a] = htole64(offset);
725
726 f->header->last_bisect_offset = htole64(offset + le64toh(o->object.size));
727
728 return 0;
729}
730
731static int journal_file_append_entry_internal(
732 JournalFile *f,
733 const dual_timestamp *ts,
734 uint64_t xor_hash,
735 const EntryItem items[], unsigned n_items,
736 Object **ret, uint64_t *offset) {
737 uint64_t np;
738 uint64_t osize;
739 Object *o;
740 int r;
741
742 assert(f);
743 assert(items || n_items == 0);
744
745 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
746
747 r = journal_file_append_object(f, osize, &o, &np);
748 if (r < 0)
749 return r;
750
751 o->object.type = htole64(OBJECT_ENTRY);
752 o->entry.seqnum = htole64(journal_file_seqnum(f));
753 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
3fbf9cbb
LP
754 o->entry.realtime = htole64(ts ? ts->realtime : now(CLOCK_REALTIME));
755 o->entry.monotonic = htole64(ts ? ts->monotonic : now(CLOCK_MONOTONIC));
cec736d2
LP
756 o->entry.xor_hash = htole64(xor_hash);
757 o->entry.boot_id = f->header->boot_id;
758
759 r = journal_file_link_entry(f, o, np);
760 if (r < 0)
761 return r;
762
763 if (ret)
764 *ret = o;
765
766 if (offset)
767 *offset = np;
768
769 return 0;
770}
771
772int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset) {
773 unsigned i;
774 EntryItem *items;
775 int r;
776 uint64_t xor_hash = 0;
777
778 assert(f);
779 assert(iovec || n_iovec == 0);
780
781 items = new(EntryItem, n_iovec);
782 if (!items)
783 return -ENOMEM;
784
785 for (i = 0; i < n_iovec; i++) {
786 uint64_t p;
787 Object *o;
788
789 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
790 if (r < 0)
791 goto finish;
792
793 xor_hash ^= le64toh(o->data.hash);
794 items[i].object_offset = htole64(p);
795 }
796
797 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, ret, offset);
798
799finish:
800 free(items);
801
802 return r;
803}
804
805int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset) {
806 Object *o;
807 uint64_t lower, upper, p, n, k;
808 int r;
809
810 assert(f);
811
812 n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t);
813 k = le64toh(f->header->arena_max_size) / n;
814
815 lower = 0;
816 upper = le64toh(f->header->last_bisect_offset)/k+1;
817
818 while (lower < upper) {
819 k = (upper + lower) / 2;
820 p = le64toh(f->bisect_table[k]);
821
822 if (p == 0) {
823 upper = k;
824 continue;
825 }
826
827 r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o);
828 if (r < 0)
829 return r;
830
831 if (o->entry.seqnum == seqnum) {
832 if (ret)
833 *ret = o;
834
835 if (offset)
836 *offset = p;
837
838 return 1;
839 } else if (seqnum < o->entry.seqnum)
840 upper = k;
841 else if (seqnum > o->entry.seqnum)
842 lower = k+1;
843 }
844
845 assert(lower == upper);
846
847 if (lower <= 0)
848 return 0;
849
850 /* The object we are looking for is between
851 * bisect_table[lower-1] and bisect_table[lower] */
852
853 p = le64toh(f->bisect_table[lower-1]);
854
855 for (;;) {
856 r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o);
857 if (r < 0)
858 return r;
859
860 if (o->entry.seqnum == seqnum) {
861 if (ret)
862 *ret = o;
863
864 if (offset)
865 *offset = p;
866
867 return 1;
868
869 } if (seqnum < o->entry.seqnum)
870 return 0;
871
872 if (o->entry.next_entry_offset == 0)
873 return 0;
874
875 p = le64toh(o->entry.next_entry_offset);
876 }
877
878 return 0;
879}
880
881int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) {
882 uint64_t np;
883 int r;
884
885 assert(f);
886
887 if (!o)
888 np = le64toh(f->header->head_entry_offset);
889 else {
890 if (le64toh(o->object.type) != OBJECT_ENTRY)
891 return -EINVAL;
892
893 np = le64toh(o->entry.next_entry_offset);
894 }
895
896 if (np == 0)
897 return 0;
898
899 r = journal_file_move_to_object(f, np, OBJECT_ENTRY, &o);
900 if (r < 0)
901 return r;
902
903 if (ret)
904 *ret = o;
905
906 if (offset)
907 *offset = np;
908
909 return 1;
910}
911
912int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) {
913 uint64_t np;
914 int r;
915
916 assert(f);
917
918 if (!o)
919 np = le64toh(f->header->tail_entry_offset);
920 else {
921 if (le64toh(o->object.type) != OBJECT_ENTRY)
922 return -EINVAL;
923
924 np = le64toh(o->entry.prev_entry_offset);
925 }
926
927 if (np == 0)
928 return 0;
929
930 r = journal_file_move_to_object(f, np, OBJECT_ENTRY, &o);
931 if (r < 0)
932 return r;
933
934 if (ret)
935 *ret = o;
936
937 if (offset)
938 *offset = np;
939
940 return 1;
941}
942
943int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) {
944 uint64_t p, osize, hash, h;
945 int r;
946
947 assert(f);
948 assert(data || size == 0);
949
950 osize = offsetof(Object, data.payload) + size;
951
952 hash = hash64(data, size);
953 h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem));
954 p = le64toh(f->hash_table[h].head_hash_offset);
955
956 while (p != 0) {
957 Object *o;
958
959 r = journal_file_move_to_object(f, p, OBJECT_DATA, &o);
960 if (r < 0)
961 return r;
962
963 if (le64toh(o->object.size) == osize &&
964 memcmp(o->data.payload, data, size) == 0) {
965
966 if (le64toh(o->data.hash) != hash)
967 return -EBADMSG;
968
969 if (o->data.head_entry_offset == 0)
970 return 0;
971
972 p = le64toh(o->data.head_entry_offset);
973 r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o);
974 if (r < 0)
975 return r;
976
977 if (ret)
978 *ret = o;
979
980 if (offset)
981 *offset = p;
982
983 return 1;
984 }
985
986 p = le64toh(o->data.next_hash_offset);
987 }
988
989 return 0;
990}
991
992int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) {
993 uint64_t p, osize, hash, h;
994 int r;
995
996 assert(f);
997 assert(data || size == 0);
998
999 osize = offsetof(Object, data.payload) + size;
1000
1001 hash = hash64(data, size);
1002 h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem));
1003 p = le64toh(f->hash_table[h].tail_hash_offset);
1004
1005 while (p != 0) {
1006 Object *o;
1007
1008 r = journal_file_move_to_object(f, p, OBJECT_DATA, &o);
1009 if (r < 0)
1010 return r;
1011
1012 if (le64toh(o->object.size) == osize &&
1013 memcmp(o->data.payload, data, size) == 0) {
1014
1015 if (le64toh(o->data.hash) != hash)
1016 return -EBADMSG;
1017
1018 if (o->data.tail_entry_offset == 0)
1019 return 0;
1020
1021 p = le64toh(o->data.tail_entry_offset);
1022 r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o);
1023 if (r < 0)
1024 return r;
1025
1026 if (ret)
1027 *ret = o;
1028
1029 if (offset)
1030 *offset = p;
1031
1032 return 1;
1033 }
1034
1035 p = le64toh(o->data.prev_hash_offset);
1036 }
1037
1038 return 0;
1039}
1040
1041void journal_file_dump(JournalFile *f) {
1042 char a[33], b[33], c[33];
1043 Object *o;
1044 int r;
1045 uint64_t p;
1046
1047 assert(f);
1048
1049 printf("File ID: %s\n"
1050 "Machine ID: %s\n"
1051 "Boot ID: %s\n"
1052 "Arena size: %llu\n",
1053 sd_id128_to_string(f->header->file_id, a),
1054 sd_id128_to_string(f->header->machine_id, b),
1055 sd_id128_to_string(f->header->boot_id, c),
1056 (unsigned long long) le64toh(f->header->arena_size));
1057
1058 p = le64toh(f->header->head_object_offset);
1059 while (p != 0) {
1060 r = journal_file_move_to_object(f, p, -1, &o);
1061 if (r < 0)
1062 goto fail;
1063
1064 switch (o->object.type) {
1065
1066 case OBJECT_UNUSED:
1067 printf("Type: OBJECT_UNUSED\n");
1068 break;
1069
1070 case OBJECT_DATA:
1071 printf("Type: OBJECT_DATA\n");
1072 break;
1073
1074 case OBJECT_ENTRY:
3fbf9cbb
LP
1075 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1076 (unsigned long long) le64toh(o->entry.seqnum),
1077 (unsigned long long) le64toh(o->entry.monotonic),
1078 (unsigned long long) le64toh(o->entry.realtime));
cec736d2
LP
1079 break;
1080
1081 case OBJECT_HASH_TABLE:
1082 printf("Type: OBJECT_HASH_TABLE\n");
1083 break;
1084
1085 case OBJECT_BISECT_TABLE:
1086 printf("Type: OBJECT_BISECT_TABLE\n");
1087 break;
1088 }
1089
1090 if (p == le64toh(f->header->tail_object_offset))
1091 p = 0;
1092 else
1093 p = p + ALIGN64(le64toh(o->object.size));
1094 }
1095
1096 return;
1097fail:
1098 log_error("File corrupt");
1099}
1100
1101int journal_file_open(
1102 const char *fname,
1103 int flags,
1104 mode_t mode,
0ac38b70 1105 JournalFile *template,
cec736d2
LP
1106 JournalFile **ret) {
1107
1108 JournalFile *f;
1109 int r;
1110 bool newly_created = false;
1111
1112 assert(fname);
1113
1114 if ((flags & O_ACCMODE) != O_RDONLY &&
1115 (flags & O_ACCMODE) != O_RDWR)
1116 return -EINVAL;
1117
1118 f = new0(JournalFile, 1);
1119 if (!f)
1120 return -ENOMEM;
1121
0ac38b70
LP
1122 f->fd = -1;
1123 f->flags = flags;
1124 f->mode = mode;
cec736d2
LP
1125 f->writable = (flags & O_ACCMODE) != O_RDONLY;
1126 f->prot = prot_from_flags(flags);
1127
cec736d2
LP
1128 f->path = strdup(fname);
1129 if (!f->path) {
1130 r = -ENOMEM;
1131 goto fail;
1132 }
1133
0ac38b70
LP
1134 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1135 if (f->fd < 0) {
1136 r = -errno;
1137 goto fail;
1138 }
1139
cec736d2
LP
1140 if (fstat(f->fd, &f->last_stat) < 0) {
1141 r = -errno;
1142 goto fail;
1143 }
1144
1145 if (f->last_stat.st_size == 0 && f->writable) {
1146 newly_created = true;
1147
0ac38b70 1148 r = journal_file_init_header(f, template);
cec736d2
LP
1149 if (r < 0)
1150 goto fail;
1151
1152 if (fstat(f->fd, &f->last_stat) < 0) {
1153 r = -errno;
1154 goto fail;
1155 }
1156 }
1157
1158 if (f->last_stat.st_size < (off_t) sizeof(Header)) {
1159 r = -EIO;
1160 goto fail;
1161 }
1162
1163 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
1164 if (f->header == MAP_FAILED) {
1165 f->header = NULL;
1166 r = -errno;
1167 goto fail;
1168 }
1169
1170 if (!newly_created) {
1171 r = journal_file_verify_header(f);
1172 if (r < 0)
1173 goto fail;
1174 }
1175
1176 if (f->writable) {
1177 r = journal_file_refresh_header(f);
1178 if (r < 0)
1179 goto fail;
1180 }
1181
1182 if (newly_created) {
1183
1184 r = journal_file_setup_hash_table(f);
1185 if (r < 0)
1186 goto fail;
1187
1188 r = journal_file_setup_bisect_table(f);
1189 if (r < 0)
1190 goto fail;
1191 }
1192
1193 r = journal_file_map_hash_table(f);
1194 if (r < 0)
1195 goto fail;
1196
1197 r = journal_file_map_bisect_table(f);
1198 if (r < 0)
1199 goto fail;
1200
1201 if (ret)
1202 *ret = f;
1203
1204 return 0;
1205
1206fail:
1207 journal_file_close(f);
1208
1209 return r;
1210}
0ac38b70
LP
1211
1212int journal_file_rotate(JournalFile **f) {
1213 char *p;
1214 size_t l;
1215 JournalFile *old_file, *new_file = NULL;
1216 int r;
1217
1218 assert(f);
1219 assert(*f);
1220
1221 old_file = *f;
1222
1223 if (!old_file->writable)
1224 return -EINVAL;
1225
1226 if (!endswith(old_file->path, ".journal"))
1227 return -EINVAL;
1228
1229 l = strlen(old_file->path);
1230
1231 p = new(char, l + 1 + 16 + 1 + 32 + 1 + 16 + 1);
1232 if (!p)
1233 return -ENOMEM;
1234
1235 memcpy(p, old_file->path, l - 8);
1236 p[l-8] = '@';
1237 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
1238 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
1239 "-%016llx-%016llx.journal",
1240 (unsigned long long) le64toh((*f)->header->seqnum),
1241 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
1242
1243 r = rename(old_file->path, p);
1244 free(p);
1245
1246 if (r < 0)
1247 return -errno;
1248
1249 old_file->header->state = le32toh(STATE_ARCHIVED);
1250
1251 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file);
1252 journal_file_close(old_file);
1253
1254 *f = new_file;
1255 return r;
1256}
1257
1258struct vacuum_info {
1259 off_t usage;
1260 char *filename;
1261
1262 uint64_t realtime;
1263 sd_id128_t seqnum_id;
1264 uint64_t seqnum;
1265};
1266
1267static int vacuum_compare(const void *_a, const void *_b) {
1268 const struct vacuum_info *a, *b;
1269
1270 a = _a;
1271 b = _b;
1272
1273 if (sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
1274 if (a->seqnum < b->seqnum)
1275 return -1;
1276 else if (a->seqnum > b->seqnum)
1277 return 1;
1278 else
1279 return 0;
1280 }
1281
1282 if (a->realtime < b->realtime)
1283 return -1;
1284 else if (a->realtime > b->realtime)
1285 return 1;
1286 else
1287 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
1288}
1289
1290int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
1291 DIR *d;
1292 int r = 0;
1293 struct vacuum_info *list = NULL;
1294 unsigned n_list = 0, n_allocated = 0, i;
1295 uint64_t sum = 0;
1296
1297 assert(directory);
1298
1299 if (max_use <= 0)
1300 max_use = DEFAULT_MAX_USE;
1301
1302 d = opendir(directory);
1303 if (!d)
1304 return -errno;
1305
1306 for (;;) {
1307 int k;
1308 struct dirent buf, *de;
1309 size_t q;
1310 struct stat st;
1311 char *p;
1312 unsigned long long seqnum, realtime;
1313 sd_id128_t seqnum_id;
1314
1315 k = readdir_r(d, &buf, &de);
1316 if (k != 0) {
1317 r = -k;
1318 goto finish;
1319 }
1320
1321 if (!de)
1322 break;
1323
1324 if (!dirent_is_file_with_suffix(de, ".journal"))
1325 continue;
1326
1327 q = strlen(de->d_name);
1328
1329 if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
1330 continue;
1331
1332 if (de->d_name[q-8-16-1] != '-' ||
1333 de->d_name[q-8-16-1-16-1] != '-' ||
1334 de->d_name[q-8-16-1-16-1-32-1] != '@')
1335 continue;
1336
1337 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
1338 continue;
1339
1340 if (!S_ISREG(st.st_mode))
1341 continue;
1342
1343 p = strdup(de->d_name);
1344 if (!p) {
1345 r = -ENOMEM;
1346 goto finish;
1347 }
1348
1349 de->d_name[q-8-16-1-16-1] = 0;
1350 if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
1351 free(p);
1352 continue;
1353 }
1354
1355 if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
1356 free(p);
1357 continue;
1358 }
1359
1360 if (n_list >= n_allocated) {
1361 struct vacuum_info *j;
1362
1363 n_allocated = MAX(n_allocated * 2U, 8U);
1364 j = realloc(list, n_allocated * sizeof(struct vacuum_info));
1365 if (!j) {
1366 free(p);
1367 r = -ENOMEM;
1368 goto finish;
1369 }
1370
1371 list = j;
1372 }
1373
1374 list[n_list].filename = p;
1375 list[n_list].usage = (uint64_t) st.st_blksize * (uint64_t) st.st_blocks;
1376 list[n_list].seqnum = seqnum;
1377 list[n_list].realtime = realtime;
1378 list[n_list].seqnum_id = seqnum_id;
1379
1380 sum += list[n_list].usage;
1381
1382 n_list ++;
1383 }
1384
1385 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
1386
1387 for(i = 0; i < n_list; i++) {
1388 struct statvfs ss;
1389
1390 if (fstatvfs(dirfd(d), &ss) < 0) {
1391 r = -errno;
1392 goto finish;
1393 }
1394
1395 if (sum <= max_use &&
1396 (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
1397 break;
1398
1399 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
1400 log_debug("Deleted archived journal %s/%s.", directory, list[i].filename);
1401 sum -= list[i].usage;
1402 } else if (errno != ENOENT)
1403 log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
1404 }
1405
1406finish:
1407 for (i = 0; i < n_list; i++)
1408 free(list[i].filename);
1409
1410 free(list);
1411
1412 return r;
1413}