]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journal: fix bad memory access
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
32#include "lookup3.h"
807e17f0 33#include "compress.h"
cec736d2 34
de190aef
LP
35#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*16ULL)
36#define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL*16ULL)
cec736d2 37
1fa80181 38#define DEFAULT_WINDOW_SIZE (8ULL*1024ULL*1024ULL)
cec736d2 39
be19b7df 40#define COMPRESSION_SIZE_THRESHOLD (512ULL)
807e17f0 41
babfc091 42/* This is the minimum journal file size */
b47ffcfd 43#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
babfc091
LP
44
45/* These are the lower and upper bounds if we deduce the max_use value
46 * from the file system size */
47#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
48#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
49
50/* This is the upper bound if we deduce max_size from max_use */
71100051 51#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
babfc091
LP
52
53/* This is the upper bound if we deduce the keep_free value from the
54 * file system size */
55#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
56
57/* This is the keep_free value when we can't determine the system
58 * size */
59#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
60
cec736d2
LP
61static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };
62
63#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
64
65void journal_file_close(JournalFile *f) {
de190aef 66 int t;
cec736d2 67
de190aef 68 assert(f);
cec736d2 69
d384c7a8
MS
70 if (f->header) {
71 if (f->writable)
72 f->header->state = STATE_OFFLINE;
cec736d2 73
d384c7a8
MS
74 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
75 }
cec736d2 76
de190aef
LP
77 for (t = 0; t < _WINDOW_MAX; t++)
78 if (f->windows[t].ptr)
79 munmap(f->windows[t].ptr, f->windows[t].size);
cec736d2 80
0ac38b70
LP
81 if (f->fd >= 0)
82 close_nointr_nofail(f->fd);
83
cec736d2 84 free(f->path);
807e17f0
LP
85
86#ifdef HAVE_XZ
87 free(f->compress_buffer);
88#endif
89
cec736d2
LP
90 free(f);
91}
92
0ac38b70 93static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
94 Header h;
95 ssize_t k;
96 int r;
97
98 assert(f);
99
100 zero(h);
101 memcpy(h.signature, signature, 8);
23b0b2b2 102 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2
LP
103
104 r = sd_id128_randomize(&h.file_id);
105 if (r < 0)
106 return r;
107
0ac38b70
LP
108 if (template) {
109 h.seqnum_id = template->header->seqnum_id;
110 h.seqnum = template->header->seqnum;
111 } else
112 h.seqnum_id = h.file_id;
cec736d2
LP
113
114 k = pwrite(f->fd, &h, sizeof(h), 0);
115 if (k < 0)
116 return -errno;
117
118 if (k != sizeof(h))
119 return -EIO;
120
121 return 0;
122}
123
124static int journal_file_refresh_header(JournalFile *f) {
125 int r;
de190aef 126 sd_id128_t boot_id;
cec736d2
LP
127
128 assert(f);
129
130 r = sd_id128_get_machine(&f->header->machine_id);
131 if (r < 0)
132 return r;
133
de190aef 134 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
135 if (r < 0)
136 return r;
137
de190aef
LP
138 if (sd_id128_equal(boot_id, f->header->boot_id))
139 f->tail_entry_monotonic_valid = true;
140
141 f->header->boot_id = boot_id;
142
143 f->header->state = STATE_ONLINE;
b788cc23
LP
144
145 __sync_synchronize();
146
cec736d2
LP
147 return 0;
148}
149
150static int journal_file_verify_header(JournalFile *f) {
151 assert(f);
152
153 if (memcmp(f->header, signature, 8))
154 return -EBADMSG;
155
807e17f0
LP
156#ifdef HAVE_XZ
157 if ((le64toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
158 return -EPROTONOSUPPORT;
159#else
cec736d2
LP
160 if (f->header->incompatible_flags != 0)
161 return -EPROTONOSUPPORT;
807e17f0 162#endif
cec736d2 163
23b0b2b2
LP
164 if (f->header->header_size != htole64(ALIGN64(sizeof(*(f->header)))))
165 return -EBADMSG;
166
167 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
cec736d2
LP
168 return -ENODATA;
169
170 if (f->writable) {
ccdbaf91 171 uint8_t state;
cec736d2
LP
172 sd_id128_t machine_id;
173 int r;
174
175 r = sd_id128_get_machine(&machine_id);
176 if (r < 0)
177 return r;
178
179 if (!sd_id128_equal(machine_id, f->header->machine_id))
180 return -EHOSTDOWN;
181
de190aef 182 state = f->header->state;
cec736d2
LP
183
184 if (state == STATE_ONLINE)
185 log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path);
08984293 186 /* FIXME: immediately rotate */
cec736d2
LP
187 else if (state == STATE_ARCHIVED)
188 return -ESHUTDOWN;
189 else if (state != STATE_OFFLINE)
190 log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state);
191 }
192
193 return 0;
194}
195
196static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 197 uint64_t old_size, new_size;
fec2aa2f 198 int r;
cec736d2
LP
199
200 assert(f);
201
cec736d2 202 /* We assume that this file is not sparse, and we know that
38ac38b2 203 * for sure, since we always call posix_fallocate()
cec736d2
LP
204 * ourselves */
205
206 old_size =
23b0b2b2 207 le64toh(f->header->header_size) +
cec736d2
LP
208 le64toh(f->header->arena_size);
209
bc85bfee 210 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
211 if (new_size < le64toh(f->header->header_size))
212 new_size = le64toh(f->header->header_size);
bc85bfee
LP
213
214 if (new_size <= old_size)
cec736d2
LP
215 return 0;
216
bc85bfee
LP
217 if (f->metrics.max_size > 0 &&
218 new_size > f->metrics.max_size)
219 return -E2BIG;
cec736d2 220
bc85bfee
LP
221 if (new_size > f->metrics.min_size &&
222 f->metrics.keep_free > 0) {
cec736d2
LP
223 struct statvfs svfs;
224
225 if (fstatvfs(f->fd, &svfs) >= 0) {
226 uint64_t available;
227
228 available = svfs.f_bfree * svfs.f_bsize;
229
bc85bfee
LP
230 if (available >= f->metrics.keep_free)
231 available -= f->metrics.keep_free;
cec736d2
LP
232 else
233 available = 0;
234
235 if (new_size - old_size > available)
236 return -E2BIG;
237 }
238 }
239
bc85bfee
LP
240 /* Note that the glibc fallocate() fallback is very
241 inefficient, hence we try to minimize the allocation area
242 as we can. */
fec2aa2f
GV
243 r = posix_fallocate(f->fd, old_size, new_size - old_size);
244 if (r != 0)
245 return -r;
cec736d2
LP
246
247 if (fstat(f->fd, &f->last_stat) < 0)
248 return -errno;
249
23b0b2b2 250 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
251
252 return 0;
253}
254
255static int journal_file_map(
256 JournalFile *f,
257 uint64_t offset,
258 uint64_t size,
259 void **_window,
260 uint64_t *_woffset,
261 uint64_t *_wsize,
262 void **ret) {
263
264 uint64_t woffset, wsize;
265 void *window;
266
267 assert(f);
268 assert(size > 0);
269 assert(ret);
270
271 woffset = offset & ~((uint64_t) page_size() - 1ULL);
272 wsize = size + (offset - woffset);
273 wsize = PAGE_ALIGN(wsize);
274
2a59ea54
LP
275 /* Avoid SIGBUS on invalid accesses */
276 if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
277 return -EADDRNOTAVAIL;
278
cec736d2
LP
279 window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
280 if (window == MAP_FAILED)
281 return -errno;
282
283 if (_window)
284 *_window = window;
285
286 if (_woffset)
287 *_woffset = woffset;
288
289 if (_wsize)
290 *_wsize = wsize;
291
292 *ret = (uint8_t*) window + (offset - woffset);
293
294 return 0;
295}
296
de190aef 297static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
6c8a39b8 298 void *p = NULL;
cec736d2
LP
299 uint64_t delta;
300 int r;
de190aef 301 Window *w;
cec736d2
LP
302
303 assert(f);
304 assert(ret);
de190aef
LP
305 assert(wt >= 0);
306 assert(wt < _WINDOW_MAX);
cec736d2 307
4bbdcdb3
LP
308 if (offset + size > (uint64_t) f->last_stat.st_size) {
309 /* Hmm, out of range? Let's refresh the fstat() data
310 * first, before we trust that check. */
311
312 if (fstat(f->fd, &f->last_stat) < 0 ||
313 offset + size > (uint64_t) f->last_stat.st_size)
314 return -EADDRNOTAVAIL;
315 }
316
de190aef 317 w = f->windows + wt;
cec736d2 318
de190aef
LP
319 if (_likely_(w->ptr &&
320 w->offset <= offset &&
321 w->offset + w->size >= offset + size)) {
322
323 *ret = (uint8_t*) w->ptr + (offset - w->offset);
cec736d2
LP
324 return 0;
325 }
326
de190aef
LP
327 if (w->ptr) {
328 if (munmap(w->ptr, w->size) < 0)
cec736d2
LP
329 return -errno;
330
de190aef
LP
331 w->ptr = NULL;
332 w->size = w->offset = 0;
cec736d2
LP
333 }
334
335 if (size < DEFAULT_WINDOW_SIZE) {
336 /* If the default window size is larger then what was
337 * asked for extend the mapping a bit in the hope to
338 * minimize needed remappings later on. We add half
339 * the window space before and half behind the
340 * requested mapping */
341
1921a5cb 342 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
cec736d2 343
a99c349d 344 if (delta > offset)
cec736d2
LP
345 delta = offset;
346
347 offset -= delta;
a99c349d 348 size = DEFAULT_WINDOW_SIZE;
cec736d2
LP
349 } else
350 delta = 0;
351
2a59ea54 352 if (offset + size > (uint64_t) f->last_stat.st_size)
1921a5cb 353 size = (uint64_t) f->last_stat.st_size - offset;
2a59ea54
LP
354
355 if (size <= 0)
356 return -EADDRNOTAVAIL;
357
cec736d2
LP
358 r = journal_file_map(f,
359 offset, size,
de190aef
LP
360 &w->ptr, &w->offset, &w->size,
361 &p);
cec736d2
LP
362
363 if (r < 0)
364 return r;
365
366 *ret = (uint8_t*) p + delta;
367 return 0;
368}
369
370static bool verify_hash(Object *o) {
de190aef 371 uint64_t h1, h2;
cec736d2
LP
372
373 assert(o);
374
807e17f0 375 if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
cec736d2 376 h1 = le64toh(o->data.hash);
de190aef
LP
377 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
378 } else if (o->object.type == OBJECT_FIELD) {
379 h1 = le64toh(o->field.hash);
380 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
381 } else
382 return true;
cec736d2 383
de190aef 384 return h1 == h2;
cec736d2
LP
385}
386
de190aef 387int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
388 int r;
389 void *t;
390 Object *o;
391 uint64_t s;
392
393 assert(f);
394 assert(ret);
de190aef 395 assert(type < _OBJECT_TYPE_MAX);
cec736d2 396
de190aef 397 r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
398 if (r < 0)
399 return r;
400
401 o = (Object*) t;
402 s = le64toh(o->object.size);
403
404 if (s < sizeof(ObjectHeader))
405 return -EBADMSG;
406
de190aef 407 if (type >= 0 && o->object.type != type)
cec736d2
LP
408 return -EBADMSG;
409
410 if (s > sizeof(ObjectHeader)) {
de190aef 411 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
412 if (r < 0)
413 return r;
414
415 o = (Object*) t;
416 }
417
418 if (!verify_hash(o))
419 return -EBADMSG;
420
421 *ret = o;
422 return 0;
423}
424
c2373f84 425static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
426 uint64_t r;
427
428 assert(f);
429
430 r = le64toh(f->header->seqnum) + 1;
c2373f84
LP
431
432 if (seqnum) {
de190aef 433 /* If an external seqnum counter was passed, we update
c2373f84
LP
434 * both the local and the external one, and set it to
435 * the maximum of both */
436
437 if (*seqnum + 1 > r)
438 r = *seqnum + 1;
439
440 *seqnum = r;
441 }
442
cec736d2
LP
443 f->header->seqnum = htole64(r);
444
de190aef
LP
445 if (f->header->first_seqnum == 0)
446 f->header->first_seqnum = htole64(r);
447
cec736d2
LP
448 return r;
449}
450
de190aef 451static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
452 int r;
453 uint64_t p;
454 Object *tail, *o;
455 void *t;
456
457 assert(f);
458 assert(size >= sizeof(ObjectHeader));
459 assert(offset);
460 assert(ret);
461
462 p = le64toh(f->header->tail_object_offset);
cec736d2 463 if (p == 0)
23b0b2b2 464 p = le64toh(f->header->header_size);
cec736d2 465 else {
de190aef 466 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
467 if (r < 0)
468 return r;
469
470 p += ALIGN64(le64toh(tail->object.size));
471 }
472
473 r = journal_file_allocate(f, p, size);
474 if (r < 0)
475 return r;
476
de190aef 477 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
478 if (r < 0)
479 return r;
480
481 o = (Object*) t;
482
483 zero(o->object);
de190aef 484 o->object.type = type;
cec736d2
LP
485 o->object.size = htole64(size);
486
487 f->header->tail_object_offset = htole64(p);
cec736d2
LP
488 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
489
490 *ret = o;
491 *offset = p;
492
493 return 0;
494}
495
de190aef 496static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
497 uint64_t s, p;
498 Object *o;
499 int r;
500
501 assert(f);
502
de190aef
LP
503 s = DEFAULT_DATA_HASH_TABLE_SIZE;
504 r = journal_file_append_object(f,
505 OBJECT_DATA_HASH_TABLE,
506 offsetof(Object, hash_table.items) + s,
507 &o, &p);
cec736d2
LP
508 if (r < 0)
509 return r;
510
de190aef 511 memset(o->hash_table.items, 0, s);
cec736d2 512
de190aef
LP
513 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
514 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
515
516 return 0;
517}
518
de190aef 519static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
520 uint64_t s, p;
521 Object *o;
522 int r;
523
524 assert(f);
525
de190aef
LP
526 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
527 r = journal_file_append_object(f,
528 OBJECT_FIELD_HASH_TABLE,
529 offsetof(Object, hash_table.items) + s,
530 &o, &p);
cec736d2
LP
531 if (r < 0)
532 return r;
533
de190aef 534 memset(o->hash_table.items, 0, s);
cec736d2 535
de190aef
LP
536 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
537 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
538
539 return 0;
540}
541
de190aef 542static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
543 uint64_t s, p;
544 void *t;
545 int r;
546
547 assert(f);
548
de190aef
LP
549 p = le64toh(f->header->data_hash_table_offset);
550 s = le64toh(f->header->data_hash_table_size);
cec736d2 551
de190aef
LP
552 r = journal_file_move_to(f,
553 WINDOW_DATA_HASH_TABLE,
554 p, s,
555 &t);
cec736d2
LP
556 if (r < 0)
557 return r;
558
de190aef 559 f->data_hash_table = t;
cec736d2
LP
560 return 0;
561}
562
de190aef 563static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
564 uint64_t s, p;
565 void *t;
566 int r;
567
568 assert(f);
569
de190aef
LP
570 p = le64toh(f->header->field_hash_table_offset);
571 s = le64toh(f->header->field_hash_table_size);
cec736d2 572
de190aef
LP
573 r = journal_file_move_to(f,
574 WINDOW_FIELD_HASH_TABLE,
575 p, s,
576 &t);
cec736d2
LP
577 if (r < 0)
578 return r;
579
de190aef 580 f->field_hash_table = t;
cec736d2
LP
581 return 0;
582}
583
de190aef
LP
584static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
585 uint64_t p, h;
cec736d2
LP
586 int r;
587
588 assert(f);
589 assert(o);
590 assert(offset > 0);
de190aef 591 assert(o->object.type == OBJECT_DATA);
cec736d2 592
48496df6
LP
593 /* This might alter the window we are looking at */
594
de190aef
LP
595 o->data.next_hash_offset = o->data.next_field_offset = 0;
596 o->data.entry_offset = o->data.entry_array_offset = 0;
597 o->data.n_entries = 0;
cec736d2 598
de190aef 599 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 600 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
601 if (p == 0) {
602 /* Only entry in the hash table is easy */
de190aef 603 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 604 } else {
48496df6
LP
605 /* Move back to the previous data object, to patch in
606 * pointer */
cec736d2 607
de190aef 608 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
609 if (r < 0)
610 return r;
611
de190aef 612 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
613 }
614
de190aef 615 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2
LP
616
617 return 0;
618}
619
de190aef
LP
620int journal_file_find_data_object_with_hash(
621 JournalFile *f,
622 const void *data, uint64_t size, uint64_t hash,
623 Object **ret, uint64_t *offset) {
48496df6 624
de190aef 625 uint64_t p, osize, h;
cec736d2
LP
626 int r;
627
628 assert(f);
629 assert(data || size == 0);
630
631 osize = offsetof(Object, data.payload) + size;
632
bc85bfee
LP
633 if (f->header->data_hash_table_size == 0)
634 return -EBADMSG;
635
de190aef
LP
636 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
637 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 638
de190aef
LP
639 while (p > 0) {
640 Object *o;
cec736d2 641
de190aef 642 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
643 if (r < 0)
644 return r;
645
807e17f0 646 if (le64toh(o->data.hash) != hash)
85a131e8 647 goto next;
807e17f0
LP
648
649 if (o->object.flags & OBJECT_COMPRESSED) {
650#ifdef HAVE_XZ
b785c858 651 uint64_t l, rsize;
cec736d2 652
807e17f0
LP
653 l = le64toh(o->object.size);
654 if (l <= offsetof(Object, data.payload))
cec736d2
LP
655 return -EBADMSG;
656
807e17f0
LP
657 l -= offsetof(Object, data.payload);
658
659 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
660 return -EBADMSG;
661
b785c858 662 if (rsize == size &&
807e17f0
LP
663 memcmp(f->compress_buffer, data, size) == 0) {
664
665 if (ret)
666 *ret = o;
667
668 if (offset)
669 *offset = p;
670
671 return 1;
672 }
673#else
674 return -EPROTONOSUPPORT;
675#endif
676
677 } else if (le64toh(o->object.size) == osize &&
678 memcmp(o->data.payload, data, size) == 0) {
679
cec736d2
LP
680 if (ret)
681 *ret = o;
682
683 if (offset)
684 *offset = p;
685
de190aef 686 return 1;
cec736d2
LP
687 }
688
85a131e8 689 next:
cec736d2
LP
690 p = le64toh(o->data.next_hash_offset);
691 }
692
de190aef
LP
693 return 0;
694}
695
696int journal_file_find_data_object(
697 JournalFile *f,
698 const void *data, uint64_t size,
699 Object **ret, uint64_t *offset) {
700
701 uint64_t hash;
702
703 assert(f);
704 assert(data || size == 0);
705
706 hash = hash64(data, size);
707
708 return journal_file_find_data_object_with_hash(f,
709 data, size, hash,
710 ret, offset);
711}
712
48496df6
LP
713static int journal_file_append_data(
714 JournalFile *f,
715 const void *data, uint64_t size,
716 Object **ret, uint64_t *offset) {
717
de190aef
LP
718 uint64_t hash, p;
719 uint64_t osize;
720 Object *o;
721 int r;
807e17f0 722 bool compressed = false;
de190aef
LP
723
724 assert(f);
725 assert(data || size == 0);
726
727 hash = hash64(data, size);
728
729 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
730 if (r < 0)
731 return r;
732 else if (r > 0) {
733
734 if (ret)
735 *ret = o;
736
737 if (offset)
738 *offset = p;
739
740 return 0;
741 }
742
743 osize = offsetof(Object, data.payload) + size;
744 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
745 if (r < 0)
746 return r;
747
cec736d2 748 o->data.hash = htole64(hash);
807e17f0
LP
749
750#ifdef HAVE_XZ
751 if (f->compress &&
752 size >= COMPRESSION_SIZE_THRESHOLD) {
753 uint64_t rsize;
754
755 compressed = compress_blob(data, size, o->data.payload, &rsize);
756
757 if (compressed) {
758 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
759 o->object.flags |= OBJECT_COMPRESSED;
760
761 f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED);
762
763 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
764 }
765 }
766#endif
767
768 if (!compressed)
769 memcpy(o->data.payload, data, size);
cec736d2 770
de190aef 771 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
772 if (r < 0)
773 return r;
774
48496df6
LP
775 /* The linking might have altered the window, so let's
776 * refresh our pointer */
777 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
778 if (r < 0)
779 return r;
780
cec736d2
LP
781 if (ret)
782 *ret = o;
783
784 if (offset)
de190aef 785 *offset = p;
cec736d2
LP
786
787 return 0;
788}
789
790uint64_t journal_file_entry_n_items(Object *o) {
791 assert(o);
7be3aa17 792 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
793
794 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
795}
796
de190aef
LP
797static uint64_t journal_file_entry_array_n_items(Object *o) {
798 assert(o);
7be3aa17 799 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
800
801 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
802}
803
804static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
805 le64_t *first,
806 le64_t *idx,
de190aef 807 uint64_t p) {
cec736d2 808 int r;
de190aef
LP
809 uint64_t n = 0, ap = 0, q, i, a, hidx;
810 Object *o;
811
cec736d2 812 assert(f);
de190aef
LP
813 assert(first);
814 assert(idx);
815 assert(p > 0);
cec736d2 816
de190aef
LP
817 a = le64toh(*first);
818 i = hidx = le64toh(*idx);
819 while (a > 0) {
820
821 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
822 if (r < 0)
823 return r;
cec736d2 824
de190aef
LP
825 n = journal_file_entry_array_n_items(o);
826 if (i < n) {
827 o->entry_array.items[i] = htole64(p);
828 *idx = htole64(hidx + 1);
829 return 0;
830 }
cec736d2 831
de190aef
LP
832 i -= n;
833 ap = a;
834 a = le64toh(o->entry_array.next_entry_array_offset);
835 }
836
837 if (hidx > n)
838 n = (hidx+1) * 2;
839 else
840 n = n * 2;
841
842 if (n < 4)
843 n = 4;
844
845 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
846 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
847 &o, &q);
cec736d2
LP
848 if (r < 0)
849 return r;
850
de190aef 851 o->entry_array.items[i] = htole64(p);
cec736d2 852
de190aef 853 if (ap == 0)
7be3aa17 854 *first = htole64(q);
cec736d2 855 else {
de190aef 856 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
857 if (r < 0)
858 return r;
859
de190aef
LP
860 o->entry_array.next_entry_array_offset = htole64(q);
861 }
cec736d2 862
de190aef
LP
863 *idx = htole64(hidx + 1);
864
865 return 0;
866}
cec736d2 867
de190aef 868static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
869 le64_t *extra,
870 le64_t *first,
871 le64_t *idx,
de190aef
LP
872 uint64_t p) {
873
874 int r;
875
876 assert(f);
877 assert(extra);
878 assert(first);
879 assert(idx);
880 assert(p > 0);
881
882 if (*idx == 0)
883 *extra = htole64(p);
884 else {
4fd052ae 885 le64_t i;
de190aef 886
7be3aa17 887 i = htole64(le64toh(*idx) - 1);
de190aef
LP
888 r = link_entry_into_array(f, first, &i, p);
889 if (r < 0)
890 return r;
cec736d2
LP
891 }
892
de190aef
LP
893 *idx = htole64(le64toh(*idx) + 1);
894 return 0;
895}
896
897static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
898 uint64_t p;
899 int r;
900 assert(f);
901 assert(o);
902 assert(offset > 0);
903
904 p = le64toh(o->entry.items[i].object_offset);
905 if (p == 0)
906 return -EINVAL;
907
908 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
909 if (r < 0)
910 return r;
911
de190aef
LP
912 return link_entry_into_array_plus_one(f,
913 &o->data.entry_offset,
914 &o->data.entry_array_offset,
915 &o->data.n_entries,
916 offset);
cec736d2
LP
917}
918
919static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 920 uint64_t n, i;
cec736d2
LP
921 int r;
922
923 assert(f);
924 assert(o);
925 assert(offset > 0);
de190aef 926 assert(o->object.type == OBJECT_ENTRY);
cec736d2 927
b788cc23
LP
928 __sync_synchronize();
929
cec736d2 930 /* Link up the entry itself */
de190aef
LP
931 r = link_entry_into_array(f,
932 &f->header->entry_array_offset,
933 &f->header->n_entries,
934 offset);
935 if (r < 0)
936 return r;
cec736d2 937
aaf53376 938 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 939
de190aef 940 if (f->header->head_entry_realtime == 0)
0ac38b70 941 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 942
0ac38b70 943 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
944 f->header->tail_entry_monotonic = o->entry.monotonic;
945
946 f->tail_entry_monotonic_valid = true;
cec736d2
LP
947
948 /* Link up the items */
949 n = journal_file_entry_n_items(o);
950 for (i = 0; i < n; i++) {
951 r = journal_file_link_entry_item(f, o, offset, i);
952 if (r < 0)
953 return r;
954 }
955
cec736d2
LP
956 return 0;
957}
958
959static int journal_file_append_entry_internal(
960 JournalFile *f,
961 const dual_timestamp *ts,
962 uint64_t xor_hash,
963 const EntryItem items[], unsigned n_items,
de190aef 964 uint64_t *seqnum,
cec736d2
LP
965 Object **ret, uint64_t *offset) {
966 uint64_t np;
967 uint64_t osize;
968 Object *o;
969 int r;
970
971 assert(f);
972 assert(items || n_items == 0);
de190aef 973 assert(ts);
cec736d2
LP
974
975 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
976
de190aef 977 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
978 if (r < 0)
979 return r;
980
de190aef 981 o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
cec736d2 982 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
983 o->entry.realtime = htole64(ts->realtime);
984 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
985 o->entry.xor_hash = htole64(xor_hash);
986 o->entry.boot_id = f->header->boot_id;
987
988 r = journal_file_link_entry(f, o, np);
989 if (r < 0)
990 return r;
991
992 if (ret)
993 *ret = o;
994
995 if (offset)
996 *offset = np;
997
998 return 0;
999}
1000
cf244689 1001void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1002 assert(f);
1003
1004 /* inotify() does not receive IN_MODIFY events from file
1005 * accesses done via mmap(). After each access we hence
1006 * trigger IN_MODIFY by truncating the journal file to its
1007 * current size which triggers IN_MODIFY. */
1008
bc85bfee
LP
1009 __sync_synchronize();
1010
50f20cfd
LP
1011 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1012 log_error("Failed to to truncate file to its own size: %m");
1013}
1014
de190aef 1015int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1016 unsigned i;
1017 EntryItem *items;
1018 int r;
1019 uint64_t xor_hash = 0;
de190aef 1020 struct dual_timestamp _ts;
cec736d2
LP
1021
1022 assert(f);
1023 assert(iovec || n_iovec == 0);
1024
de190aef
LP
1025 if (!f->writable)
1026 return -EPERM;
1027
1028 if (!ts) {
1029 dual_timestamp_get(&_ts);
1030 ts = &_ts;
1031 }
1032
1033 if (f->tail_entry_monotonic_valid &&
1034 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1035 return -EINVAL;
1036
cf244689 1037 items = alloca(sizeof(EntryItem) * n_iovec);
cec736d2
LP
1038
1039 for (i = 0; i < n_iovec; i++) {
1040 uint64_t p;
1041 Object *o;
1042
1043 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1044 if (r < 0)
cf244689 1045 return r;
cec736d2
LP
1046
1047 xor_hash ^= le64toh(o->data.hash);
1048 items[i].object_offset = htole64(p);
de7b95cd 1049 items[i].hash = o->data.hash;
cec736d2
LP
1050 }
1051
de190aef 1052 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1053
50f20cfd
LP
1054 journal_file_post_change(f);
1055
cec736d2
LP
1056 return r;
1057}
1058
de190aef
LP
1059static int generic_array_get(JournalFile *f,
1060 uint64_t first,
1061 uint64_t i,
1062 Object **ret, uint64_t *offset) {
1063
cec736d2 1064 Object *o;
6c8a39b8 1065 uint64_t p = 0, a;
cec736d2
LP
1066 int r;
1067
1068 assert(f);
1069
de190aef
LP
1070 a = first;
1071 while (a > 0) {
1072 uint64_t n;
cec736d2 1073
de190aef
LP
1074 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1075 if (r < 0)
1076 return r;
cec736d2 1077
de190aef
LP
1078 n = journal_file_entry_array_n_items(o);
1079 if (i < n) {
1080 p = le64toh(o->entry_array.items[i]);
1081 break;
cec736d2
LP
1082 }
1083
de190aef
LP
1084 i -= n;
1085 a = le64toh(o->entry_array.next_entry_array_offset);
1086 }
1087
1088 if (a <= 0 || p <= 0)
1089 return 0;
1090
1091 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1092 if (r < 0)
1093 return r;
1094
1095 if (ret)
1096 *ret = o;
1097
1098 if (offset)
1099 *offset = p;
1100
1101 return 1;
1102}
1103
1104static int generic_array_get_plus_one(JournalFile *f,
1105 uint64_t extra,
1106 uint64_t first,
1107 uint64_t i,
1108 Object **ret, uint64_t *offset) {
1109
1110 Object *o;
1111
1112 assert(f);
1113
1114 if (i == 0) {
1115 int r;
1116
1117 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1118 if (r < 0)
1119 return r;
1120
de190aef
LP
1121 if (ret)
1122 *ret = o;
cec736d2 1123
de190aef
LP
1124 if (offset)
1125 *offset = extra;
cec736d2 1126
de190aef 1127 return 1;
cec736d2
LP
1128 }
1129
de190aef
LP
1130 return generic_array_get(f, first, i-1, ret, offset);
1131}
cec736d2 1132
de190aef
LP
/* Results of the test_object() callbacks used by the bisection code.
 * TEST_FOUND is turned into TEST_RIGHT or TEST_LEFT by the callers,
 * depending on the search direction. TEST_RIGHT makes the bisection
 * continue at or before the tested item, anything else makes it
 * continue behind the tested item. */
enum {
        TEST_FOUND = 0,
        TEST_LEFT  = 1,
        TEST_RIGHT = 2
};
cec736d2 1138
de190aef
LP
1139static int generic_array_bisect(JournalFile *f,
1140 uint64_t first,
1141 uint64_t n,
1142 uint64_t needle,
1143 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1144 direction_t direction,
1145 Object **ret,
1146 uint64_t *offset,
1147 uint64_t *idx) {
1148
1149 uint64_t a, p, t = 0, i = 0, last_p = 0;
1150 bool subtract_one = false;
1151 Object *o, *array = NULL;
1152 int r;
cec736d2 1153
de190aef
LP
1154 assert(f);
1155 assert(test_object);
cec736d2 1156
de190aef
LP
1157 a = first;
1158 while (a > 0) {
1159 uint64_t left, right, k, lp;
1160
1161 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1162 if (r < 0)
1163 return r;
1164
de190aef
LP
1165 k = journal_file_entry_array_n_items(array);
1166 right = MIN(k, n);
1167 if (right <= 0)
1168 return 0;
cec736d2 1169
de190aef
LP
1170 i = right - 1;
1171 lp = p = le64toh(array->entry_array.items[i]);
1172 if (p <= 0)
1173 return -EBADMSG;
cec736d2 1174
de190aef
LP
1175 r = test_object(f, p, needle);
1176 if (r < 0)
1177 return r;
cec736d2 1178
de190aef
LP
1179 if (r == TEST_FOUND)
1180 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1181
1182 if (r == TEST_RIGHT) {
1183 left = 0;
1184 right -= 1;
1185 for (;;) {
1186 if (left == right) {
1187 if (direction == DIRECTION_UP)
1188 subtract_one = true;
1189
1190 i = left;
1191 goto found;
1192 }
1193
1194 assert(left < right);
1195
1196 i = (left + right) / 2;
1197 p = le64toh(array->entry_array.items[i]);
1198 if (p <= 0)
1199 return -EBADMSG;
1200
1201 r = test_object(f, p, needle);
1202 if (r < 0)
1203 return r;
cec736d2 1204
de190aef
LP
1205 if (r == TEST_FOUND)
1206 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1207
1208 if (r == TEST_RIGHT)
1209 right = i;
1210 else
1211 left = i + 1;
1212 }
1213 }
1214
1215 if (k > n)
cec736d2
LP
1216 return 0;
1217
de190aef
LP
1218 last_p = lp;
1219
1220 n -= k;
1221 t += k;
1222 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1223 }
1224
1225 return 0;
de190aef
LP
1226
1227found:
1228 if (subtract_one && t == 0 && i == 0)
1229 return 0;
1230
1231 if (subtract_one && i == 0)
1232 p = last_p;
1233 else if (subtract_one)
1234 p = le64toh(array->entry_array.items[i-1]);
1235 else
1236 p = le64toh(array->entry_array.items[i]);
1237
1238 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1239 if (r < 0)
1240 return r;
1241
1242 if (ret)
1243 *ret = o;
1244
1245 if (offset)
1246 *offset = p;
1247
1248 if (idx)
1249 *idx = t + i - (subtract_one ? 1 : 0);
1250
1251 return 1;
cec736d2
LP
1252}
1253
de190aef
LP
1254static int generic_array_bisect_plus_one(JournalFile *f,
1255 uint64_t extra,
1256 uint64_t first,
1257 uint64_t n,
1258 uint64_t needle,
1259 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1260 direction_t direction,
1261 Object **ret,
1262 uint64_t *offset,
1263 uint64_t *idx) {
1264
cec736d2
LP
1265 int r;
1266
1267 assert(f);
de190aef 1268 assert(test_object);
cec736d2 1269
de190aef
LP
1270 if (n <= 0)
1271 return 0;
cec736d2 1272
de190aef
LP
1273 /* This bisects the array in object 'first', but first checks
1274 * an extra */
de190aef
LP
1275 r = test_object(f, extra, needle);
1276 if (r < 0)
1277 return r;
a536e261
LP
1278
1279 if (r == TEST_FOUND)
1280 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1281
1282 if (r == TEST_RIGHT) {
de190aef
LP
1283 Object *o;
1284
1285 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1286 if (r < 0)
1287 return r;
1288
1289 if (ret)
1290 *ret = o;
cec736d2 1291
de190aef
LP
1292 if (offset)
1293 *offset = extra;
440ee366
LP
1294
1295 if (idx)
1296 *idx = 0;
1297
1298 return 1;
a536e261 1299 }
cec736d2 1300
de190aef
LP
1301 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1302
ecf68b1d 1303 if (r > 0 && idx)
de190aef
LP
1304 (*idx) ++;
1305
1306 return r;
1307}
1308
1309static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1310 Object *o;
1311 int r;
1312
1313 assert(f);
1314 assert(p > 0);
1315
1316 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1317 if (r < 0)
1318 return r;
1319
de190aef
LP
1320 if (le64toh(o->entry.seqnum) == needle)
1321 return TEST_FOUND;
1322 else if (le64toh(o->entry.seqnum) < needle)
1323 return TEST_LEFT;
1324 else
1325 return TEST_RIGHT;
1326}
cec736d2 1327
de190aef
LP
1328int journal_file_move_to_entry_by_seqnum(
1329 JournalFile *f,
1330 uint64_t seqnum,
1331 direction_t direction,
1332 Object **ret,
1333 uint64_t *offset) {
1334
1335 return generic_array_bisect(f,
1336 le64toh(f->header->entry_array_offset),
1337 le64toh(f->header->n_entries),
1338 seqnum,
1339 test_object_seqnum,
1340 direction,
1341 ret, offset, NULL);
1342}
cec736d2 1343
de190aef
LP
1344static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1345 Object *o;
1346 int r;
1347
1348 assert(f);
1349 assert(p > 0);
1350
1351 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1352 if (r < 0)
1353 return r;
1354
1355 if (le64toh(o->entry.realtime) == needle)
1356 return TEST_FOUND;
1357 else if (le64toh(o->entry.realtime) < needle)
1358 return TEST_LEFT;
1359 else
1360 return TEST_RIGHT;
cec736d2
LP
1361}
1362
de190aef
LP
1363int journal_file_move_to_entry_by_realtime(
1364 JournalFile *f,
1365 uint64_t realtime,
1366 direction_t direction,
1367 Object **ret,
1368 uint64_t *offset) {
1369
1370 return generic_array_bisect(f,
1371 le64toh(f->header->entry_array_offset),
1372 le64toh(f->header->n_entries),
1373 realtime,
1374 test_object_realtime,
1375 direction,
1376 ret, offset, NULL);
1377}
1378
1379static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1380 Object *o;
1381 int r;
1382
1383 assert(f);
1384 assert(p > 0);
1385
1386 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1387 if (r < 0)
1388 return r;
1389
1390 if (le64toh(o->entry.monotonic) == needle)
1391 return TEST_FOUND;
1392 else if (le64toh(o->entry.monotonic) < needle)
1393 return TEST_LEFT;
1394 else
1395 return TEST_RIGHT;
1396}
1397
1398int journal_file_move_to_entry_by_monotonic(
1399 JournalFile *f,
1400 sd_id128_t boot_id,
1401 uint64_t monotonic,
1402 direction_t direction,
1403 Object **ret,
1404 uint64_t *offset) {
1405
10b6f904 1406 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1407 Object *o;
1408 int r;
1409
10b6f904 1410 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1411
1412 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1413 if (r < 0)
1414 return r;
1415 else if (r == 0)
1416 return -ENOENT;
1417
1418 return generic_array_bisect_plus_one(f,
1419 le64toh(o->data.entry_offset),
1420 le64toh(o->data.entry_array_offset),
1421 le64toh(o->data.n_entries),
1422 monotonic,
1423 test_object_monotonic,
1424 direction,
1425 ret, offset, NULL);
1426}
1427
1428static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1429 assert(f);
1430 assert(p > 0);
1431
1432 if (p == needle)
1433 return TEST_FOUND;
1434 else if (p < needle)
1435 return TEST_LEFT;
1436 else
1437 return TEST_RIGHT;
1438}
1439
1440int journal_file_next_entry(
1441 JournalFile *f,
1442 Object *o, uint64_t p,
1443 direction_t direction,
1444 Object **ret, uint64_t *offset) {
1445
1446 uint64_t i, n;
cec736d2
LP
1447 int r;
1448
1449 assert(f);
de190aef
LP
1450 assert(p > 0 || !o);
1451
1452 n = le64toh(f->header->n_entries);
1453 if (n <= 0)
1454 return 0;
cec736d2
LP
1455
1456 if (!o)
de190aef 1457 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1458 else {
de190aef 1459 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1460 return -EINVAL;
1461
de190aef
LP
1462 r = generic_array_bisect(f,
1463 le64toh(f->header->entry_array_offset),
1464 le64toh(f->header->n_entries),
1465 p,
1466 test_object_offset,
1467 DIRECTION_DOWN,
1468 NULL, NULL,
1469 &i);
1470 if (r <= 0)
1471 return r;
1472
1473 if (direction == DIRECTION_DOWN) {
1474 if (i >= n - 1)
1475 return 0;
1476
1477 i++;
1478 } else {
1479 if (i <= 0)
1480 return 0;
1481
1482 i--;
1483 }
cec736d2
LP
1484 }
1485
de190aef
LP
1486 /* And jump to it */
1487 return generic_array_get(f,
1488 le64toh(f->header->entry_array_offset),
1489 i,
1490 ret, offset);
1491}
cec736d2 1492
de190aef
LP
1493int journal_file_skip_entry(
1494 JournalFile *f,
1495 Object *o, uint64_t p,
1496 int64_t skip,
1497 Object **ret, uint64_t *offset) {
1498
1499 uint64_t i, n;
1500 int r;
1501
1502 assert(f);
1503 assert(o);
1504 assert(p > 0);
1505
1506 if (o->object.type != OBJECT_ENTRY)
1507 return -EINVAL;
1508
1509 r = generic_array_bisect(f,
1510 le64toh(f->header->entry_array_offset),
1511 le64toh(f->header->n_entries),
1512 p,
1513 test_object_offset,
1514 DIRECTION_DOWN,
1515 NULL, NULL,
1516 &i);
1517 if (r <= 0)
cec736d2
LP
1518 return r;
1519
de190aef
LP
1520 /* Calculate new index */
1521 if (skip < 0) {
1522 if ((uint64_t) -skip >= i)
1523 i = 0;
1524 else
1525 i = i - (uint64_t) -skip;
1526 } else
1527 i += (uint64_t) skip;
cec736d2 1528
de190aef
LP
1529 n = le64toh(f->header->n_entries);
1530 if (n <= 0)
1531 return -EBADMSG;
cec736d2 1532
de190aef
LP
1533 if (i >= n)
1534 i = n-1;
1535
1536 return generic_array_get(f,
1537 le64toh(f->header->entry_array_offset),
1538 i,
1539 ret, offset);
cec736d2
LP
1540}
1541
de190aef
LP
1542int journal_file_next_entry_for_data(
1543 JournalFile *f,
1544 Object *o, uint64_t p,
1545 uint64_t data_offset,
1546 direction_t direction,
1547 Object **ret, uint64_t *offset) {
1548
1549 uint64_t n, i;
cec736d2 1550 int r;
de190aef 1551 Object *d;
cec736d2
LP
1552
1553 assert(f);
de190aef 1554 assert(p > 0 || !o);
cec736d2 1555
de190aef 1556 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1557 if (r < 0)
de190aef 1558 return r;
cec736d2 1559
de190aef
LP
1560 n = le64toh(d->data.n_entries);
1561 if (n <= 0)
1562 return n;
cec736d2 1563
de190aef
LP
1564 if (!o)
1565 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1566 else {
1567 if (o->object.type != OBJECT_ENTRY)
1568 return -EINVAL;
cec736d2 1569
de190aef
LP
1570 r = generic_array_bisect_plus_one(f,
1571 le64toh(d->data.entry_offset),
1572 le64toh(d->data.entry_array_offset),
1573 le64toh(d->data.n_entries),
1574 p,
1575 test_object_offset,
1576 DIRECTION_DOWN,
1577 NULL, NULL,
1578 &i);
1579
1580 if (r <= 0)
cec736d2
LP
1581 return r;
1582
de190aef
LP
1583 if (direction == DIRECTION_DOWN) {
1584 if (i >= n - 1)
1585 return 0;
cec736d2 1586
de190aef
LP
1587 i++;
1588 } else {
1589 if (i <= 0)
1590 return 0;
cec736d2 1591
de190aef
LP
1592 i--;
1593 }
cec736d2 1594
de190aef 1595 }
cec736d2 1596
de190aef
LP
1597 return generic_array_get_plus_one(f,
1598 le64toh(d->data.entry_offset),
1599 le64toh(d->data.entry_array_offset),
1600 i,
1601 ret, offset);
1602}
cec736d2 1603
de190aef
LP
1604int journal_file_move_to_entry_by_seqnum_for_data(
1605 JournalFile *f,
1606 uint64_t data_offset,
1607 uint64_t seqnum,
1608 direction_t direction,
1609 Object **ret, uint64_t *offset) {
cec736d2 1610
de190aef
LP
1611 Object *d;
1612 int r;
cec736d2 1613
de190aef
LP
1614 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1615 if (r <= 0)
1616 return r;
cec736d2 1617
de190aef
LP
1618 return generic_array_bisect_plus_one(f,
1619 le64toh(d->data.entry_offset),
1620 le64toh(d->data.entry_array_offset),
1621 le64toh(d->data.n_entries),
1622 seqnum,
1623 test_object_seqnum,
1624 direction,
1625 ret, offset, NULL);
1626}
cec736d2 1627
de190aef
LP
1628int journal_file_move_to_entry_by_realtime_for_data(
1629 JournalFile *f,
1630 uint64_t data_offset,
1631 uint64_t realtime,
1632 direction_t direction,
1633 Object **ret, uint64_t *offset) {
1634
1635 Object *d;
1636 int r;
1637
1638 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1639 if (r <= 0)
1640 return r;
1641
1642 return generic_array_bisect_plus_one(f,
1643 le64toh(d->data.entry_offset),
1644 le64toh(d->data.entry_array_offset),
1645 le64toh(d->data.n_entries),
1646 realtime,
1647 test_object_realtime,
1648 direction,
1649 ret, offset, NULL);
cec736d2
LP
1650}
1651
1652void journal_file_dump(JournalFile *f) {
1653 char a[33], b[33], c[33];
1654 Object *o;
1655 int r;
1656 uint64_t p;
1657
1658 assert(f);
1659
de190aef
LP
1660 printf("File Path: %s\n"
1661 "File ID: %s\n"
cec736d2
LP
1662 "Machine ID: %s\n"
1663 "Boot ID: %s\n"
de190aef
LP
1664 "Arena size: %llu\n"
1665 "Objects: %lu\n"
1666 "Entries: %lu\n",
1667 f->path,
cec736d2
LP
1668 sd_id128_to_string(f->header->file_id, a),
1669 sd_id128_to_string(f->header->machine_id, b),
1670 sd_id128_to_string(f->header->boot_id, c),
de190aef
LP
1671 (unsigned long long) le64toh(f->header->arena_size),
1672 (unsigned long) le64toh(f->header->n_objects),
1673 (unsigned long) le64toh(f->header->n_entries));
cec736d2 1674
23b0b2b2 1675 p = le64toh(f->header->header_size);
cec736d2 1676 while (p != 0) {
de190aef 1677 r = journal_file_move_to_object(f, -1, p, &o);
cec736d2
LP
1678 if (r < 0)
1679 goto fail;
1680
1681 switch (o->object.type) {
1682
1683 case OBJECT_UNUSED:
1684 printf("Type: OBJECT_UNUSED\n");
1685 break;
1686
1687 case OBJECT_DATA:
1688 printf("Type: OBJECT_DATA\n");
1689 break;
1690
1691 case OBJECT_ENTRY:
3fbf9cbb
LP
1692 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1693 (unsigned long long) le64toh(o->entry.seqnum),
1694 (unsigned long long) le64toh(o->entry.monotonic),
1695 (unsigned long long) le64toh(o->entry.realtime));
cec736d2
LP
1696 break;
1697
de190aef
LP
1698 case OBJECT_FIELD_HASH_TABLE:
1699 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
cec736d2
LP
1700 break;
1701
de190aef
LP
1702 case OBJECT_DATA_HASH_TABLE:
1703 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1704 break;
1705
1706 case OBJECT_ENTRY_ARRAY:
1707 printf("Type: OBJECT_ENTRY_ARRAY\n");
cec736d2 1708 break;
8144056f
LP
1709
1710 case OBJECT_SIGNATURE:
1711 printf("Type: OBJECT_SIGNATURE\n");
1712 break;
cec736d2
LP
1713 }
1714
807e17f0
LP
1715 if (o->object.flags & OBJECT_COMPRESSED)
1716 printf("Flags: COMPRESSED\n");
1717
cec736d2
LP
1718 if (p == le64toh(f->header->tail_object_offset))
1719 p = 0;
1720 else
1721 p = p + ALIGN64(le64toh(o->object.size));
1722 }
1723
1724 return;
1725fail:
1726 log_error("File corrupt");
1727}
1728
1729int journal_file_open(
1730 const char *fname,
1731 int flags,
1732 mode_t mode,
0ac38b70 1733 JournalFile *template,
cec736d2
LP
1734 JournalFile **ret) {
1735
1736 JournalFile *f;
1737 int r;
1738 bool newly_created = false;
1739
1740 assert(fname);
1741
1742 if ((flags & O_ACCMODE) != O_RDONLY &&
1743 (flags & O_ACCMODE) != O_RDWR)
1744 return -EINVAL;
1745
9447a7f1
LP
1746 if (!endswith(fname, ".journal"))
1747 return -EINVAL;
1748
cec736d2
LP
1749 f = new0(JournalFile, 1);
1750 if (!f)
1751 return -ENOMEM;
1752
0ac38b70
LP
1753 f->fd = -1;
1754 f->flags = flags;
1755 f->mode = mode;
cec736d2
LP
1756 f->writable = (flags & O_ACCMODE) != O_RDONLY;
1757 f->prot = prot_from_flags(flags);
1758
15944db8
LP
1759 if (template) {
1760 f->metrics = template->metrics;
1761 f->compress = template->compress;
1762 }
1763
cec736d2
LP
1764 f->path = strdup(fname);
1765 if (!f->path) {
1766 r = -ENOMEM;
1767 goto fail;
1768 }
1769
0ac38b70
LP
1770 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1771 if (f->fd < 0) {
1772 r = -errno;
1773 goto fail;
1774 }
1775
cec736d2
LP
1776 if (fstat(f->fd, &f->last_stat) < 0) {
1777 r = -errno;
1778 goto fail;
1779 }
1780
1781 if (f->last_stat.st_size == 0 && f->writable) {
1782 newly_created = true;
1783
0ac38b70 1784 r = journal_file_init_header(f, template);
cec736d2
LP
1785 if (r < 0)
1786 goto fail;
1787
1788 if (fstat(f->fd, &f->last_stat) < 0) {
1789 r = -errno;
1790 goto fail;
1791 }
1792 }
1793
1794 if (f->last_stat.st_size < (off_t) sizeof(Header)) {
1795 r = -EIO;
1796 goto fail;
1797 }
1798
1799 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
1800 if (f->header == MAP_FAILED) {
1801 f->header = NULL;
1802 r = -errno;
1803 goto fail;
1804 }
1805
1806 if (!newly_created) {
1807 r = journal_file_verify_header(f);
1808 if (r < 0)
1809 goto fail;
1810 }
1811
1812 if (f->writable) {
1813 r = journal_file_refresh_header(f);
1814 if (r < 0)
1815 goto fail;
1816 }
1817
1818 if (newly_created) {
1819
de190aef 1820 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
1821 if (r < 0)
1822 goto fail;
1823
de190aef 1824 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
1825 if (r < 0)
1826 goto fail;
1827 }
1828
de190aef 1829 r = journal_file_map_field_hash_table(f);
cec736d2
LP
1830 if (r < 0)
1831 goto fail;
1832
de190aef 1833 r = journal_file_map_data_hash_table(f);
cec736d2
LP
1834 if (r < 0)
1835 goto fail;
1836
1837 if (ret)
1838 *ret = f;
1839
1840 return 0;
1841
1842fail:
1843 journal_file_close(f);
1844
1845 return r;
1846}
0ac38b70
LP
1847
1848int journal_file_rotate(JournalFile **f) {
1849 char *p;
1850 size_t l;
1851 JournalFile *old_file, *new_file = NULL;
1852 int r;
1853
1854 assert(f);
1855 assert(*f);
1856
1857 old_file = *f;
1858
1859 if (!old_file->writable)
1860 return -EINVAL;
1861
1862 if (!endswith(old_file->path, ".journal"))
1863 return -EINVAL;
1864
1865 l = strlen(old_file->path);
1866
9447a7f1 1867 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
1868 if (!p)
1869 return -ENOMEM;
1870
1871 memcpy(p, old_file->path, l - 8);
1872 p[l-8] = '@';
1873 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
1874 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
1875 "-%016llx-%016llx.journal",
1876 (unsigned long long) le64toh((*f)->header->seqnum),
1877 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
1878
1879 r = rename(old_file->path, p);
1880 free(p);
1881
1882 if (r < 0)
1883 return -errno;
1884
ccdbaf91 1885 old_file->header->state = STATE_ARCHIVED;
0ac38b70
LP
1886
1887 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file);
1888 journal_file_close(old_file);
1889
1890 *f = new_file;
1891 return r;
1892}
1893
9447a7f1
LP
1894int journal_file_open_reliably(
1895 const char *fname,
1896 int flags,
1897 mode_t mode,
1898 JournalFile *template,
1899 JournalFile **ret) {
1900
1901 int r;
1902 size_t l;
1903 char *p;
1904
1905 r = journal_file_open(fname, flags, mode, template, ret);
0071d9f1
LP
1906 if (r != -EBADMSG && /* corrupted */
1907 r != -ENODATA && /* truncated */
1908 r != -EHOSTDOWN && /* other machine */
1909 r != -EPROTONOSUPPORT) /* incompatible feature */
9447a7f1
LP
1910 return r;
1911
1912 if ((flags & O_ACCMODE) == O_RDONLY)
1913 return r;
1914
1915 if (!(flags & O_CREAT))
1916 return r;
1917
5c70eab4
LP
1918 /* The file is corrupted. Rotate it away and try it again (but only once) */
1919
9447a7f1
LP
1920 l = strlen(fname);
1921 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
1922 (int) (l-8), fname,
1923 (unsigned long long) now(CLOCK_REALTIME),
1924 random_ull()) < 0)
1925 return -ENOMEM;
1926
1927 r = rename(fname, p);
1928 free(p);
1929 if (r < 0)
1930 return -errno;
1931
1932 log_warning("File %s corrupted, renaming and replacing.", fname);
1933
1934 return journal_file_open(fname, flags, mode, template, ret);
1935}
1936
0ac38b70
LP
1937struct vacuum_info {
1938 off_t usage;
1939 char *filename;
1940
1941 uint64_t realtime;
1942 sd_id128_t seqnum_id;
1943 uint64_t seqnum;
5c70eab4
LP
1944
1945 bool have_seqnum;
0ac38b70
LP
1946};
1947
1948static int vacuum_compare(const void *_a, const void *_b) {
1949 const struct vacuum_info *a, *b;
1950
1951 a = _a;
1952 b = _b;
1953
5c70eab4
LP
1954 if (a->have_seqnum && b->have_seqnum &&
1955 sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
0ac38b70
LP
1956 if (a->seqnum < b->seqnum)
1957 return -1;
1958 else if (a->seqnum > b->seqnum)
1959 return 1;
1960 else
1961 return 0;
1962 }
1963
1964 if (a->realtime < b->realtime)
1965 return -1;
1966 else if (a->realtime > b->realtime)
1967 return 1;
5c70eab4 1968 else if (a->have_seqnum && b->have_seqnum)
0ac38b70 1969 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
5c70eab4
LP
1970 else
1971 return strcmp(a->filename, b->filename);
0ac38b70
LP
1972}
1973
1974int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
1975 DIR *d;
1976 int r = 0;
1977 struct vacuum_info *list = NULL;
1978 unsigned n_list = 0, n_allocated = 0, i;
1979 uint64_t sum = 0;
1980
1981 assert(directory);
1982
1983 if (max_use <= 0)
babfc091 1984 return 0;
0ac38b70
LP
1985
1986 d = opendir(directory);
1987 if (!d)
1988 return -errno;
1989
1990 for (;;) {
1991 int k;
1992 struct dirent buf, *de;
1993 size_t q;
1994 struct stat st;
1995 char *p;
7ea07dcd 1996 unsigned long long seqnum = 0, realtime;
0ac38b70 1997 sd_id128_t seqnum_id;
5c70eab4 1998 bool have_seqnum;
0ac38b70
LP
1999
2000 k = readdir_r(d, &buf, &de);
2001 if (k != 0) {
2002 r = -k;
2003 goto finish;
2004 }
2005
2006 if (!de)
2007 break;
2008
5c70eab4
LP
2009 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2010 continue;
2011
2012 if (!S_ISREG(st.st_mode))
0ac38b70
LP
2013 continue;
2014
2015 q = strlen(de->d_name);
2016
5c70eab4 2017 if (endswith(de->d_name, ".journal")) {
0ac38b70 2018
5c70eab4 2019 /* Vacuum archived files */
0ac38b70 2020
5c70eab4
LP
2021 if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2022 continue;
0ac38b70 2023
5c70eab4
LP
2024 if (de->d_name[q-8-16-1] != '-' ||
2025 de->d_name[q-8-16-1-16-1] != '-' ||
2026 de->d_name[q-8-16-1-16-1-32-1] != '@')
2027 continue;
0ac38b70 2028
5c70eab4
LP
2029 p = strdup(de->d_name);
2030 if (!p) {
2031 r = -ENOMEM;
2032 goto finish;
2033 }
0ac38b70 2034
5c70eab4
LP
2035 de->d_name[q-8-16-1-16-1] = 0;
2036 if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2037 free(p);
2038 continue;
2039 }
2040
2041 if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2042 free(p);
2043 continue;
2044 }
2045
2046 have_seqnum = true;
2047
2048 } else if (endswith(de->d_name, ".journal~")) {
2049 unsigned long long tmp;
2050
2051 /* Vacuum corrupted files */
2052
2053 if (q < 1 + 16 + 1 + 16 + 8 + 1)
2054 continue;
0ac38b70 2055
5c70eab4
LP
2056 if (de->d_name[q-1-8-16-1] != '-' ||
2057 de->d_name[q-1-8-16-1-16-1] != '@')
2058 continue;
2059
2060 p = strdup(de->d_name);
2061 if (!p) {
2062 r = -ENOMEM;
2063 goto finish;
2064 }
2065
2066 if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2067 free(p);
2068 continue;
2069 }
2070
2071 have_seqnum = false;
2072 } else
0ac38b70 2073 continue;
0ac38b70
LP
2074
2075 if (n_list >= n_allocated) {
2076 struct vacuum_info *j;
2077
2078 n_allocated = MAX(n_allocated * 2U, 8U);
2079 j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2080 if (!j) {
2081 free(p);
2082 r = -ENOMEM;
2083 goto finish;
2084 }
2085
2086 list = j;
2087 }
2088
2089 list[n_list].filename = p;
a3a52c0f 2090 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
0ac38b70
LP
2091 list[n_list].seqnum = seqnum;
2092 list[n_list].realtime = realtime;
2093 list[n_list].seqnum_id = seqnum_id;
5c70eab4 2094 list[n_list].have_seqnum = have_seqnum;
0ac38b70
LP
2095
2096 sum += list[n_list].usage;
2097
2098 n_list ++;
2099 }
2100
2101 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
2102
2103 for(i = 0; i < n_list; i++) {
2104 struct statvfs ss;
2105
2106 if (fstatvfs(dirfd(d), &ss) < 0) {
2107 r = -errno;
2108 goto finish;
2109 }
2110
2111 if (sum <= max_use &&
2112 (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2113 break;
2114
2115 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
e7bf07b3 2116 log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
0ac38b70
LP
2117 sum -= list[i].usage;
2118 } else if (errno != ENOENT)
2119 log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2120 }
2121
2122finish:
2123 for (i = 0; i < n_list; i++)
2124 free(list[i].filename);
2125
2126 free(list);
2127
de190aef
LP
2128 if (d)
2129 closedir(d);
2130
0ac38b70
LP
2131 return r;
2132}
cf244689
LP
2133
2134int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2135 uint64_t i, n;
2136 uint64_t q, xor_hash = 0;
2137 int r;
2138 EntryItem *items;
2139 dual_timestamp ts;
2140
2141 assert(from);
2142 assert(to);
2143 assert(o);
2144 assert(p);
2145
2146 if (!to->writable)
2147 return -EPERM;
2148
2149 ts.monotonic = le64toh(o->entry.monotonic);
2150 ts.realtime = le64toh(o->entry.realtime);
2151
2152 if (to->tail_entry_monotonic_valid &&
2153 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2154 return -EINVAL;
2155
2156 if (ts.realtime < le64toh(to->header->tail_entry_realtime))
2157 return -EINVAL;
2158
2159 n = journal_file_entry_n_items(o);
2160 items = alloca(sizeof(EntryItem) * n);
2161
2162 for (i = 0; i < n; i++) {
4fd052ae
FC
2163 uint64_t l, h;
2164 le64_t le_hash;
cf244689
LP
2165 size_t t;
2166 void *data;
2167 Object *u;
2168
2169 q = le64toh(o->entry.items[i].object_offset);
2170 le_hash = o->entry.items[i].hash;
2171
2172 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2173 if (r < 0)
2174 return r;
2175
2176 if (le_hash != o->data.hash)
2177 return -EBADMSG;
2178
2179 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2180 t = (size_t) l;
2181
2182 /* We hit the limit on 32bit machines */
2183 if ((uint64_t) t != l)
2184 return -E2BIG;
2185
2186 if (o->object.flags & OBJECT_COMPRESSED) {
2187#ifdef HAVE_XZ
2188 uint64_t rsize;
2189
2190 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2191 return -EBADMSG;
2192
2193 data = from->compress_buffer;
2194 l = rsize;
2195#else
2196 return -EPROTONOSUPPORT;
2197#endif
2198 } else
2199 data = o->data.payload;
2200
2201 r = journal_file_append_data(to, data, l, &u, &h);
2202 if (r < 0)
2203 return r;
2204
2205 xor_hash ^= le64toh(u->data.hash);
2206 items[i].object_offset = htole64(h);
2207 items[i].hash = u->data.hash;
2208
2209 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2210 if (r < 0)
2211 return r;
2212 }
2213
2214 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2215}
babfc091
LP
2216
2217void journal_default_metrics(JournalMetrics *m, int fd) {
2218 uint64_t fs_size = 0;
2219 struct statvfs ss;
a7bc2c2a 2220 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2221
2222 assert(m);
2223 assert(fd >= 0);
2224
2225 if (fstatvfs(fd, &ss) >= 0)
2226 fs_size = ss.f_frsize * ss.f_blocks;
2227
2228 if (m->max_use == (uint64_t) -1) {
2229
2230 if (fs_size > 0) {
2231 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2232
2233 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2234 m->max_use = DEFAULT_MAX_USE_UPPER;
2235
2236 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2237 m->max_use = DEFAULT_MAX_USE_LOWER;
2238 } else
2239 m->max_use = DEFAULT_MAX_USE_LOWER;
2240 } else {
2241 m->max_use = PAGE_ALIGN(m->max_use);
2242
2243 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2244 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2245 }
2246
2247 if (m->max_size == (uint64_t) -1) {
2248 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2249
2250 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2251 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2252 } else
2253 m->max_size = PAGE_ALIGN(m->max_size);
2254
2255 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2256 m->max_size = JOURNAL_FILE_SIZE_MIN;
2257
2258 if (m->max_size*2 > m->max_use)
2259 m->max_use = m->max_size*2;
2260
2261 if (m->min_size == (uint64_t) -1)
2262 m->min_size = JOURNAL_FILE_SIZE_MIN;
2263 else {
2264 m->min_size = PAGE_ALIGN(m->min_size);
2265
2266 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2267 m->min_size = JOURNAL_FILE_SIZE_MIN;
2268
2269 if (m->min_size > m->max_size)
2270 m->max_size = m->min_size;
2271 }
2272
2273 if (m->keep_free == (uint64_t) -1) {
2274
2275 if (fs_size > 0) {
2276 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2277
2278 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2279 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2280
2281 } else
2282 m->keep_free = DEFAULT_KEEP_FREE;
2283 }
2284
e7bf07b3
LP
2285 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2286 format_bytes(a, sizeof(a), m->max_use),
2287 format_bytes(b, sizeof(b), m->max_size),
2288 format_bytes(c, sizeof(c), m->min_size),
2289 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2290}
08984293
LP
2291
2292int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2293 Object *o;
2294 int r;
2295
2296 assert(f);
2297 assert(from || to);
2298
2299 if (from) {
2300 r = journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, &o, NULL);
2301 if (r <= 0)
2302 return r;
2303
2304 *from = le64toh(o->entry.realtime);
2305 }
2306
2307 if (to) {
2308 r = journal_file_next_entry(f, NULL, 0, DIRECTION_UP, &o, NULL);
2309 if (r <= 0)
2310 return r;
2311
2312 *to = le64toh(o->entry.realtime);
2313 }
2314
2315 return 1;
2316}
2317
2318int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2319 char t[9+32+1] = "_BOOT_ID=";
2320 Object *o;
2321 uint64_t p;
2322 int r;
2323
2324 assert(f);
2325 assert(from || to);
2326
2327 sd_id128_to_string(boot_id, t + 9);
2328
2329 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2330 if (r <= 0)
2331 return r;
2332
2333 if (le64toh(o->data.n_entries) <= 0)
2334 return 0;
2335
2336 if (from) {
2337 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2338 if (r < 0)
2339 return r;
2340
2341 *from = le64toh(o->entry.monotonic);
2342 }
2343
2344 if (to) {
2345 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2346 if (r < 0)
2347 return r;
2348
2349 r = generic_array_get_plus_one(f,
2350 le64toh(o->data.entry_offset),
2351 le64toh(o->data.entry_array_offset),
2352 le64toh(o->data.n_entries)-1,
2353 &o, NULL);
2354 if (r <= 0)
2355 return r;
2356
2357 *to = le64toh(o->entry.monotonic);
2358 }
2359
2360 return 1;
2361}