]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journal: check fields we search for more carefully
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2011 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
32#include "lookup3.h"
807e17f0 33#include "compress.h"
cec736d2 34
de190aef
LP
/* Size in bytes of the item area of the data and field hash tables
 * that are created in new journal files */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*16ULL)
#define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL*16ULL)

/* Size in bytes we try to map at once when a mapping window has to be
 * (re-)established */
#define DEFAULT_WINDOW_SIZE (8ULL*1024ULL*1024ULL)

/* Payloads shorter than this are never handed to the compressor */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */

/* The eight magic bytes every journal file header starts with. Not
 * NUL-terminated, always compare with an explicit length of 8. */
static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };

/* Rounds x up to the next multiple of 8 */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
64
65void journal_file_close(JournalFile *f) {
de190aef 66 int t;
cec736d2 67
de190aef 68 assert(f);
cec736d2 69
d384c7a8
MS
70 if (f->header) {
71 if (f->writable)
72 f->header->state = STATE_OFFLINE;
cec736d2 73
d384c7a8
MS
74 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
75 }
cec736d2 76
de190aef
LP
77 for (t = 0; t < _WINDOW_MAX; t++)
78 if (f->windows[t].ptr)
79 munmap(f->windows[t].ptr, f->windows[t].size);
cec736d2 80
0ac38b70
LP
81 if (f->fd >= 0)
82 close_nointr_nofail(f->fd);
83
cec736d2 84 free(f->path);
807e17f0
LP
85
86#ifdef HAVE_XZ
87 free(f->compress_buffer);
88#endif
89
cec736d2
LP
90 free(f);
91}
92
0ac38b70 93static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
94 Header h;
95 ssize_t k;
96 int r;
97
98 assert(f);
99
100 zero(h);
101 memcpy(h.signature, signature, 8);
23b0b2b2 102 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2
LP
103
104 r = sd_id128_randomize(&h.file_id);
105 if (r < 0)
106 return r;
107
0ac38b70
LP
108 if (template) {
109 h.seqnum_id = template->header->seqnum_id;
110 h.seqnum = template->header->seqnum;
111 } else
112 h.seqnum_id = h.file_id;
cec736d2
LP
113
114 k = pwrite(f->fd, &h, sizeof(h), 0);
115 if (k < 0)
116 return -errno;
117
118 if (k != sizeof(h))
119 return -EIO;
120
121 return 0;
122}
123
124static int journal_file_refresh_header(JournalFile *f) {
125 int r;
de190aef 126 sd_id128_t boot_id;
cec736d2
LP
127
128 assert(f);
129
130 r = sd_id128_get_machine(&f->header->machine_id);
131 if (r < 0)
132 return r;
133
de190aef 134 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
135 if (r < 0)
136 return r;
137
de190aef
LP
138 if (sd_id128_equal(boot_id, f->header->boot_id))
139 f->tail_entry_monotonic_valid = true;
140
141 f->header->boot_id = boot_id;
142
143 f->header->state = STATE_ONLINE;
b788cc23
LP
144
145 __sync_synchronize();
146
cec736d2
LP
147 return 0;
148}
149
150static int journal_file_verify_header(JournalFile *f) {
151 assert(f);
152
153 if (memcmp(f->header, signature, 8))
154 return -EBADMSG;
155
807e17f0
LP
156#ifdef HAVE_XZ
157 if ((le64toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
158 return -EPROTONOSUPPORT;
159#else
cec736d2
LP
160 if (f->header->incompatible_flags != 0)
161 return -EPROTONOSUPPORT;
807e17f0 162#endif
cec736d2 163
23b0b2b2
LP
164 if (f->header->header_size != htole64(ALIGN64(sizeof(*(f->header)))))
165 return -EBADMSG;
166
167 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
cec736d2
LP
168 return -ENODATA;
169
170 if (f->writable) {
ccdbaf91 171 uint8_t state;
cec736d2
LP
172 sd_id128_t machine_id;
173 int r;
174
175 r = sd_id128_get_machine(&machine_id);
176 if (r < 0)
177 return r;
178
179 if (!sd_id128_equal(machine_id, f->header->machine_id))
180 return -EHOSTDOWN;
181
de190aef 182 state = f->header->state;
cec736d2
LP
183
184 if (state == STATE_ONLINE)
185 log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path);
08984293 186 /* FIXME: immediately rotate */
cec736d2
LP
187 else if (state == STATE_ARCHIVED)
188 return -ESHUTDOWN;
189 else if (state != STATE_OFFLINE)
190 log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state);
191 }
192
193 return 0;
194}
195
196static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 197 uint64_t old_size, new_size;
fec2aa2f 198 int r;
cec736d2
LP
199
200 assert(f);
201
cec736d2 202 /* We assume that this file is not sparse, and we know that
38ac38b2 203 * for sure, since we always call posix_fallocate()
cec736d2
LP
204 * ourselves */
205
206 old_size =
23b0b2b2 207 le64toh(f->header->header_size) +
cec736d2
LP
208 le64toh(f->header->arena_size);
209
bc85bfee 210 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
211 if (new_size < le64toh(f->header->header_size))
212 new_size = le64toh(f->header->header_size);
bc85bfee
LP
213
214 if (new_size <= old_size)
cec736d2
LP
215 return 0;
216
bc85bfee
LP
217 if (f->metrics.max_size > 0 &&
218 new_size > f->metrics.max_size)
219 return -E2BIG;
cec736d2 220
bc85bfee
LP
221 if (new_size > f->metrics.min_size &&
222 f->metrics.keep_free > 0) {
cec736d2
LP
223 struct statvfs svfs;
224
225 if (fstatvfs(f->fd, &svfs) >= 0) {
226 uint64_t available;
227
228 available = svfs.f_bfree * svfs.f_bsize;
229
bc85bfee
LP
230 if (available >= f->metrics.keep_free)
231 available -= f->metrics.keep_free;
cec736d2
LP
232 else
233 available = 0;
234
235 if (new_size - old_size > available)
236 return -E2BIG;
237 }
238 }
239
bc85bfee
LP
240 /* Note that the glibc fallocate() fallback is very
241 inefficient, hence we try to minimize the allocation area
242 as we can. */
fec2aa2f
GV
243 r = posix_fallocate(f->fd, old_size, new_size - old_size);
244 if (r != 0)
245 return -r;
cec736d2
LP
246
247 if (fstat(f->fd, &f->last_stat) < 0)
248 return -errno;
249
23b0b2b2 250 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
251
252 return 0;
253}
254
255static int journal_file_map(
256 JournalFile *f,
257 uint64_t offset,
258 uint64_t size,
259 void **_window,
260 uint64_t *_woffset,
261 uint64_t *_wsize,
262 void **ret) {
263
264 uint64_t woffset, wsize;
265 void *window;
266
267 assert(f);
268 assert(size > 0);
269 assert(ret);
270
271 woffset = offset & ~((uint64_t) page_size() - 1ULL);
272 wsize = size + (offset - woffset);
273 wsize = PAGE_ALIGN(wsize);
274
2a59ea54
LP
275 /* Avoid SIGBUS on invalid accesses */
276 if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
277 return -EADDRNOTAVAIL;
278
cec736d2
LP
279 window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
280 if (window == MAP_FAILED)
281 return -errno;
282
283 if (_window)
284 *_window = window;
285
286 if (_woffset)
287 *_woffset = woffset;
288
289 if (_wsize)
290 *_wsize = wsize;
291
292 *ret = (uint8_t*) window + (offset - woffset);
293
294 return 0;
295}
296
de190aef 297static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
6c8a39b8 298 void *p = NULL;
cec736d2
LP
299 uint64_t delta;
300 int r;
de190aef 301 Window *w;
cec736d2
LP
302
303 assert(f);
304 assert(ret);
de190aef
LP
305 assert(wt >= 0);
306 assert(wt < _WINDOW_MAX);
cec736d2 307
4bbdcdb3
LP
308 if (offset + size > (uint64_t) f->last_stat.st_size) {
309 /* Hmm, out of range? Let's refresh the fstat() data
310 * first, before we trust that check. */
311
312 if (fstat(f->fd, &f->last_stat) < 0 ||
313 offset + size > (uint64_t) f->last_stat.st_size)
314 return -EADDRNOTAVAIL;
315 }
316
de190aef 317 w = f->windows + wt;
cec736d2 318
de190aef
LP
319 if (_likely_(w->ptr &&
320 w->offset <= offset &&
321 w->offset + w->size >= offset + size)) {
322
323 *ret = (uint8_t*) w->ptr + (offset - w->offset);
cec736d2
LP
324 return 0;
325 }
326
de190aef
LP
327 if (w->ptr) {
328 if (munmap(w->ptr, w->size) < 0)
cec736d2
LP
329 return -errno;
330
de190aef
LP
331 w->ptr = NULL;
332 w->size = w->offset = 0;
cec736d2
LP
333 }
334
335 if (size < DEFAULT_WINDOW_SIZE) {
336 /* If the default window size is larger then what was
337 * asked for extend the mapping a bit in the hope to
338 * minimize needed remappings later on. We add half
339 * the window space before and half behind the
340 * requested mapping */
341
1921a5cb 342 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
cec736d2 343
a99c349d 344 if (delta > offset)
cec736d2
LP
345 delta = offset;
346
347 offset -= delta;
a99c349d 348 size = DEFAULT_WINDOW_SIZE;
cec736d2
LP
349 } else
350 delta = 0;
351
2a59ea54 352 if (offset + size > (uint64_t) f->last_stat.st_size)
1921a5cb 353 size = (uint64_t) f->last_stat.st_size - offset;
2a59ea54
LP
354
355 if (size <= 0)
356 return -EADDRNOTAVAIL;
357
cec736d2
LP
358 r = journal_file_map(f,
359 offset, size,
de190aef
LP
360 &w->ptr, &w->offset, &w->size,
361 &p);
cec736d2
LP
362
363 if (r < 0)
364 return r;
365
366 *ret = (uint8_t*) p + delta;
367 return 0;
368}
369
370static bool verify_hash(Object *o) {
de190aef 371 uint64_t h1, h2;
cec736d2
LP
372
373 assert(o);
374
807e17f0 375 if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
cec736d2 376 h1 = le64toh(o->data.hash);
de190aef
LP
377 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
378 } else if (o->object.type == OBJECT_FIELD) {
379 h1 = le64toh(o->field.hash);
380 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
381 } else
382 return true;
cec736d2 383
de190aef 384 return h1 == h2;
cec736d2
LP
385}
386
de190aef 387int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
388 int r;
389 void *t;
390 Object *o;
391 uint64_t s;
392
393 assert(f);
394 assert(ret);
de190aef 395 assert(type < _OBJECT_TYPE_MAX);
cec736d2 396
de190aef 397 r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
398 if (r < 0)
399 return r;
400
401 o = (Object*) t;
402 s = le64toh(o->object.size);
403
404 if (s < sizeof(ObjectHeader))
405 return -EBADMSG;
406
de190aef 407 if (type >= 0 && o->object.type != type)
cec736d2
LP
408 return -EBADMSG;
409
410 if (s > sizeof(ObjectHeader)) {
de190aef 411 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
412 if (r < 0)
413 return r;
414
415 o = (Object*) t;
416 }
417
418 if (!verify_hash(o))
419 return -EBADMSG;
420
421 *ret = o;
422 return 0;
423}
424
c2373f84 425static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
426 uint64_t r;
427
428 assert(f);
429
430 r = le64toh(f->header->seqnum) + 1;
c2373f84
LP
431
432 if (seqnum) {
de190aef 433 /* If an external seqnum counter was passed, we update
c2373f84
LP
434 * both the local and the external one, and set it to
435 * the maximum of both */
436
437 if (*seqnum + 1 > r)
438 r = *seqnum + 1;
439
440 *seqnum = r;
441 }
442
cec736d2
LP
443 f->header->seqnum = htole64(r);
444
de190aef
LP
445 if (f->header->first_seqnum == 0)
446 f->header->first_seqnum = htole64(r);
447
cec736d2
LP
448 return r;
449}
450
de190aef 451static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
452 int r;
453 uint64_t p;
454 Object *tail, *o;
455 void *t;
456
457 assert(f);
458 assert(size >= sizeof(ObjectHeader));
459 assert(offset);
460 assert(ret);
461
462 p = le64toh(f->header->tail_object_offset);
cec736d2 463 if (p == 0)
23b0b2b2 464 p = le64toh(f->header->header_size);
cec736d2 465 else {
de190aef 466 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
467 if (r < 0)
468 return r;
469
470 p += ALIGN64(le64toh(tail->object.size));
471 }
472
473 r = journal_file_allocate(f, p, size);
474 if (r < 0)
475 return r;
476
de190aef 477 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
478 if (r < 0)
479 return r;
480
481 o = (Object*) t;
482
483 zero(o->object);
de190aef 484 o->object.type = type;
cec736d2
LP
485 o->object.size = htole64(size);
486
487 f->header->tail_object_offset = htole64(p);
cec736d2
LP
488 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
489
490 *ret = o;
491 *offset = p;
492
493 return 0;
494}
495
de190aef 496static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
497 uint64_t s, p;
498 Object *o;
499 int r;
500
501 assert(f);
502
de190aef
LP
503 s = DEFAULT_DATA_HASH_TABLE_SIZE;
504 r = journal_file_append_object(f,
505 OBJECT_DATA_HASH_TABLE,
506 offsetof(Object, hash_table.items) + s,
507 &o, &p);
cec736d2
LP
508 if (r < 0)
509 return r;
510
de190aef 511 memset(o->hash_table.items, 0, s);
cec736d2 512
de190aef
LP
513 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
514 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
515
516 return 0;
517}
518
de190aef 519static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
520 uint64_t s, p;
521 Object *o;
522 int r;
523
524 assert(f);
525
de190aef
LP
526 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
527 r = journal_file_append_object(f,
528 OBJECT_FIELD_HASH_TABLE,
529 offsetof(Object, hash_table.items) + s,
530 &o, &p);
cec736d2
LP
531 if (r < 0)
532 return r;
533
de190aef 534 memset(o->hash_table.items, 0, s);
cec736d2 535
de190aef
LP
536 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
537 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
538
539 return 0;
540}
541
de190aef 542static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
543 uint64_t s, p;
544 void *t;
545 int r;
546
547 assert(f);
548
de190aef
LP
549 p = le64toh(f->header->data_hash_table_offset);
550 s = le64toh(f->header->data_hash_table_size);
cec736d2 551
de190aef
LP
552 r = journal_file_move_to(f,
553 WINDOW_DATA_HASH_TABLE,
554 p, s,
555 &t);
cec736d2
LP
556 if (r < 0)
557 return r;
558
de190aef 559 f->data_hash_table = t;
cec736d2
LP
560 return 0;
561}
562
de190aef 563static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
564 uint64_t s, p;
565 void *t;
566 int r;
567
568 assert(f);
569
de190aef
LP
570 p = le64toh(f->header->field_hash_table_offset);
571 s = le64toh(f->header->field_hash_table_size);
cec736d2 572
de190aef
LP
573 r = journal_file_move_to(f,
574 WINDOW_FIELD_HASH_TABLE,
575 p, s,
576 &t);
cec736d2
LP
577 if (r < 0)
578 return r;
579
de190aef 580 f->field_hash_table = t;
cec736d2
LP
581 return 0;
582}
583
de190aef
LP
584static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
585 uint64_t p, h;
cec736d2
LP
586 int r;
587
588 assert(f);
589 assert(o);
590 assert(offset > 0);
de190aef 591 assert(o->object.type == OBJECT_DATA);
cec736d2 592
48496df6
LP
593 /* This might alter the window we are looking at */
594
de190aef
LP
595 o->data.next_hash_offset = o->data.next_field_offset = 0;
596 o->data.entry_offset = o->data.entry_array_offset = 0;
597 o->data.n_entries = 0;
cec736d2 598
de190aef 599 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 600 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
601 if (p == 0) {
602 /* Only entry in the hash table is easy */
de190aef 603 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 604 } else {
48496df6
LP
605 /* Move back to the previous data object, to patch in
606 * pointer */
cec736d2 607
de190aef 608 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
609 if (r < 0)
610 return r;
611
de190aef 612 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
613 }
614
de190aef 615 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2
LP
616
617 return 0;
618}
619
de190aef
LP
620int journal_file_find_data_object_with_hash(
621 JournalFile *f,
622 const void *data, uint64_t size, uint64_t hash,
623 Object **ret, uint64_t *offset) {
48496df6 624
de190aef 625 uint64_t p, osize, h;
cec736d2
LP
626 int r;
627
628 assert(f);
629 assert(data || size == 0);
630
631 osize = offsetof(Object, data.payload) + size;
632
bc85bfee
LP
633 if (f->header->data_hash_table_size == 0)
634 return -EBADMSG;
635
de190aef
LP
636 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
637 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 638
de190aef
LP
639 while (p > 0) {
640 Object *o;
cec736d2 641
de190aef 642 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
643 if (r < 0)
644 return r;
645
807e17f0 646 if (le64toh(o->data.hash) != hash)
85a131e8 647 goto next;
807e17f0
LP
648
649 if (o->object.flags & OBJECT_COMPRESSED) {
650#ifdef HAVE_XZ
b785c858 651 uint64_t l, rsize;
cec736d2 652
807e17f0
LP
653 l = le64toh(o->object.size);
654 if (l <= offsetof(Object, data.payload))
cec736d2
LP
655 return -EBADMSG;
656
807e17f0
LP
657 l -= offsetof(Object, data.payload);
658
659 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
660 return -EBADMSG;
661
b785c858 662 if (rsize == size &&
807e17f0
LP
663 memcmp(f->compress_buffer, data, size) == 0) {
664
665 if (ret)
666 *ret = o;
667
668 if (offset)
669 *offset = p;
670
671 return 1;
672 }
673#else
674 return -EPROTONOSUPPORT;
675#endif
676
677 } else if (le64toh(o->object.size) == osize &&
678 memcmp(o->data.payload, data, size) == 0) {
679
cec736d2
LP
680 if (ret)
681 *ret = o;
682
683 if (offset)
684 *offset = p;
685
de190aef 686 return 1;
cec736d2
LP
687 }
688
85a131e8 689 next:
cec736d2
LP
690 p = le64toh(o->data.next_hash_offset);
691 }
692
de190aef
LP
693 return 0;
694}
695
696int journal_file_find_data_object(
697 JournalFile *f,
698 const void *data, uint64_t size,
699 Object **ret, uint64_t *offset) {
700
701 uint64_t hash;
702
703 assert(f);
704 assert(data || size == 0);
705
706 hash = hash64(data, size);
707
708 return journal_file_find_data_object_with_hash(f,
709 data, size, hash,
710 ret, offset);
711}
712
48496df6
LP
713static int journal_file_append_data(
714 JournalFile *f,
715 const void *data, uint64_t size,
716 Object **ret, uint64_t *offset) {
717
de190aef
LP
718 uint64_t hash, p;
719 uint64_t osize;
720 Object *o;
721 int r;
807e17f0 722 bool compressed = false;
de190aef
LP
723
724 assert(f);
725 assert(data || size == 0);
726
727 hash = hash64(data, size);
728
729 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
730 if (r < 0)
731 return r;
732 else if (r > 0) {
733
734 if (ret)
735 *ret = o;
736
737 if (offset)
738 *offset = p;
739
740 return 0;
741 }
742
743 osize = offsetof(Object, data.payload) + size;
744 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
745 if (r < 0)
746 return r;
747
cec736d2 748 o->data.hash = htole64(hash);
807e17f0
LP
749
750#ifdef HAVE_XZ
751 if (f->compress &&
752 size >= COMPRESSION_SIZE_THRESHOLD) {
753 uint64_t rsize;
754
755 compressed = compress_blob(data, size, o->data.payload, &rsize);
756
757 if (compressed) {
758 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
759 o->object.flags |= OBJECT_COMPRESSED;
760
761 f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED);
762
763 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
764 }
765 }
766#endif
767
768 if (!compressed)
769 memcpy(o->data.payload, data, size);
cec736d2 770
de190aef 771 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
772 if (r < 0)
773 return r;
774
48496df6
LP
775 /* The linking might have altered the window, so let's
776 * refresh our pointer */
777 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
778 if (r < 0)
779 return r;
780
cec736d2
LP
781 if (ret)
782 *ret = o;
783
784 if (offset)
de190aef 785 *offset = p;
cec736d2
LP
786
787 return 0;
788}
789
790uint64_t journal_file_entry_n_items(Object *o) {
791 assert(o);
7be3aa17 792 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
793
794 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
795}
796
de190aef
LP
797static uint64_t journal_file_entry_array_n_items(Object *o) {
798 assert(o);
7be3aa17 799 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
800
801 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
802}
803
804static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
805 le64_t *first,
806 le64_t *idx,
de190aef 807 uint64_t p) {
cec736d2 808 int r;
de190aef
LP
809 uint64_t n = 0, ap = 0, q, i, a, hidx;
810 Object *o;
811
cec736d2 812 assert(f);
de190aef
LP
813 assert(first);
814 assert(idx);
815 assert(p > 0);
cec736d2 816
de190aef
LP
817 a = le64toh(*first);
818 i = hidx = le64toh(*idx);
819 while (a > 0) {
820
821 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
822 if (r < 0)
823 return r;
cec736d2 824
de190aef
LP
825 n = journal_file_entry_array_n_items(o);
826 if (i < n) {
827 o->entry_array.items[i] = htole64(p);
828 *idx = htole64(hidx + 1);
829 return 0;
830 }
cec736d2 831
de190aef
LP
832 i -= n;
833 ap = a;
834 a = le64toh(o->entry_array.next_entry_array_offset);
835 }
836
837 if (hidx > n)
838 n = (hidx+1) * 2;
839 else
840 n = n * 2;
841
842 if (n < 4)
843 n = 4;
844
845 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
846 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
847 &o, &q);
cec736d2
LP
848 if (r < 0)
849 return r;
850
de190aef 851 o->entry_array.items[i] = htole64(p);
cec736d2 852
de190aef 853 if (ap == 0)
7be3aa17 854 *first = htole64(q);
cec736d2 855 else {
de190aef 856 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
857 if (r < 0)
858 return r;
859
de190aef
LP
860 o->entry_array.next_entry_array_offset = htole64(q);
861 }
cec736d2 862
de190aef
LP
863 *idx = htole64(hidx + 1);
864
865 return 0;
866}
cec736d2 867
de190aef 868static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
869 le64_t *extra,
870 le64_t *first,
871 le64_t *idx,
de190aef
LP
872 uint64_t p) {
873
874 int r;
875
876 assert(f);
877 assert(extra);
878 assert(first);
879 assert(idx);
880 assert(p > 0);
881
882 if (*idx == 0)
883 *extra = htole64(p);
884 else {
4fd052ae 885 le64_t i;
de190aef 886
7be3aa17 887 i = htole64(le64toh(*idx) - 1);
de190aef
LP
888 r = link_entry_into_array(f, first, &i, p);
889 if (r < 0)
890 return r;
cec736d2
LP
891 }
892
de190aef
LP
893 *idx = htole64(le64toh(*idx) + 1);
894 return 0;
895}
896
897static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
898 uint64_t p;
899 int r;
900 assert(f);
901 assert(o);
902 assert(offset > 0);
903
904 p = le64toh(o->entry.items[i].object_offset);
905 if (p == 0)
906 return -EINVAL;
907
908 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
909 if (r < 0)
910 return r;
911
de190aef
LP
912 return link_entry_into_array_plus_one(f,
913 &o->data.entry_offset,
914 &o->data.entry_array_offset,
915 &o->data.n_entries,
916 offset);
cec736d2
LP
917}
918
919static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 920 uint64_t n, i;
cec736d2
LP
921 int r;
922
923 assert(f);
924 assert(o);
925 assert(offset > 0);
de190aef 926 assert(o->object.type == OBJECT_ENTRY);
cec736d2 927
b788cc23
LP
928 __sync_synchronize();
929
cec736d2 930 /* Link up the entry itself */
de190aef
LP
931 r = link_entry_into_array(f,
932 &f->header->entry_array_offset,
933 &f->header->n_entries,
934 offset);
935 if (r < 0)
936 return r;
cec736d2 937
aaf53376 938 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 939
de190aef 940 if (f->header->head_entry_realtime == 0)
0ac38b70 941 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 942
0ac38b70 943 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
944 f->header->tail_entry_monotonic = o->entry.monotonic;
945
946 f->tail_entry_monotonic_valid = true;
cec736d2
LP
947
948 /* Link up the items */
949 n = journal_file_entry_n_items(o);
950 for (i = 0; i < n; i++) {
951 r = journal_file_link_entry_item(f, o, offset, i);
952 if (r < 0)
953 return r;
954 }
955
cec736d2
LP
956 return 0;
957}
958
959static int journal_file_append_entry_internal(
960 JournalFile *f,
961 const dual_timestamp *ts,
962 uint64_t xor_hash,
963 const EntryItem items[], unsigned n_items,
de190aef 964 uint64_t *seqnum,
cec736d2
LP
965 Object **ret, uint64_t *offset) {
966 uint64_t np;
967 uint64_t osize;
968 Object *o;
969 int r;
970
971 assert(f);
972 assert(items || n_items == 0);
de190aef 973 assert(ts);
cec736d2
LP
974
975 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
976
de190aef 977 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
978 if (r < 0)
979 return r;
980
de190aef 981 o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
cec736d2 982 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
983 o->entry.realtime = htole64(ts->realtime);
984 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
985 o->entry.xor_hash = htole64(xor_hash);
986 o->entry.boot_id = f->header->boot_id;
987
988 r = journal_file_link_entry(f, o, np);
989 if (r < 0)
990 return r;
991
992 if (ret)
993 *ret = o;
994
995 if (offset)
996 *offset = np;
997
998 return 0;
999}
1000
cf244689 1001void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1002 assert(f);
1003
1004 /* inotify() does not receive IN_MODIFY events from file
1005 * accesses done via mmap(). After each access we hence
1006 * trigger IN_MODIFY by truncating the journal file to its
1007 * current size which triggers IN_MODIFY. */
1008
bc85bfee
LP
1009 __sync_synchronize();
1010
50f20cfd
LP
1011 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1012 log_error("Failed to to truncate file to its own size: %m");
1013}
1014
de190aef 1015int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1016 unsigned i;
1017 EntryItem *items;
1018 int r;
1019 uint64_t xor_hash = 0;
de190aef 1020 struct dual_timestamp _ts;
cec736d2
LP
1021
1022 assert(f);
1023 assert(iovec || n_iovec == 0);
1024
de190aef
LP
1025 if (!f->writable)
1026 return -EPERM;
1027
1028 if (!ts) {
1029 dual_timestamp_get(&_ts);
1030 ts = &_ts;
1031 }
1032
1033 if (f->tail_entry_monotonic_valid &&
1034 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1035 return -EINVAL;
1036
cf244689 1037 items = alloca(sizeof(EntryItem) * n_iovec);
cec736d2
LP
1038
1039 for (i = 0; i < n_iovec; i++) {
1040 uint64_t p;
1041 Object *o;
1042
1043 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1044 if (r < 0)
cf244689 1045 return r;
cec736d2
LP
1046
1047 xor_hash ^= le64toh(o->data.hash);
1048 items[i].object_offset = htole64(p);
de7b95cd 1049 items[i].hash = o->data.hash;
cec736d2
LP
1050 }
1051
de190aef 1052 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1053
50f20cfd
LP
1054 journal_file_post_change(f);
1055
cec736d2
LP
1056 return r;
1057}
1058
de190aef
LP
1059static int generic_array_get(JournalFile *f,
1060 uint64_t first,
1061 uint64_t i,
1062 Object **ret, uint64_t *offset) {
1063
cec736d2 1064 Object *o;
6c8a39b8 1065 uint64_t p = 0, a;
cec736d2
LP
1066 int r;
1067
1068 assert(f);
1069
de190aef
LP
1070 a = first;
1071 while (a > 0) {
1072 uint64_t n;
cec736d2 1073
de190aef
LP
1074 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1075 if (r < 0)
1076 return r;
cec736d2 1077
de190aef
LP
1078 n = journal_file_entry_array_n_items(o);
1079 if (i < n) {
1080 p = le64toh(o->entry_array.items[i]);
1081 break;
cec736d2
LP
1082 }
1083
de190aef
LP
1084 i -= n;
1085 a = le64toh(o->entry_array.next_entry_array_offset);
1086 }
1087
1088 if (a <= 0 || p <= 0)
1089 return 0;
1090
1091 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1092 if (r < 0)
1093 return r;
1094
1095 if (ret)
1096 *ret = o;
1097
1098 if (offset)
1099 *offset = p;
1100
1101 return 1;
1102}
1103
1104static int generic_array_get_plus_one(JournalFile *f,
1105 uint64_t extra,
1106 uint64_t first,
1107 uint64_t i,
1108 Object **ret, uint64_t *offset) {
1109
1110 Object *o;
1111
1112 assert(f);
1113
1114 if (i == 0) {
1115 int r;
1116
1117 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1118 if (r < 0)
1119 return r;
1120
de190aef
LP
1121 if (ret)
1122 *ret = o;
cec736d2 1123
de190aef
LP
1124 if (offset)
1125 *offset = extra;
cec736d2 1126
de190aef 1127 return 1;
cec736d2
LP
1128 }
1129
de190aef
LP
1130 return generic_array_get(f, first, i-1, ret, offset);
1131}
cec736d2 1132
de190aef
LP
/* Non-negative results of the test_object() callbacks used when
 * bisecting entry arrays. TEST_FOUND is turned into TEST_RIGHT or
 * TEST_LEFT by the bisection code depending on the search direction. */
enum {
        TEST_FOUND,
        TEST_LEFT,
        TEST_RIGHT
};
cec736d2 1138
de190aef
LP
1139static int generic_array_bisect(JournalFile *f,
1140 uint64_t first,
1141 uint64_t n,
1142 uint64_t needle,
1143 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1144 direction_t direction,
1145 Object **ret,
1146 uint64_t *offset,
1147 uint64_t *idx) {
1148
1149 uint64_t a, p, t = 0, i = 0, last_p = 0;
1150 bool subtract_one = false;
1151 Object *o, *array = NULL;
1152 int r;
cec736d2 1153
de190aef
LP
1154 assert(f);
1155 assert(test_object);
cec736d2 1156
de190aef
LP
1157 a = first;
1158 while (a > 0) {
1159 uint64_t left, right, k, lp;
1160
1161 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1162 if (r < 0)
1163 return r;
1164
de190aef
LP
1165 k = journal_file_entry_array_n_items(array);
1166 right = MIN(k, n);
1167 if (right <= 0)
1168 return 0;
cec736d2 1169
de190aef
LP
1170 i = right - 1;
1171 lp = p = le64toh(array->entry_array.items[i]);
1172 if (p <= 0)
1173 return -EBADMSG;
cec736d2 1174
de190aef
LP
1175 r = test_object(f, p, needle);
1176 if (r < 0)
1177 return r;
cec736d2 1178
de190aef
LP
1179 if (r == TEST_FOUND)
1180 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1181
1182 if (r == TEST_RIGHT) {
1183 left = 0;
1184 right -= 1;
1185 for (;;) {
1186 if (left == right) {
1187 if (direction == DIRECTION_UP)
1188 subtract_one = true;
1189
1190 i = left;
1191 goto found;
1192 }
1193
1194 assert(left < right);
1195
1196 i = (left + right) / 2;
1197 p = le64toh(array->entry_array.items[i]);
1198 if (p <= 0)
1199 return -EBADMSG;
1200
1201 r = test_object(f, p, needle);
1202 if (r < 0)
1203 return r;
cec736d2 1204
de190aef
LP
1205 if (r == TEST_FOUND)
1206 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1207
1208 if (r == TEST_RIGHT)
1209 right = i;
1210 else
1211 left = i + 1;
1212 }
1213 }
1214
1215 if (k > n)
cec736d2
LP
1216 return 0;
1217
de190aef
LP
1218 last_p = lp;
1219
1220 n -= k;
1221 t += k;
1222 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1223 }
1224
1225 return 0;
de190aef
LP
1226
1227found:
1228 if (subtract_one && t == 0 && i == 0)
1229 return 0;
1230
1231 if (subtract_one && i == 0)
1232 p = last_p;
1233 else if (subtract_one)
1234 p = le64toh(array->entry_array.items[i-1]);
1235 else
1236 p = le64toh(array->entry_array.items[i]);
1237
1238 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1239 if (r < 0)
1240 return r;
1241
1242 if (ret)
1243 *ret = o;
1244
1245 if (offset)
1246 *offset = p;
1247
1248 if (idx)
1249 *idx = t + i - (subtract_one ? 1 : 0);
1250
1251 return 1;
cec736d2
LP
1252}
1253
de190aef
LP
1254static int generic_array_bisect_plus_one(JournalFile *f,
1255 uint64_t extra,
1256 uint64_t first,
1257 uint64_t n,
1258 uint64_t needle,
1259 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1260 direction_t direction,
1261 Object **ret,
1262 uint64_t *offset,
1263 uint64_t *idx) {
1264
cec736d2
LP
1265 int r;
1266
1267 assert(f);
de190aef 1268 assert(test_object);
cec736d2 1269
de190aef
LP
1270 if (n <= 0)
1271 return 0;
cec736d2 1272
de190aef
LP
1273 /* This bisects the array in object 'first', but first checks
1274 * an extra */
de190aef
LP
1275 r = test_object(f, extra, needle);
1276 if (r < 0)
1277 return r;
a536e261
LP
1278
1279 if (r == TEST_FOUND)
1280 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1281
1282 if (r == TEST_RIGHT) {
de190aef
LP
1283 Object *o;
1284
1285 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1286 if (r < 0)
1287 return r;
1288
1289 if (ret)
1290 *ret = o;
cec736d2 1291
de190aef
LP
1292 if (offset)
1293 *offset = extra;
440ee366
LP
1294
1295 if (idx)
1296 *idx = 0;
1297
1298 return 1;
a536e261 1299 }
cec736d2 1300
de190aef
LP
1301 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1302
ecf68b1d 1303 if (r > 0 && idx)
de190aef
LP
1304 (*idx) ++;
1305
1306 return r;
1307}
1308
1309static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1310 Object *o;
1311 int r;
1312
1313 assert(f);
1314 assert(p > 0);
1315
1316 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1317 if (r < 0)
1318 return r;
1319
de190aef
LP
1320 if (le64toh(o->entry.seqnum) == needle)
1321 return TEST_FOUND;
1322 else if (le64toh(o->entry.seqnum) < needle)
1323 return TEST_LEFT;
1324 else
1325 return TEST_RIGHT;
1326}
cec736d2 1327
de190aef
LP
1328int journal_file_move_to_entry_by_seqnum(
1329 JournalFile *f,
1330 uint64_t seqnum,
1331 direction_t direction,
1332 Object **ret,
1333 uint64_t *offset) {
1334
1335 return generic_array_bisect(f,
1336 le64toh(f->header->entry_array_offset),
1337 le64toh(f->header->n_entries),
1338 seqnum,
1339 test_object_seqnum,
1340 direction,
1341 ret, offset, NULL);
1342}
cec736d2 1343
de190aef
LP
1344static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1345 Object *o;
1346 int r;
1347
1348 assert(f);
1349 assert(p > 0);
1350
1351 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1352 if (r < 0)
1353 return r;
1354
1355 if (le64toh(o->entry.realtime) == needle)
1356 return TEST_FOUND;
1357 else if (le64toh(o->entry.realtime) < needle)
1358 return TEST_LEFT;
1359 else
1360 return TEST_RIGHT;
cec736d2
LP
1361}
1362
de190aef
LP
1363int journal_file_move_to_entry_by_realtime(
1364 JournalFile *f,
1365 uint64_t realtime,
1366 direction_t direction,
1367 Object **ret,
1368 uint64_t *offset) {
1369
1370 return generic_array_bisect(f,
1371 le64toh(f->header->entry_array_offset),
1372 le64toh(f->header->n_entries),
1373 realtime,
1374 test_object_realtime,
1375 direction,
1376 ret, offset, NULL);
1377}
1378
1379static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1380 Object *o;
1381 int r;
1382
1383 assert(f);
1384 assert(p > 0);
1385
1386 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1387 if (r < 0)
1388 return r;
1389
1390 if (le64toh(o->entry.monotonic) == needle)
1391 return TEST_FOUND;
1392 else if (le64toh(o->entry.monotonic) < needle)
1393 return TEST_LEFT;
1394 else
1395 return TEST_RIGHT;
1396}
1397
1398int journal_file_move_to_entry_by_monotonic(
1399 JournalFile *f,
1400 sd_id128_t boot_id,
1401 uint64_t monotonic,
1402 direction_t direction,
1403 Object **ret,
1404 uint64_t *offset) {
1405
10b6f904 1406 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1407 Object *o;
1408 int r;
1409
10b6f904 1410 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1411
1412 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1413 if (r < 0)
1414 return r;
1415 else if (r == 0)
1416 return -ENOENT;
1417
1418 return generic_array_bisect_plus_one(f,
1419 le64toh(o->data.entry_offset),
1420 le64toh(o->data.entry_array_offset),
1421 le64toh(o->data.n_entries),
1422 monotonic,
1423 test_object_monotonic,
1424 direction,
1425 ret, offset, NULL);
1426}
1427
1428static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1429 assert(f);
1430 assert(p > 0);
1431
1432 if (p == needle)
1433 return TEST_FOUND;
1434 else if (p < needle)
1435 return TEST_LEFT;
1436 else
1437 return TEST_RIGHT;
1438}
1439
1440int journal_file_next_entry(
1441 JournalFile *f,
1442 Object *o, uint64_t p,
1443 direction_t direction,
1444 Object **ret, uint64_t *offset) {
1445
1446 uint64_t i, n;
cec736d2
LP
1447 int r;
1448
1449 assert(f);
de190aef
LP
1450 assert(p > 0 || !o);
1451
1452 n = le64toh(f->header->n_entries);
1453 if (n <= 0)
1454 return 0;
cec736d2
LP
1455
1456 if (!o)
de190aef 1457 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1458 else {
de190aef 1459 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1460 return -EINVAL;
1461
de190aef
LP
1462 r = generic_array_bisect(f,
1463 le64toh(f->header->entry_array_offset),
1464 le64toh(f->header->n_entries),
1465 p,
1466 test_object_offset,
1467 DIRECTION_DOWN,
1468 NULL, NULL,
1469 &i);
1470 if (r <= 0)
1471 return r;
1472
1473 if (direction == DIRECTION_DOWN) {
1474 if (i >= n - 1)
1475 return 0;
1476
1477 i++;
1478 } else {
1479 if (i <= 0)
1480 return 0;
1481
1482 i--;
1483 }
cec736d2
LP
1484 }
1485
de190aef
LP
1486 /* And jump to it */
1487 return generic_array_get(f,
1488 le64toh(f->header->entry_array_offset),
1489 i,
1490 ret, offset);
1491}
cec736d2 1492
de190aef
LP
1493int journal_file_skip_entry(
1494 JournalFile *f,
1495 Object *o, uint64_t p,
1496 int64_t skip,
1497 Object **ret, uint64_t *offset) {
1498
1499 uint64_t i, n;
1500 int r;
1501
1502 assert(f);
1503 assert(o);
1504 assert(p > 0);
1505
1506 if (o->object.type != OBJECT_ENTRY)
1507 return -EINVAL;
1508
1509 r = generic_array_bisect(f,
1510 le64toh(f->header->entry_array_offset),
1511 le64toh(f->header->n_entries),
1512 p,
1513 test_object_offset,
1514 DIRECTION_DOWN,
1515 NULL, NULL,
1516 &i);
1517 if (r <= 0)
cec736d2
LP
1518 return r;
1519
de190aef
LP
1520 /* Calculate new index */
1521 if (skip < 0) {
1522 if ((uint64_t) -skip >= i)
1523 i = 0;
1524 else
1525 i = i - (uint64_t) -skip;
1526 } else
1527 i += (uint64_t) skip;
cec736d2 1528
de190aef
LP
1529 n = le64toh(f->header->n_entries);
1530 if (n <= 0)
1531 return -EBADMSG;
cec736d2 1532
de190aef
LP
1533 if (i >= n)
1534 i = n-1;
1535
1536 return generic_array_get(f,
1537 le64toh(f->header->entry_array_offset),
1538 i,
1539 ret, offset);
cec736d2
LP
1540}
1541
de190aef
LP
1542int journal_file_next_entry_for_data(
1543 JournalFile *f,
1544 Object *o, uint64_t p,
1545 uint64_t data_offset,
1546 direction_t direction,
1547 Object **ret, uint64_t *offset) {
1548
1549 uint64_t n, i;
cec736d2 1550 int r;
de190aef 1551 Object *d;
cec736d2
LP
1552
1553 assert(f);
de190aef 1554 assert(p > 0 || !o);
cec736d2 1555
de190aef 1556 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1557 if (r < 0)
de190aef 1558 return r;
cec736d2 1559
de190aef
LP
1560 n = le64toh(d->data.n_entries);
1561 if (n <= 0)
1562 return n;
cec736d2 1563
de190aef
LP
1564 if (!o)
1565 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1566 else {
1567 if (o->object.type != OBJECT_ENTRY)
1568 return -EINVAL;
cec736d2 1569
de190aef
LP
1570 r = generic_array_bisect_plus_one(f,
1571 le64toh(d->data.entry_offset),
1572 le64toh(d->data.entry_array_offset),
1573 le64toh(d->data.n_entries),
1574 p,
1575 test_object_offset,
1576 DIRECTION_DOWN,
1577 NULL, NULL,
1578 &i);
1579
1580 if (r <= 0)
cec736d2
LP
1581 return r;
1582
de190aef
LP
1583 if (direction == DIRECTION_DOWN) {
1584 if (i >= n - 1)
1585 return 0;
cec736d2 1586
de190aef
LP
1587 i++;
1588 } else {
1589 if (i <= 0)
1590 return 0;
cec736d2 1591
de190aef
LP
1592 i--;
1593 }
cec736d2 1594
de190aef 1595 }
cec736d2 1596
de190aef
LP
1597 return generic_array_get_plus_one(f,
1598 le64toh(d->data.entry_offset),
1599 le64toh(d->data.entry_array_offset),
1600 i,
1601 ret, offset);
1602}
cec736d2 1603
de190aef
LP
1604int journal_file_move_to_entry_by_seqnum_for_data(
1605 JournalFile *f,
1606 uint64_t data_offset,
1607 uint64_t seqnum,
1608 direction_t direction,
1609 Object **ret, uint64_t *offset) {
cec736d2 1610
de190aef
LP
1611 Object *d;
1612 int r;
cec736d2 1613
91a31dde
LP
1614 assert(f);
1615
de190aef 1616 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1617 if (r < 0)
de190aef 1618 return r;
cec736d2 1619
de190aef
LP
1620 return generic_array_bisect_plus_one(f,
1621 le64toh(d->data.entry_offset),
1622 le64toh(d->data.entry_array_offset),
1623 le64toh(d->data.n_entries),
1624 seqnum,
1625 test_object_seqnum,
1626 direction,
1627 ret, offset, NULL);
1628}
cec736d2 1629
de190aef
LP
1630int journal_file_move_to_entry_by_realtime_for_data(
1631 JournalFile *f,
1632 uint64_t data_offset,
1633 uint64_t realtime,
1634 direction_t direction,
1635 Object **ret, uint64_t *offset) {
1636
1637 Object *d;
1638 int r;
1639
91a31dde
LP
1640 assert(f);
1641
de190aef 1642 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1643 if (r < 0)
de190aef
LP
1644 return r;
1645
1646 return generic_array_bisect_plus_one(f,
1647 le64toh(d->data.entry_offset),
1648 le64toh(d->data.entry_array_offset),
1649 le64toh(d->data.n_entries),
1650 realtime,
1651 test_object_realtime,
1652 direction,
1653 ret, offset, NULL);
cec736d2
LP
1654}
1655
1656void journal_file_dump(JournalFile *f) {
1657 char a[33], b[33], c[33];
1658 Object *o;
1659 int r;
1660 uint64_t p;
1661
1662 assert(f);
1663
de190aef
LP
1664 printf("File Path: %s\n"
1665 "File ID: %s\n"
cec736d2
LP
1666 "Machine ID: %s\n"
1667 "Boot ID: %s\n"
de190aef
LP
1668 "Arena size: %llu\n"
1669 "Objects: %lu\n"
1670 "Entries: %lu\n",
1671 f->path,
cec736d2
LP
1672 sd_id128_to_string(f->header->file_id, a),
1673 sd_id128_to_string(f->header->machine_id, b),
1674 sd_id128_to_string(f->header->boot_id, c),
de190aef
LP
1675 (unsigned long long) le64toh(f->header->arena_size),
1676 (unsigned long) le64toh(f->header->n_objects),
1677 (unsigned long) le64toh(f->header->n_entries));
cec736d2 1678
23b0b2b2 1679 p = le64toh(f->header->header_size);
cec736d2 1680 while (p != 0) {
de190aef 1681 r = journal_file_move_to_object(f, -1, p, &o);
cec736d2
LP
1682 if (r < 0)
1683 goto fail;
1684
1685 switch (o->object.type) {
1686
1687 case OBJECT_UNUSED:
1688 printf("Type: OBJECT_UNUSED\n");
1689 break;
1690
1691 case OBJECT_DATA:
1692 printf("Type: OBJECT_DATA\n");
1693 break;
1694
1695 case OBJECT_ENTRY:
3fbf9cbb
LP
1696 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1697 (unsigned long long) le64toh(o->entry.seqnum),
1698 (unsigned long long) le64toh(o->entry.monotonic),
1699 (unsigned long long) le64toh(o->entry.realtime));
cec736d2
LP
1700 break;
1701
de190aef
LP
1702 case OBJECT_FIELD_HASH_TABLE:
1703 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
cec736d2
LP
1704 break;
1705
de190aef
LP
1706 case OBJECT_DATA_HASH_TABLE:
1707 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1708 break;
1709
1710 case OBJECT_ENTRY_ARRAY:
1711 printf("Type: OBJECT_ENTRY_ARRAY\n");
cec736d2 1712 break;
8144056f
LP
1713
1714 case OBJECT_SIGNATURE:
1715 printf("Type: OBJECT_SIGNATURE\n");
1716 break;
cec736d2
LP
1717 }
1718
807e17f0
LP
1719 if (o->object.flags & OBJECT_COMPRESSED)
1720 printf("Flags: COMPRESSED\n");
1721
cec736d2
LP
1722 if (p == le64toh(f->header->tail_object_offset))
1723 p = 0;
1724 else
1725 p = p + ALIGN64(le64toh(o->object.size));
1726 }
1727
1728 return;
1729fail:
1730 log_error("File corrupt");
1731}
1732
1733int journal_file_open(
1734 const char *fname,
1735 int flags,
1736 mode_t mode,
0ac38b70 1737 JournalFile *template,
cec736d2
LP
1738 JournalFile **ret) {
1739
1740 JournalFile *f;
1741 int r;
1742 bool newly_created = false;
1743
1744 assert(fname);
1745
1746 if ((flags & O_ACCMODE) != O_RDONLY &&
1747 (flags & O_ACCMODE) != O_RDWR)
1748 return -EINVAL;
1749
9447a7f1
LP
1750 if (!endswith(fname, ".journal"))
1751 return -EINVAL;
1752
cec736d2
LP
1753 f = new0(JournalFile, 1);
1754 if (!f)
1755 return -ENOMEM;
1756
0ac38b70
LP
1757 f->fd = -1;
1758 f->flags = flags;
1759 f->mode = mode;
cec736d2
LP
1760 f->writable = (flags & O_ACCMODE) != O_RDONLY;
1761 f->prot = prot_from_flags(flags);
1762
15944db8
LP
1763 if (template) {
1764 f->metrics = template->metrics;
1765 f->compress = template->compress;
1766 }
1767
cec736d2
LP
1768 f->path = strdup(fname);
1769 if (!f->path) {
1770 r = -ENOMEM;
1771 goto fail;
1772 }
1773
0ac38b70
LP
1774 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1775 if (f->fd < 0) {
1776 r = -errno;
1777 goto fail;
1778 }
1779
cec736d2
LP
1780 if (fstat(f->fd, &f->last_stat) < 0) {
1781 r = -errno;
1782 goto fail;
1783 }
1784
1785 if (f->last_stat.st_size == 0 && f->writable) {
1786 newly_created = true;
1787
0ac38b70 1788 r = journal_file_init_header(f, template);
cec736d2
LP
1789 if (r < 0)
1790 goto fail;
1791
1792 if (fstat(f->fd, &f->last_stat) < 0) {
1793 r = -errno;
1794 goto fail;
1795 }
1796 }
1797
1798 if (f->last_stat.st_size < (off_t) sizeof(Header)) {
1799 r = -EIO;
1800 goto fail;
1801 }
1802
1803 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
1804 if (f->header == MAP_FAILED) {
1805 f->header = NULL;
1806 r = -errno;
1807 goto fail;
1808 }
1809
1810 if (!newly_created) {
1811 r = journal_file_verify_header(f);
1812 if (r < 0)
1813 goto fail;
1814 }
1815
1816 if (f->writable) {
1817 r = journal_file_refresh_header(f);
1818 if (r < 0)
1819 goto fail;
1820 }
1821
1822 if (newly_created) {
1823
de190aef 1824 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
1825 if (r < 0)
1826 goto fail;
1827
de190aef 1828 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
1829 if (r < 0)
1830 goto fail;
1831 }
1832
de190aef 1833 r = journal_file_map_field_hash_table(f);
cec736d2
LP
1834 if (r < 0)
1835 goto fail;
1836
de190aef 1837 r = journal_file_map_data_hash_table(f);
cec736d2
LP
1838 if (r < 0)
1839 goto fail;
1840
1841 if (ret)
1842 *ret = f;
1843
1844 return 0;
1845
1846fail:
1847 journal_file_close(f);
1848
1849 return r;
1850}
0ac38b70
LP
1851
1852int journal_file_rotate(JournalFile **f) {
1853 char *p;
1854 size_t l;
1855 JournalFile *old_file, *new_file = NULL;
1856 int r;
1857
1858 assert(f);
1859 assert(*f);
1860
1861 old_file = *f;
1862
1863 if (!old_file->writable)
1864 return -EINVAL;
1865
1866 if (!endswith(old_file->path, ".journal"))
1867 return -EINVAL;
1868
1869 l = strlen(old_file->path);
1870
9447a7f1 1871 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
1872 if (!p)
1873 return -ENOMEM;
1874
1875 memcpy(p, old_file->path, l - 8);
1876 p[l-8] = '@';
1877 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
1878 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
1879 "-%016llx-%016llx.journal",
1880 (unsigned long long) le64toh((*f)->header->seqnum),
1881 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
1882
1883 r = rename(old_file->path, p);
1884 free(p);
1885
1886 if (r < 0)
1887 return -errno;
1888
ccdbaf91 1889 old_file->header->state = STATE_ARCHIVED;
0ac38b70
LP
1890
1891 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file);
1892 journal_file_close(old_file);
1893
1894 *f = new_file;
1895 return r;
1896}
1897
9447a7f1
LP
1898int journal_file_open_reliably(
1899 const char *fname,
1900 int flags,
1901 mode_t mode,
1902 JournalFile *template,
1903 JournalFile **ret) {
1904
1905 int r;
1906 size_t l;
1907 char *p;
1908
1909 r = journal_file_open(fname, flags, mode, template, ret);
0071d9f1
LP
1910 if (r != -EBADMSG && /* corrupted */
1911 r != -ENODATA && /* truncated */
1912 r != -EHOSTDOWN && /* other machine */
1913 r != -EPROTONOSUPPORT) /* incompatible feature */
9447a7f1
LP
1914 return r;
1915
1916 if ((flags & O_ACCMODE) == O_RDONLY)
1917 return r;
1918
1919 if (!(flags & O_CREAT))
1920 return r;
1921
5c70eab4
LP
1922 /* The file is corrupted. Rotate it away and try it again (but only once) */
1923
9447a7f1
LP
1924 l = strlen(fname);
1925 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
1926 (int) (l-8), fname,
1927 (unsigned long long) now(CLOCK_REALTIME),
1928 random_ull()) < 0)
1929 return -ENOMEM;
1930
1931 r = rename(fname, p);
1932 free(p);
1933 if (r < 0)
1934 return -errno;
1935
1936 log_warning("File %s corrupted, renaming and replacing.", fname);
1937
1938 return journal_file_open(fname, flags, mode, template, ret);
1939}
1940
0ac38b70
LP
1941struct vacuum_info {
1942 off_t usage;
1943 char *filename;
1944
1945 uint64_t realtime;
1946 sd_id128_t seqnum_id;
1947 uint64_t seqnum;
5c70eab4
LP
1948
1949 bool have_seqnum;
0ac38b70
LP
1950};
1951
1952static int vacuum_compare(const void *_a, const void *_b) {
1953 const struct vacuum_info *a, *b;
1954
1955 a = _a;
1956 b = _b;
1957
5c70eab4
LP
1958 if (a->have_seqnum && b->have_seqnum &&
1959 sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
0ac38b70
LP
1960 if (a->seqnum < b->seqnum)
1961 return -1;
1962 else if (a->seqnum > b->seqnum)
1963 return 1;
1964 else
1965 return 0;
1966 }
1967
1968 if (a->realtime < b->realtime)
1969 return -1;
1970 else if (a->realtime > b->realtime)
1971 return 1;
5c70eab4 1972 else if (a->have_seqnum && b->have_seqnum)
0ac38b70 1973 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
5c70eab4
LP
1974 else
1975 return strcmp(a->filename, b->filename);
0ac38b70
LP
1976}
1977
1978int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
1979 DIR *d;
1980 int r = 0;
1981 struct vacuum_info *list = NULL;
1982 unsigned n_list = 0, n_allocated = 0, i;
1983 uint64_t sum = 0;
1984
1985 assert(directory);
1986
1987 if (max_use <= 0)
babfc091 1988 return 0;
0ac38b70
LP
1989
1990 d = opendir(directory);
1991 if (!d)
1992 return -errno;
1993
1994 for (;;) {
1995 int k;
1996 struct dirent buf, *de;
1997 size_t q;
1998 struct stat st;
1999 char *p;
7ea07dcd 2000 unsigned long long seqnum = 0, realtime;
0ac38b70 2001 sd_id128_t seqnum_id;
5c70eab4 2002 bool have_seqnum;
0ac38b70
LP
2003
2004 k = readdir_r(d, &buf, &de);
2005 if (k != 0) {
2006 r = -k;
2007 goto finish;
2008 }
2009
2010 if (!de)
2011 break;
2012
5c70eab4
LP
2013 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2014 continue;
2015
2016 if (!S_ISREG(st.st_mode))
0ac38b70
LP
2017 continue;
2018
2019 q = strlen(de->d_name);
2020
5c70eab4 2021 if (endswith(de->d_name, ".journal")) {
0ac38b70 2022
5c70eab4 2023 /* Vacuum archived files */
0ac38b70 2024
5c70eab4
LP
2025 if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2026 continue;
0ac38b70 2027
5c70eab4
LP
2028 if (de->d_name[q-8-16-1] != '-' ||
2029 de->d_name[q-8-16-1-16-1] != '-' ||
2030 de->d_name[q-8-16-1-16-1-32-1] != '@')
2031 continue;
0ac38b70 2032
5c70eab4
LP
2033 p = strdup(de->d_name);
2034 if (!p) {
2035 r = -ENOMEM;
2036 goto finish;
2037 }
0ac38b70 2038
5c70eab4
LP
2039 de->d_name[q-8-16-1-16-1] = 0;
2040 if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2041 free(p);
2042 continue;
2043 }
2044
2045 if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2046 free(p);
2047 continue;
2048 }
2049
2050 have_seqnum = true;
2051
2052 } else if (endswith(de->d_name, ".journal~")) {
2053 unsigned long long tmp;
2054
2055 /* Vacuum corrupted files */
2056
2057 if (q < 1 + 16 + 1 + 16 + 8 + 1)
2058 continue;
0ac38b70 2059
5c70eab4
LP
2060 if (de->d_name[q-1-8-16-1] != '-' ||
2061 de->d_name[q-1-8-16-1-16-1] != '@')
2062 continue;
2063
2064 p = strdup(de->d_name);
2065 if (!p) {
2066 r = -ENOMEM;
2067 goto finish;
2068 }
2069
2070 if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2071 free(p);
2072 continue;
2073 }
2074
2075 have_seqnum = false;
2076 } else
0ac38b70 2077 continue;
0ac38b70
LP
2078
2079 if (n_list >= n_allocated) {
2080 struct vacuum_info *j;
2081
2082 n_allocated = MAX(n_allocated * 2U, 8U);
2083 j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2084 if (!j) {
2085 free(p);
2086 r = -ENOMEM;
2087 goto finish;
2088 }
2089
2090 list = j;
2091 }
2092
2093 list[n_list].filename = p;
a3a52c0f 2094 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
0ac38b70
LP
2095 list[n_list].seqnum = seqnum;
2096 list[n_list].realtime = realtime;
2097 list[n_list].seqnum_id = seqnum_id;
5c70eab4 2098 list[n_list].have_seqnum = have_seqnum;
0ac38b70
LP
2099
2100 sum += list[n_list].usage;
2101
2102 n_list ++;
2103 }
2104
2105 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
2106
2107 for(i = 0; i < n_list; i++) {
2108 struct statvfs ss;
2109
2110 if (fstatvfs(dirfd(d), &ss) < 0) {
2111 r = -errno;
2112 goto finish;
2113 }
2114
2115 if (sum <= max_use &&
2116 (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2117 break;
2118
2119 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
e7bf07b3 2120 log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
0ac38b70
LP
2121 sum -= list[i].usage;
2122 } else if (errno != ENOENT)
2123 log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2124 }
2125
2126finish:
2127 for (i = 0; i < n_list; i++)
2128 free(list[i].filename);
2129
2130 free(list);
2131
de190aef
LP
2132 if (d)
2133 closedir(d);
2134
0ac38b70
LP
2135 return r;
2136}
cf244689
LP
2137
2138int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2139 uint64_t i, n;
2140 uint64_t q, xor_hash = 0;
2141 int r;
2142 EntryItem *items;
2143 dual_timestamp ts;
2144
2145 assert(from);
2146 assert(to);
2147 assert(o);
2148 assert(p);
2149
2150 if (!to->writable)
2151 return -EPERM;
2152
2153 ts.monotonic = le64toh(o->entry.monotonic);
2154 ts.realtime = le64toh(o->entry.realtime);
2155
2156 if (to->tail_entry_monotonic_valid &&
2157 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2158 return -EINVAL;
2159
2160 if (ts.realtime < le64toh(to->header->tail_entry_realtime))
2161 return -EINVAL;
2162
2163 n = journal_file_entry_n_items(o);
2164 items = alloca(sizeof(EntryItem) * n);
2165
2166 for (i = 0; i < n; i++) {
4fd052ae
FC
2167 uint64_t l, h;
2168 le64_t le_hash;
cf244689
LP
2169 size_t t;
2170 void *data;
2171 Object *u;
2172
2173 q = le64toh(o->entry.items[i].object_offset);
2174 le_hash = o->entry.items[i].hash;
2175
2176 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2177 if (r < 0)
2178 return r;
2179
2180 if (le_hash != o->data.hash)
2181 return -EBADMSG;
2182
2183 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2184 t = (size_t) l;
2185
2186 /* We hit the limit on 32bit machines */
2187 if ((uint64_t) t != l)
2188 return -E2BIG;
2189
2190 if (o->object.flags & OBJECT_COMPRESSED) {
2191#ifdef HAVE_XZ
2192 uint64_t rsize;
2193
2194 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2195 return -EBADMSG;
2196
2197 data = from->compress_buffer;
2198 l = rsize;
2199#else
2200 return -EPROTONOSUPPORT;
2201#endif
2202 } else
2203 data = o->data.payload;
2204
2205 r = journal_file_append_data(to, data, l, &u, &h);
2206 if (r < 0)
2207 return r;
2208
2209 xor_hash ^= le64toh(u->data.hash);
2210 items[i].object_offset = htole64(h);
2211 items[i].hash = u->data.hash;
2212
2213 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2214 if (r < 0)
2215 return r;
2216 }
2217
2218 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2219}
babfc091
LP
2220
2221void journal_default_metrics(JournalMetrics *m, int fd) {
2222 uint64_t fs_size = 0;
2223 struct statvfs ss;
a7bc2c2a 2224 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2225
2226 assert(m);
2227 assert(fd >= 0);
2228
2229 if (fstatvfs(fd, &ss) >= 0)
2230 fs_size = ss.f_frsize * ss.f_blocks;
2231
2232 if (m->max_use == (uint64_t) -1) {
2233
2234 if (fs_size > 0) {
2235 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2236
2237 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2238 m->max_use = DEFAULT_MAX_USE_UPPER;
2239
2240 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2241 m->max_use = DEFAULT_MAX_USE_LOWER;
2242 } else
2243 m->max_use = DEFAULT_MAX_USE_LOWER;
2244 } else {
2245 m->max_use = PAGE_ALIGN(m->max_use);
2246
2247 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2248 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2249 }
2250
2251 if (m->max_size == (uint64_t) -1) {
2252 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2253
2254 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2255 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2256 } else
2257 m->max_size = PAGE_ALIGN(m->max_size);
2258
2259 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2260 m->max_size = JOURNAL_FILE_SIZE_MIN;
2261
2262 if (m->max_size*2 > m->max_use)
2263 m->max_use = m->max_size*2;
2264
2265 if (m->min_size == (uint64_t) -1)
2266 m->min_size = JOURNAL_FILE_SIZE_MIN;
2267 else {
2268 m->min_size = PAGE_ALIGN(m->min_size);
2269
2270 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2271 m->min_size = JOURNAL_FILE_SIZE_MIN;
2272
2273 if (m->min_size > m->max_size)
2274 m->max_size = m->min_size;
2275 }
2276
2277 if (m->keep_free == (uint64_t) -1) {
2278
2279 if (fs_size > 0) {
2280 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2281
2282 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2283 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2284
2285 } else
2286 m->keep_free = DEFAULT_KEEP_FREE;
2287 }
2288
e7bf07b3
LP
2289 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2290 format_bytes(a, sizeof(a), m->max_use),
2291 format_bytes(b, sizeof(b), m->max_size),
2292 format_bytes(c, sizeof(c), m->min_size),
2293 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2294}
08984293
LP
2295
2296int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2297 Object *o;
2298 int r;
2299
2300 assert(f);
2301 assert(from || to);
2302
2303 if (from) {
2304 r = journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, &o, NULL);
2305 if (r <= 0)
2306 return r;
2307
2308 *from = le64toh(o->entry.realtime);
2309 }
2310
2311 if (to) {
2312 r = journal_file_next_entry(f, NULL, 0, DIRECTION_UP, &o, NULL);
2313 if (r <= 0)
2314 return r;
2315
2316 *to = le64toh(o->entry.realtime);
2317 }
2318
2319 return 1;
2320}
2321
2322int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2323 char t[9+32+1] = "_BOOT_ID=";
2324 Object *o;
2325 uint64_t p;
2326 int r;
2327
2328 assert(f);
2329 assert(from || to);
2330
2331 sd_id128_to_string(boot_id, t + 9);
2332
2333 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2334 if (r <= 0)
2335 return r;
2336
2337 if (le64toh(o->data.n_entries) <= 0)
2338 return 0;
2339
2340 if (from) {
2341 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2342 if (r < 0)
2343 return r;
2344
2345 *from = le64toh(o->entry.monotonic);
2346 }
2347
2348 if (to) {
2349 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2350 if (r < 0)
2351 return r;
2352
2353 r = generic_array_get_plus_one(f,
2354 le64toh(o->data.entry_offset),
2355 le64toh(o->data.entry_array_offset),
2356 le64toh(o->data.n_entries)-1,
2357 &o, NULL);
2358 if (r <= 0)
2359 return r;
2360
2361 *to = le64toh(o->entry.monotonic);
2362 }
2363
2364 return 1;
2365}