]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journal: add basic object definition for signatures
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
32#include "lookup3.h"
807e17f0 33#include "compress.h"
cec736d2 34
de190aef
LP
/* Sizes, in bytes, of the item area of the data and field hash tables
 * that are set up for newly created journal files */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*16ULL)
#define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL*16ULL)

/* Size a mapping window is extended to when less than this is requested */
#define DEFAULT_WINDOW_SIZE (8ULL*1024ULL*1024ULL)

/* Payloads shorter than this are stored without attempting compression */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */

/* The eight magic bytes every journal file header starts with */
static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };

/* Rounds x up to the next multiple of 8 */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
64
65void journal_file_close(JournalFile *f) {
de190aef 66 int t;
cec736d2 67
de190aef 68 assert(f);
cec736d2 69
d384c7a8
MS
70 if (f->header) {
71 if (f->writable)
72 f->header->state = STATE_OFFLINE;
cec736d2 73
d384c7a8
MS
74 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
75 }
cec736d2 76
de190aef
LP
77 for (t = 0; t < _WINDOW_MAX; t++)
78 if (f->windows[t].ptr)
79 munmap(f->windows[t].ptr, f->windows[t].size);
cec736d2 80
0ac38b70
LP
81 if (f->fd >= 0)
82 close_nointr_nofail(f->fd);
83
cec736d2 84 free(f->path);
807e17f0
LP
85
86#ifdef HAVE_XZ
87 free(f->compress_buffer);
88#endif
89
cec736d2
LP
90 free(f);
91}
92
0ac38b70 93static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
94 Header h;
95 ssize_t k;
96 int r;
97
98 assert(f);
99
100 zero(h);
101 memcpy(h.signature, signature, 8);
23b0b2b2 102 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2
LP
103
104 r = sd_id128_randomize(&h.file_id);
105 if (r < 0)
106 return r;
107
0ac38b70
LP
108 if (template) {
109 h.seqnum_id = template->header->seqnum_id;
110 h.seqnum = template->header->seqnum;
111 } else
112 h.seqnum_id = h.file_id;
cec736d2
LP
113
114 k = pwrite(f->fd, &h, sizeof(h), 0);
115 if (k < 0)
116 return -errno;
117
118 if (k != sizeof(h))
119 return -EIO;
120
121 return 0;
122}
123
124static int journal_file_refresh_header(JournalFile *f) {
125 int r;
de190aef 126 sd_id128_t boot_id;
cec736d2
LP
127
128 assert(f);
129
130 r = sd_id128_get_machine(&f->header->machine_id);
131 if (r < 0)
132 return r;
133
de190aef 134 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
135 if (r < 0)
136 return r;
137
de190aef
LP
138 if (sd_id128_equal(boot_id, f->header->boot_id))
139 f->tail_entry_monotonic_valid = true;
140
141 f->header->boot_id = boot_id;
142
143 f->header->state = STATE_ONLINE;
b788cc23
LP
144
145 __sync_synchronize();
146
cec736d2
LP
147 return 0;
148}
149
150static int journal_file_verify_header(JournalFile *f) {
151 assert(f);
152
153 if (memcmp(f->header, signature, 8))
154 return -EBADMSG;
155
807e17f0
LP
156#ifdef HAVE_XZ
157 if ((le64toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
158 return -EPROTONOSUPPORT;
159#else
cec736d2
LP
160 if (f->header->incompatible_flags != 0)
161 return -EPROTONOSUPPORT;
807e17f0 162#endif
cec736d2 163
23b0b2b2
LP
164 if (f->header->header_size != htole64(ALIGN64(sizeof(*(f->header)))))
165 return -EBADMSG;
166
167 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
cec736d2
LP
168 return -ENODATA;
169
170 if (f->writable) {
ccdbaf91 171 uint8_t state;
cec736d2
LP
172 sd_id128_t machine_id;
173 int r;
174
175 r = sd_id128_get_machine(&machine_id);
176 if (r < 0)
177 return r;
178
179 if (!sd_id128_equal(machine_id, f->header->machine_id))
180 return -EHOSTDOWN;
181
de190aef 182 state = f->header->state;
cec736d2
LP
183
184 if (state == STATE_ONLINE)
185 log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path);
186 else if (state == STATE_ARCHIVED)
187 return -ESHUTDOWN;
188 else if (state != STATE_OFFLINE)
189 log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state);
190 }
191
192 return 0;
193}
194
195static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 196 uint64_t old_size, new_size;
fec2aa2f 197 int r;
cec736d2
LP
198
199 assert(f);
200
cec736d2 201 /* We assume that this file is not sparse, and we know that
38ac38b2 202 * for sure, since we always call posix_fallocate()
cec736d2
LP
203 * ourselves */
204
205 old_size =
23b0b2b2 206 le64toh(f->header->header_size) +
cec736d2
LP
207 le64toh(f->header->arena_size);
208
bc85bfee 209 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
210 if (new_size < le64toh(f->header->header_size))
211 new_size = le64toh(f->header->header_size);
bc85bfee
LP
212
213 if (new_size <= old_size)
cec736d2
LP
214 return 0;
215
bc85bfee
LP
216 if (f->metrics.max_size > 0 &&
217 new_size > f->metrics.max_size)
218 return -E2BIG;
cec736d2 219
bc85bfee
LP
220 if (new_size > f->metrics.min_size &&
221 f->metrics.keep_free > 0) {
cec736d2
LP
222 struct statvfs svfs;
223
224 if (fstatvfs(f->fd, &svfs) >= 0) {
225 uint64_t available;
226
227 available = svfs.f_bfree * svfs.f_bsize;
228
bc85bfee
LP
229 if (available >= f->metrics.keep_free)
230 available -= f->metrics.keep_free;
cec736d2
LP
231 else
232 available = 0;
233
234 if (new_size - old_size > available)
235 return -E2BIG;
236 }
237 }
238
bc85bfee
LP
239 /* Note that the glibc fallocate() fallback is very
240 inefficient, hence we try to minimize the allocation area
241 as we can. */
fec2aa2f
GV
242 r = posix_fallocate(f->fd, old_size, new_size - old_size);
243 if (r != 0)
244 return -r;
cec736d2
LP
245
246 if (fstat(f->fd, &f->last_stat) < 0)
247 return -errno;
248
23b0b2b2 249 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
250
251 return 0;
252}
253
254static int journal_file_map(
255 JournalFile *f,
256 uint64_t offset,
257 uint64_t size,
258 void **_window,
259 uint64_t *_woffset,
260 uint64_t *_wsize,
261 void **ret) {
262
263 uint64_t woffset, wsize;
264 void *window;
265
266 assert(f);
267 assert(size > 0);
268 assert(ret);
269
270 woffset = offset & ~((uint64_t) page_size() - 1ULL);
271 wsize = size + (offset - woffset);
272 wsize = PAGE_ALIGN(wsize);
273
2a59ea54
LP
274 /* Avoid SIGBUS on invalid accesses */
275 if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
276 return -EADDRNOTAVAIL;
277
cec736d2
LP
278 window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
279 if (window == MAP_FAILED)
280 return -errno;
281
282 if (_window)
283 *_window = window;
284
285 if (_woffset)
286 *_woffset = woffset;
287
288 if (_wsize)
289 *_wsize = wsize;
290
291 *ret = (uint8_t*) window + (offset - woffset);
292
293 return 0;
294}
295
de190aef 296static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
6c8a39b8 297 void *p = NULL;
cec736d2
LP
298 uint64_t delta;
299 int r;
de190aef 300 Window *w;
cec736d2
LP
301
302 assert(f);
303 assert(ret);
de190aef
LP
304 assert(wt >= 0);
305 assert(wt < _WINDOW_MAX);
cec736d2 306
4bbdcdb3
LP
307 if (offset + size > (uint64_t) f->last_stat.st_size) {
308 /* Hmm, out of range? Let's refresh the fstat() data
309 * first, before we trust that check. */
310
311 if (fstat(f->fd, &f->last_stat) < 0 ||
312 offset + size > (uint64_t) f->last_stat.st_size)
313 return -EADDRNOTAVAIL;
314 }
315
de190aef 316 w = f->windows + wt;
cec736d2 317
de190aef
LP
318 if (_likely_(w->ptr &&
319 w->offset <= offset &&
320 w->offset + w->size >= offset + size)) {
321
322 *ret = (uint8_t*) w->ptr + (offset - w->offset);
cec736d2
LP
323 return 0;
324 }
325
de190aef
LP
326 if (w->ptr) {
327 if (munmap(w->ptr, w->size) < 0)
cec736d2
LP
328 return -errno;
329
de190aef
LP
330 w->ptr = NULL;
331 w->size = w->offset = 0;
cec736d2
LP
332 }
333
334 if (size < DEFAULT_WINDOW_SIZE) {
335 /* If the default window size is larger then what was
336 * asked for extend the mapping a bit in the hope to
337 * minimize needed remappings later on. We add half
338 * the window space before and half behind the
339 * requested mapping */
340
1921a5cb 341 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
cec736d2 342
a99c349d 343 if (delta > offset)
cec736d2
LP
344 delta = offset;
345
346 offset -= delta;
a99c349d 347 size = DEFAULT_WINDOW_SIZE;
cec736d2
LP
348 } else
349 delta = 0;
350
2a59ea54 351 if (offset + size > (uint64_t) f->last_stat.st_size)
1921a5cb 352 size = (uint64_t) f->last_stat.st_size - offset;
2a59ea54
LP
353
354 if (size <= 0)
355 return -EADDRNOTAVAIL;
356
cec736d2
LP
357 r = journal_file_map(f,
358 offset, size,
de190aef
LP
359 &w->ptr, &w->offset, &w->size,
360 &p);
cec736d2
LP
361
362 if (r < 0)
363 return r;
364
365 *ret = (uint8_t*) p + delta;
366 return 0;
367}
368
369static bool verify_hash(Object *o) {
de190aef 370 uint64_t h1, h2;
cec736d2
LP
371
372 assert(o);
373
807e17f0 374 if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
cec736d2 375 h1 = le64toh(o->data.hash);
de190aef
LP
376 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
377 } else if (o->object.type == OBJECT_FIELD) {
378 h1 = le64toh(o->field.hash);
379 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
380 } else
381 return true;
cec736d2 382
de190aef 383 return h1 == h2;
cec736d2
LP
384}
385
de190aef 386int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
387 int r;
388 void *t;
389 Object *o;
390 uint64_t s;
391
392 assert(f);
393 assert(ret);
de190aef 394 assert(type < _OBJECT_TYPE_MAX);
cec736d2 395
de190aef 396 r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
397 if (r < 0)
398 return r;
399
400 o = (Object*) t;
401 s = le64toh(o->object.size);
402
403 if (s < sizeof(ObjectHeader))
404 return -EBADMSG;
405
de190aef 406 if (type >= 0 && o->object.type != type)
cec736d2
LP
407 return -EBADMSG;
408
409 if (s > sizeof(ObjectHeader)) {
de190aef 410 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
411 if (r < 0)
412 return r;
413
414 o = (Object*) t;
415 }
416
417 if (!verify_hash(o))
418 return -EBADMSG;
419
420 *ret = o;
421 return 0;
422}
423
c2373f84 424static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
425 uint64_t r;
426
427 assert(f);
428
429 r = le64toh(f->header->seqnum) + 1;
c2373f84
LP
430
431 if (seqnum) {
de190aef 432 /* If an external seqnum counter was passed, we update
c2373f84
LP
433 * both the local and the external one, and set it to
434 * the maximum of both */
435
436 if (*seqnum + 1 > r)
437 r = *seqnum + 1;
438
439 *seqnum = r;
440 }
441
cec736d2
LP
442 f->header->seqnum = htole64(r);
443
de190aef
LP
444 if (f->header->first_seqnum == 0)
445 f->header->first_seqnum = htole64(r);
446
cec736d2
LP
447 return r;
448}
449
de190aef 450static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
451 int r;
452 uint64_t p;
453 Object *tail, *o;
454 void *t;
455
456 assert(f);
457 assert(size >= sizeof(ObjectHeader));
458 assert(offset);
459 assert(ret);
460
461 p = le64toh(f->header->tail_object_offset);
cec736d2 462 if (p == 0)
23b0b2b2 463 p = le64toh(f->header->header_size);
cec736d2 464 else {
de190aef 465 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
466 if (r < 0)
467 return r;
468
469 p += ALIGN64(le64toh(tail->object.size));
470 }
471
472 r = journal_file_allocate(f, p, size);
473 if (r < 0)
474 return r;
475
de190aef 476 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
477 if (r < 0)
478 return r;
479
480 o = (Object*) t;
481
482 zero(o->object);
de190aef 483 o->object.type = type;
cec736d2
LP
484 o->object.size = htole64(size);
485
486 f->header->tail_object_offset = htole64(p);
cec736d2
LP
487 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
488
489 *ret = o;
490 *offset = p;
491
492 return 0;
493}
494
de190aef 495static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
496 uint64_t s, p;
497 Object *o;
498 int r;
499
500 assert(f);
501
de190aef
LP
502 s = DEFAULT_DATA_HASH_TABLE_SIZE;
503 r = journal_file_append_object(f,
504 OBJECT_DATA_HASH_TABLE,
505 offsetof(Object, hash_table.items) + s,
506 &o, &p);
cec736d2
LP
507 if (r < 0)
508 return r;
509
de190aef 510 memset(o->hash_table.items, 0, s);
cec736d2 511
de190aef
LP
512 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
513 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
514
515 return 0;
516}
517
de190aef 518static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
519 uint64_t s, p;
520 Object *o;
521 int r;
522
523 assert(f);
524
de190aef
LP
525 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
526 r = journal_file_append_object(f,
527 OBJECT_FIELD_HASH_TABLE,
528 offsetof(Object, hash_table.items) + s,
529 &o, &p);
cec736d2
LP
530 if (r < 0)
531 return r;
532
de190aef 533 memset(o->hash_table.items, 0, s);
cec736d2 534
de190aef
LP
535 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
536 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
537
538 return 0;
539}
540
de190aef 541static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
542 uint64_t s, p;
543 void *t;
544 int r;
545
546 assert(f);
547
de190aef
LP
548 p = le64toh(f->header->data_hash_table_offset);
549 s = le64toh(f->header->data_hash_table_size);
cec736d2 550
de190aef
LP
551 r = journal_file_move_to(f,
552 WINDOW_DATA_HASH_TABLE,
553 p, s,
554 &t);
cec736d2
LP
555 if (r < 0)
556 return r;
557
de190aef 558 f->data_hash_table = t;
cec736d2
LP
559 return 0;
560}
561
de190aef 562static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
563 uint64_t s, p;
564 void *t;
565 int r;
566
567 assert(f);
568
de190aef
LP
569 p = le64toh(f->header->field_hash_table_offset);
570 s = le64toh(f->header->field_hash_table_size);
cec736d2 571
de190aef
LP
572 r = journal_file_move_to(f,
573 WINDOW_FIELD_HASH_TABLE,
574 p, s,
575 &t);
cec736d2
LP
576 if (r < 0)
577 return r;
578
de190aef 579 f->field_hash_table = t;
cec736d2
LP
580 return 0;
581}
582
de190aef
LP
583static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
584 uint64_t p, h;
cec736d2
LP
585 int r;
586
587 assert(f);
588 assert(o);
589 assert(offset > 0);
de190aef 590 assert(o->object.type == OBJECT_DATA);
cec736d2 591
48496df6
LP
592 /* This might alter the window we are looking at */
593
de190aef
LP
594 o->data.next_hash_offset = o->data.next_field_offset = 0;
595 o->data.entry_offset = o->data.entry_array_offset = 0;
596 o->data.n_entries = 0;
cec736d2 597
de190aef
LP
598 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
599 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2
LP
600 if (p == 0) {
601 /* Only entry in the hash table is easy */
de190aef 602 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 603 } else {
48496df6
LP
604 /* Move back to the previous data object, to patch in
605 * pointer */
cec736d2 606
de190aef 607 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
608 if (r < 0)
609 return r;
610
de190aef 611 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
612 }
613
de190aef 614 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2
LP
615
616 return 0;
617}
618
de190aef
LP
619int journal_file_find_data_object_with_hash(
620 JournalFile *f,
621 const void *data, uint64_t size, uint64_t hash,
622 Object **ret, uint64_t *offset) {
48496df6 623
de190aef 624 uint64_t p, osize, h;
cec736d2
LP
625 int r;
626
627 assert(f);
628 assert(data || size == 0);
629
630 osize = offsetof(Object, data.payload) + size;
631
bc85bfee
LP
632 if (f->header->data_hash_table_size == 0)
633 return -EBADMSG;
634
de190aef
LP
635 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
636 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 637
de190aef
LP
638 while (p > 0) {
639 Object *o;
cec736d2 640
de190aef 641 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
642 if (r < 0)
643 return r;
644
807e17f0 645 if (le64toh(o->data.hash) != hash)
85a131e8 646 goto next;
807e17f0
LP
647
648 if (o->object.flags & OBJECT_COMPRESSED) {
649#ifdef HAVE_XZ
b785c858 650 uint64_t l, rsize;
cec736d2 651
807e17f0
LP
652 l = le64toh(o->object.size);
653 if (l <= offsetof(Object, data.payload))
cec736d2
LP
654 return -EBADMSG;
655
807e17f0
LP
656 l -= offsetof(Object, data.payload);
657
658 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
659 return -EBADMSG;
660
b785c858 661 if (rsize == size &&
807e17f0
LP
662 memcmp(f->compress_buffer, data, size) == 0) {
663
664 if (ret)
665 *ret = o;
666
667 if (offset)
668 *offset = p;
669
670 return 1;
671 }
672#else
673 return -EPROTONOSUPPORT;
674#endif
675
676 } else if (le64toh(o->object.size) == osize &&
677 memcmp(o->data.payload, data, size) == 0) {
678
cec736d2
LP
679 if (ret)
680 *ret = o;
681
682 if (offset)
683 *offset = p;
684
de190aef 685 return 1;
cec736d2
LP
686 }
687
85a131e8 688 next:
cec736d2
LP
689 p = le64toh(o->data.next_hash_offset);
690 }
691
de190aef
LP
692 return 0;
693}
694
695int journal_file_find_data_object(
696 JournalFile *f,
697 const void *data, uint64_t size,
698 Object **ret, uint64_t *offset) {
699
700 uint64_t hash;
701
702 assert(f);
703 assert(data || size == 0);
704
705 hash = hash64(data, size);
706
707 return journal_file_find_data_object_with_hash(f,
708 data, size, hash,
709 ret, offset);
710}
711
48496df6
LP
712static int journal_file_append_data(
713 JournalFile *f,
714 const void *data, uint64_t size,
715 Object **ret, uint64_t *offset) {
716
de190aef
LP
717 uint64_t hash, p;
718 uint64_t osize;
719 Object *o;
720 int r;
807e17f0 721 bool compressed = false;
de190aef
LP
722
723 assert(f);
724 assert(data || size == 0);
725
726 hash = hash64(data, size);
727
728 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
729 if (r < 0)
730 return r;
731 else if (r > 0) {
732
733 if (ret)
734 *ret = o;
735
736 if (offset)
737 *offset = p;
738
739 return 0;
740 }
741
742 osize = offsetof(Object, data.payload) + size;
743 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
744 if (r < 0)
745 return r;
746
cec736d2 747 o->data.hash = htole64(hash);
807e17f0
LP
748
749#ifdef HAVE_XZ
750 if (f->compress &&
751 size >= COMPRESSION_SIZE_THRESHOLD) {
752 uint64_t rsize;
753
754 compressed = compress_blob(data, size, o->data.payload, &rsize);
755
756 if (compressed) {
757 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
758 o->object.flags |= OBJECT_COMPRESSED;
759
760 f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED);
761
762 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
763 }
764 }
765#endif
766
767 if (!compressed)
768 memcpy(o->data.payload, data, size);
cec736d2 769
de190aef 770 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
771 if (r < 0)
772 return r;
773
48496df6
LP
774 /* The linking might have altered the window, so let's
775 * refresh our pointer */
776 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
777 if (r < 0)
778 return r;
779
cec736d2
LP
780 if (ret)
781 *ret = o;
782
783 if (offset)
de190aef 784 *offset = p;
cec736d2
LP
785
786 return 0;
787}
788
789uint64_t journal_file_entry_n_items(Object *o) {
790 assert(o);
7be3aa17 791 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
792
793 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
794}
795
de190aef
LP
796static uint64_t journal_file_entry_array_n_items(Object *o) {
797 assert(o);
7be3aa17 798 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
799
800 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
801}
802
803static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
804 le64_t *first,
805 le64_t *idx,
de190aef 806 uint64_t p) {
cec736d2 807 int r;
de190aef
LP
808 uint64_t n = 0, ap = 0, q, i, a, hidx;
809 Object *o;
810
cec736d2 811 assert(f);
de190aef
LP
812 assert(first);
813 assert(idx);
814 assert(p > 0);
cec736d2 815
de190aef
LP
816 a = le64toh(*first);
817 i = hidx = le64toh(*idx);
818 while (a > 0) {
819
820 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
821 if (r < 0)
822 return r;
cec736d2 823
de190aef
LP
824 n = journal_file_entry_array_n_items(o);
825 if (i < n) {
826 o->entry_array.items[i] = htole64(p);
827 *idx = htole64(hidx + 1);
828 return 0;
829 }
cec736d2 830
de190aef
LP
831 i -= n;
832 ap = a;
833 a = le64toh(o->entry_array.next_entry_array_offset);
834 }
835
836 if (hidx > n)
837 n = (hidx+1) * 2;
838 else
839 n = n * 2;
840
841 if (n < 4)
842 n = 4;
843
844 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
845 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
846 &o, &q);
cec736d2
LP
847 if (r < 0)
848 return r;
849
de190aef 850 o->entry_array.items[i] = htole64(p);
cec736d2 851
de190aef 852 if (ap == 0)
7be3aa17 853 *first = htole64(q);
cec736d2 854 else {
de190aef 855 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
856 if (r < 0)
857 return r;
858
de190aef
LP
859 o->entry_array.next_entry_array_offset = htole64(q);
860 }
cec736d2 861
de190aef
LP
862 *idx = htole64(hidx + 1);
863
864 return 0;
865}
cec736d2 866
de190aef 867static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
868 le64_t *extra,
869 le64_t *first,
870 le64_t *idx,
de190aef
LP
871 uint64_t p) {
872
873 int r;
874
875 assert(f);
876 assert(extra);
877 assert(first);
878 assert(idx);
879 assert(p > 0);
880
881 if (*idx == 0)
882 *extra = htole64(p);
883 else {
4fd052ae 884 le64_t i;
de190aef 885
7be3aa17 886 i = htole64(le64toh(*idx) - 1);
de190aef
LP
887 r = link_entry_into_array(f, first, &i, p);
888 if (r < 0)
889 return r;
cec736d2
LP
890 }
891
de190aef
LP
892 *idx = htole64(le64toh(*idx) + 1);
893 return 0;
894}
895
896static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
897 uint64_t p;
898 int r;
899 assert(f);
900 assert(o);
901 assert(offset > 0);
902
903 p = le64toh(o->entry.items[i].object_offset);
904 if (p == 0)
905 return -EINVAL;
906
907 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
908 if (r < 0)
909 return r;
910
de190aef
LP
911 return link_entry_into_array_plus_one(f,
912 &o->data.entry_offset,
913 &o->data.entry_array_offset,
914 &o->data.n_entries,
915 offset);
cec736d2
LP
916}
917
918static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 919 uint64_t n, i;
cec736d2
LP
920 int r;
921
922 assert(f);
923 assert(o);
924 assert(offset > 0);
de190aef 925 assert(o->object.type == OBJECT_ENTRY);
cec736d2 926
b788cc23
LP
927 __sync_synchronize();
928
cec736d2 929 /* Link up the entry itself */
de190aef
LP
930 r = link_entry_into_array(f,
931 &f->header->entry_array_offset,
932 &f->header->n_entries,
933 offset);
934 if (r < 0)
935 return r;
cec736d2 936
aaf53376 937 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 938
de190aef 939 if (f->header->head_entry_realtime == 0)
0ac38b70 940 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 941
0ac38b70 942 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
943 f->header->tail_entry_monotonic = o->entry.monotonic;
944
945 f->tail_entry_monotonic_valid = true;
cec736d2
LP
946
947 /* Link up the items */
948 n = journal_file_entry_n_items(o);
949 for (i = 0; i < n; i++) {
950 r = journal_file_link_entry_item(f, o, offset, i);
951 if (r < 0)
952 return r;
953 }
954
cec736d2
LP
955 return 0;
956}
957
958static int journal_file_append_entry_internal(
959 JournalFile *f,
960 const dual_timestamp *ts,
961 uint64_t xor_hash,
962 const EntryItem items[], unsigned n_items,
de190aef 963 uint64_t *seqnum,
cec736d2
LP
964 Object **ret, uint64_t *offset) {
965 uint64_t np;
966 uint64_t osize;
967 Object *o;
968 int r;
969
970 assert(f);
971 assert(items || n_items == 0);
de190aef 972 assert(ts);
cec736d2
LP
973
974 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
975
de190aef 976 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
977 if (r < 0)
978 return r;
979
de190aef 980 o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
cec736d2 981 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
982 o->entry.realtime = htole64(ts->realtime);
983 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
984 o->entry.xor_hash = htole64(xor_hash);
985 o->entry.boot_id = f->header->boot_id;
986
987 r = journal_file_link_entry(f, o, np);
988 if (r < 0)
989 return r;
990
991 if (ret)
992 *ret = o;
993
994 if (offset)
995 *offset = np;
996
997 return 0;
998}
999
cf244689 1000void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1001 assert(f);
1002
1003 /* inotify() does not receive IN_MODIFY events from file
1004 * accesses done via mmap(). After each access we hence
1005 * trigger IN_MODIFY by truncating the journal file to its
1006 * current size which triggers IN_MODIFY. */
1007
bc85bfee
LP
1008 __sync_synchronize();
1009
50f20cfd
LP
1010 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1011 log_error("Failed to to truncate file to its own size: %m");
1012}
1013
de190aef 1014int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1015 unsigned i;
1016 EntryItem *items;
1017 int r;
1018 uint64_t xor_hash = 0;
de190aef 1019 struct dual_timestamp _ts;
cec736d2
LP
1020
1021 assert(f);
1022 assert(iovec || n_iovec == 0);
1023
de190aef
LP
1024 if (!f->writable)
1025 return -EPERM;
1026
1027 if (!ts) {
1028 dual_timestamp_get(&_ts);
1029 ts = &_ts;
1030 }
1031
1032 if (f->tail_entry_monotonic_valid &&
1033 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1034 return -EINVAL;
1035
cf244689 1036 items = alloca(sizeof(EntryItem) * n_iovec);
cec736d2
LP
1037
1038 for (i = 0; i < n_iovec; i++) {
1039 uint64_t p;
1040 Object *o;
1041
1042 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1043 if (r < 0)
cf244689 1044 return r;
cec736d2
LP
1045
1046 xor_hash ^= le64toh(o->data.hash);
1047 items[i].object_offset = htole64(p);
de7b95cd 1048 items[i].hash = o->data.hash;
cec736d2
LP
1049 }
1050
de190aef 1051 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1052
50f20cfd
LP
1053 journal_file_post_change(f);
1054
cec736d2
LP
1055 return r;
1056}
1057
de190aef
LP
1058static int generic_array_get(JournalFile *f,
1059 uint64_t first,
1060 uint64_t i,
1061 Object **ret, uint64_t *offset) {
1062
cec736d2 1063 Object *o;
6c8a39b8 1064 uint64_t p = 0, a;
cec736d2
LP
1065 int r;
1066
1067 assert(f);
1068
de190aef
LP
1069 a = first;
1070 while (a > 0) {
1071 uint64_t n;
cec736d2 1072
de190aef
LP
1073 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1074 if (r < 0)
1075 return r;
cec736d2 1076
de190aef
LP
1077 n = journal_file_entry_array_n_items(o);
1078 if (i < n) {
1079 p = le64toh(o->entry_array.items[i]);
1080 break;
cec736d2
LP
1081 }
1082
de190aef
LP
1083 i -= n;
1084 a = le64toh(o->entry_array.next_entry_array_offset);
1085 }
1086
1087 if (a <= 0 || p <= 0)
1088 return 0;
1089
1090 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1091 if (r < 0)
1092 return r;
1093
1094 if (ret)
1095 *ret = o;
1096
1097 if (offset)
1098 *offset = p;
1099
1100 return 1;
1101}
1102
1103static int generic_array_get_plus_one(JournalFile *f,
1104 uint64_t extra,
1105 uint64_t first,
1106 uint64_t i,
1107 Object **ret, uint64_t *offset) {
1108
1109 Object *o;
1110
1111 assert(f);
1112
1113 if (i == 0) {
1114 int r;
1115
1116 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1117 if (r < 0)
1118 return r;
1119
de190aef
LP
1120 if (ret)
1121 *ret = o;
cec736d2 1122
de190aef
LP
1123 if (offset)
1124 *offset = extra;
cec736d2 1125
de190aef 1126 return 1;
cec736d2
LP
1127 }
1128
de190aef
LP
1129 return generic_array_get(f, first, i-1, ret, offset);
1130}
cec736d2 1131
de190aef
LP
/* Results returned by the test_object() callbacks used for bisecting
 * entry arrays */
enum {
        TEST_FOUND = 0, /* the tested entry matches the needle */
        TEST_LEFT  = 1, /* bisection continues after the tested entry */
        TEST_RIGHT = 2  /* bisection continues at or before the tested entry */
};
cec736d2 1137
de190aef
LP
1138static int generic_array_bisect(JournalFile *f,
1139 uint64_t first,
1140 uint64_t n,
1141 uint64_t needle,
1142 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1143 direction_t direction,
1144 Object **ret,
1145 uint64_t *offset,
1146 uint64_t *idx) {
1147
1148 uint64_t a, p, t = 0, i = 0, last_p = 0;
1149 bool subtract_one = false;
1150 Object *o, *array = NULL;
1151 int r;
cec736d2 1152
de190aef
LP
1153 assert(f);
1154 assert(test_object);
cec736d2 1155
de190aef
LP
1156 a = first;
1157 while (a > 0) {
1158 uint64_t left, right, k, lp;
1159
1160 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1161 if (r < 0)
1162 return r;
1163
de190aef
LP
1164 k = journal_file_entry_array_n_items(array);
1165 right = MIN(k, n);
1166 if (right <= 0)
1167 return 0;
cec736d2 1168
de190aef
LP
1169 i = right - 1;
1170 lp = p = le64toh(array->entry_array.items[i]);
1171 if (p <= 0)
1172 return -EBADMSG;
cec736d2 1173
de190aef
LP
1174 r = test_object(f, p, needle);
1175 if (r < 0)
1176 return r;
cec736d2 1177
de190aef
LP
1178 if (r == TEST_FOUND)
1179 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1180
1181 if (r == TEST_RIGHT) {
1182 left = 0;
1183 right -= 1;
1184 for (;;) {
1185 if (left == right) {
1186 if (direction == DIRECTION_UP)
1187 subtract_one = true;
1188
1189 i = left;
1190 goto found;
1191 }
1192
1193 assert(left < right);
1194
1195 i = (left + right) / 2;
1196 p = le64toh(array->entry_array.items[i]);
1197 if (p <= 0)
1198 return -EBADMSG;
1199
1200 r = test_object(f, p, needle);
1201 if (r < 0)
1202 return r;
cec736d2 1203
de190aef
LP
1204 if (r == TEST_FOUND)
1205 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1206
1207 if (r == TEST_RIGHT)
1208 right = i;
1209 else
1210 left = i + 1;
1211 }
1212 }
1213
1214 if (k > n)
cec736d2
LP
1215 return 0;
1216
de190aef
LP
1217 last_p = lp;
1218
1219 n -= k;
1220 t += k;
1221 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1222 }
1223
1224 return 0;
de190aef
LP
1225
1226found:
1227 if (subtract_one && t == 0 && i == 0)
1228 return 0;
1229
1230 if (subtract_one && i == 0)
1231 p = last_p;
1232 else if (subtract_one)
1233 p = le64toh(array->entry_array.items[i-1]);
1234 else
1235 p = le64toh(array->entry_array.items[i]);
1236
1237 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1238 if (r < 0)
1239 return r;
1240
1241 if (ret)
1242 *ret = o;
1243
1244 if (offset)
1245 *offset = p;
1246
1247 if (idx)
1248 *idx = t + i - (subtract_one ? 1 : 0);
1249
1250 return 1;
cec736d2
LP
1251}
1252
de190aef
LP
1253static int generic_array_bisect_plus_one(JournalFile *f,
1254 uint64_t extra,
1255 uint64_t first,
1256 uint64_t n,
1257 uint64_t needle,
1258 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1259 direction_t direction,
1260 Object **ret,
1261 uint64_t *offset,
1262 uint64_t *idx) {
1263
cec736d2
LP
1264 int r;
1265
1266 assert(f);
de190aef 1267 assert(test_object);
cec736d2 1268
de190aef
LP
1269 if (n <= 0)
1270 return 0;
cec736d2 1271
de190aef
LP
1272 /* This bisects the array in object 'first', but first checks
1273 * an extra */
de190aef
LP
1274 r = test_object(f, extra, needle);
1275 if (r < 0)
1276 return r;
1277 else if (r == TEST_FOUND) {
1278 Object *o;
1279
1280 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1281 if (r < 0)
1282 return r;
1283
1284 if (ret)
1285 *ret = o;
cec736d2 1286
de190aef
LP
1287 if (offset)
1288 *offset = extra;
440ee366
LP
1289
1290 if (idx)
1291 *idx = 0;
1292
1293 return 1;
de190aef 1294 } else if (r == TEST_RIGHT)
cec736d2
LP
1295 return 0;
1296
de190aef
LP
1297 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1298
1299 if (r > 0)
1300 (*idx) ++;
1301
1302 return r;
1303}
1304
1305static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1306 Object *o;
1307 int r;
1308
1309 assert(f);
1310 assert(p > 0);
1311
1312 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1313 if (r < 0)
1314 return r;
1315
de190aef
LP
1316 if (le64toh(o->entry.seqnum) == needle)
1317 return TEST_FOUND;
1318 else if (le64toh(o->entry.seqnum) < needle)
1319 return TEST_LEFT;
1320 else
1321 return TEST_RIGHT;
1322}
cec736d2 1323
de190aef
LP
1324int journal_file_move_to_entry_by_seqnum(
1325 JournalFile *f,
1326 uint64_t seqnum,
1327 direction_t direction,
1328 Object **ret,
1329 uint64_t *offset) {
1330
1331 return generic_array_bisect(f,
1332 le64toh(f->header->entry_array_offset),
1333 le64toh(f->header->n_entries),
1334 seqnum,
1335 test_object_seqnum,
1336 direction,
1337 ret, offset, NULL);
1338}
cec736d2 1339
de190aef
LP
1340static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1341 Object *o;
1342 int r;
1343
1344 assert(f);
1345 assert(p > 0);
1346
1347 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1348 if (r < 0)
1349 return r;
1350
1351 if (le64toh(o->entry.realtime) == needle)
1352 return TEST_FOUND;
1353 else if (le64toh(o->entry.realtime) < needle)
1354 return TEST_LEFT;
1355 else
1356 return TEST_RIGHT;
cec736d2
LP
1357}
1358
de190aef
LP
1359int journal_file_move_to_entry_by_realtime(
1360 JournalFile *f,
1361 uint64_t realtime,
1362 direction_t direction,
1363 Object **ret,
1364 uint64_t *offset) {
1365
1366 return generic_array_bisect(f,
1367 le64toh(f->header->entry_array_offset),
1368 le64toh(f->header->n_entries),
1369 realtime,
1370 test_object_realtime,
1371 direction,
1372 ret, offset, NULL);
1373}
1374
1375static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1376 Object *o;
1377 int r;
1378
1379 assert(f);
1380 assert(p > 0);
1381
1382 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1383 if (r < 0)
1384 return r;
1385
1386 if (le64toh(o->entry.monotonic) == needle)
1387 return TEST_FOUND;
1388 else if (le64toh(o->entry.monotonic) < needle)
1389 return TEST_LEFT;
1390 else
1391 return TEST_RIGHT;
1392}
1393
1394int journal_file_move_to_entry_by_monotonic(
1395 JournalFile *f,
1396 sd_id128_t boot_id,
1397 uint64_t monotonic,
1398 direction_t direction,
1399 Object **ret,
1400 uint64_t *offset) {
1401
1402 char t[8+32+1] = "_BOOT_ID=";
1403 Object *o;
1404 int r;
1405
1406 sd_id128_to_string(boot_id, t + 8);
1407
1408 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1409 if (r < 0)
1410 return r;
1411 else if (r == 0)
1412 return -ENOENT;
1413
1414 return generic_array_bisect_plus_one(f,
1415 le64toh(o->data.entry_offset),
1416 le64toh(o->data.entry_array_offset),
1417 le64toh(o->data.n_entries),
1418 monotonic,
1419 test_object_monotonic,
1420 direction,
1421 ret, offset, NULL);
1422}
1423
1424static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1425 assert(f);
1426 assert(p > 0);
1427
1428 if (p == needle)
1429 return TEST_FOUND;
1430 else if (p < needle)
1431 return TEST_LEFT;
1432 else
1433 return TEST_RIGHT;
1434}
1435
1436int journal_file_next_entry(
1437 JournalFile *f,
1438 Object *o, uint64_t p,
1439 direction_t direction,
1440 Object **ret, uint64_t *offset) {
1441
1442 uint64_t i, n;
cec736d2
LP
1443 int r;
1444
1445 assert(f);
de190aef
LP
1446 assert(p > 0 || !o);
1447
1448 n = le64toh(f->header->n_entries);
1449 if (n <= 0)
1450 return 0;
cec736d2
LP
1451
1452 if (!o)
de190aef 1453 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1454 else {
de190aef 1455 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1456 return -EINVAL;
1457
de190aef
LP
1458 r = generic_array_bisect(f,
1459 le64toh(f->header->entry_array_offset),
1460 le64toh(f->header->n_entries),
1461 p,
1462 test_object_offset,
1463 DIRECTION_DOWN,
1464 NULL, NULL,
1465 &i);
1466 if (r <= 0)
1467 return r;
1468
1469 if (direction == DIRECTION_DOWN) {
1470 if (i >= n - 1)
1471 return 0;
1472
1473 i++;
1474 } else {
1475 if (i <= 0)
1476 return 0;
1477
1478 i--;
1479 }
cec736d2
LP
1480 }
1481
de190aef
LP
1482 /* And jump to it */
1483 return generic_array_get(f,
1484 le64toh(f->header->entry_array_offset),
1485 i,
1486 ret, offset);
1487}
cec736d2 1488
de190aef
LP
1489int journal_file_skip_entry(
1490 JournalFile *f,
1491 Object *o, uint64_t p,
1492 int64_t skip,
1493 Object **ret, uint64_t *offset) {
1494
1495 uint64_t i, n;
1496 int r;
1497
1498 assert(f);
1499 assert(o);
1500 assert(p > 0);
1501
1502 if (o->object.type != OBJECT_ENTRY)
1503 return -EINVAL;
1504
1505 r = generic_array_bisect(f,
1506 le64toh(f->header->entry_array_offset),
1507 le64toh(f->header->n_entries),
1508 p,
1509 test_object_offset,
1510 DIRECTION_DOWN,
1511 NULL, NULL,
1512 &i);
1513 if (r <= 0)
cec736d2
LP
1514 return r;
1515
de190aef
LP
1516 /* Calculate new index */
1517 if (skip < 0) {
1518 if ((uint64_t) -skip >= i)
1519 i = 0;
1520 else
1521 i = i - (uint64_t) -skip;
1522 } else
1523 i += (uint64_t) skip;
cec736d2 1524
de190aef
LP
1525 n = le64toh(f->header->n_entries);
1526 if (n <= 0)
1527 return -EBADMSG;
cec736d2 1528
de190aef
LP
1529 if (i >= n)
1530 i = n-1;
1531
1532 return generic_array_get(f,
1533 le64toh(f->header->entry_array_offset),
1534 i,
1535 ret, offset);
cec736d2
LP
1536}
1537
de190aef
LP
1538int journal_file_next_entry_for_data(
1539 JournalFile *f,
1540 Object *o, uint64_t p,
1541 uint64_t data_offset,
1542 direction_t direction,
1543 Object **ret, uint64_t *offset) {
1544
1545 uint64_t n, i;
cec736d2 1546 int r;
de190aef 1547 Object *d;
cec736d2
LP
1548
1549 assert(f);
de190aef 1550 assert(p > 0 || !o);
cec736d2 1551
de190aef 1552 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1553 if (r < 0)
de190aef 1554 return r;
cec736d2 1555
de190aef
LP
1556 n = le64toh(d->data.n_entries);
1557 if (n <= 0)
1558 return n;
cec736d2 1559
de190aef
LP
1560 if (!o)
1561 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1562 else {
1563 if (o->object.type != OBJECT_ENTRY)
1564 return -EINVAL;
cec736d2 1565
de190aef
LP
1566 r = generic_array_bisect_plus_one(f,
1567 le64toh(d->data.entry_offset),
1568 le64toh(d->data.entry_array_offset),
1569 le64toh(d->data.n_entries),
1570 p,
1571 test_object_offset,
1572 DIRECTION_DOWN,
1573 NULL, NULL,
1574 &i);
1575
1576 if (r <= 0)
cec736d2
LP
1577 return r;
1578
de190aef
LP
1579 if (direction == DIRECTION_DOWN) {
1580 if (i >= n - 1)
1581 return 0;
cec736d2 1582
de190aef
LP
1583 i++;
1584 } else {
1585 if (i <= 0)
1586 return 0;
cec736d2 1587
de190aef
LP
1588 i--;
1589 }
cec736d2 1590
de190aef 1591 }
cec736d2 1592
de190aef
LP
1593 return generic_array_get_plus_one(f,
1594 le64toh(d->data.entry_offset),
1595 le64toh(d->data.entry_array_offset),
1596 i,
1597 ret, offset);
1598}
cec736d2 1599
de190aef
LP
1600int journal_file_move_to_entry_by_seqnum_for_data(
1601 JournalFile *f,
1602 uint64_t data_offset,
1603 uint64_t seqnum,
1604 direction_t direction,
1605 Object **ret, uint64_t *offset) {
cec736d2 1606
de190aef
LP
1607 Object *d;
1608 int r;
cec736d2 1609
de190aef
LP
1610 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1611 if (r <= 0)
1612 return r;
cec736d2 1613
de190aef
LP
1614 return generic_array_bisect_plus_one(f,
1615 le64toh(d->data.entry_offset),
1616 le64toh(d->data.entry_array_offset),
1617 le64toh(d->data.n_entries),
1618 seqnum,
1619 test_object_seqnum,
1620 direction,
1621 ret, offset, NULL);
1622}
cec736d2 1623
de190aef
LP
1624int journal_file_move_to_entry_by_realtime_for_data(
1625 JournalFile *f,
1626 uint64_t data_offset,
1627 uint64_t realtime,
1628 direction_t direction,
1629 Object **ret, uint64_t *offset) {
1630
1631 Object *d;
1632 int r;
1633
1634 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1635 if (r <= 0)
1636 return r;
1637
1638 return generic_array_bisect_plus_one(f,
1639 le64toh(d->data.entry_offset),
1640 le64toh(d->data.entry_array_offset),
1641 le64toh(d->data.n_entries),
1642 realtime,
1643 test_object_realtime,
1644 direction,
1645 ret, offset, NULL);
cec736d2
LP
1646}
1647
1648void journal_file_dump(JournalFile *f) {
1649 char a[33], b[33], c[33];
1650 Object *o;
1651 int r;
1652 uint64_t p;
1653
1654 assert(f);
1655
de190aef
LP
1656 printf("File Path: %s\n"
1657 "File ID: %s\n"
cec736d2
LP
1658 "Machine ID: %s\n"
1659 "Boot ID: %s\n"
de190aef
LP
1660 "Arena size: %llu\n"
1661 "Objects: %lu\n"
1662 "Entries: %lu\n",
1663 f->path,
cec736d2
LP
1664 sd_id128_to_string(f->header->file_id, a),
1665 sd_id128_to_string(f->header->machine_id, b),
1666 sd_id128_to_string(f->header->boot_id, c),
de190aef
LP
1667 (unsigned long long) le64toh(f->header->arena_size),
1668 (unsigned long) le64toh(f->header->n_objects),
1669 (unsigned long) le64toh(f->header->n_entries));
cec736d2 1670
23b0b2b2 1671 p = le64toh(f->header->header_size);
cec736d2 1672 while (p != 0) {
de190aef 1673 r = journal_file_move_to_object(f, -1, p, &o);
cec736d2
LP
1674 if (r < 0)
1675 goto fail;
1676
1677 switch (o->object.type) {
1678
1679 case OBJECT_UNUSED:
1680 printf("Type: OBJECT_UNUSED\n");
1681 break;
1682
1683 case OBJECT_DATA:
1684 printf("Type: OBJECT_DATA\n");
1685 break;
1686
1687 case OBJECT_ENTRY:
3fbf9cbb
LP
1688 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1689 (unsigned long long) le64toh(o->entry.seqnum),
1690 (unsigned long long) le64toh(o->entry.monotonic),
1691 (unsigned long long) le64toh(o->entry.realtime));
cec736d2
LP
1692 break;
1693
de190aef
LP
1694 case OBJECT_FIELD_HASH_TABLE:
1695 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
cec736d2
LP
1696 break;
1697
de190aef
LP
1698 case OBJECT_DATA_HASH_TABLE:
1699 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1700 break;
1701
1702 case OBJECT_ENTRY_ARRAY:
1703 printf("Type: OBJECT_ENTRY_ARRAY\n");
cec736d2 1704 break;
8144056f
LP
1705
1706 case OBJECT_SIGNATURE:
1707 printf("Type: OBJECT_SIGNATURE\n");
1708 break;
cec736d2
LP
1709 }
1710
807e17f0
LP
1711 if (o->object.flags & OBJECT_COMPRESSED)
1712 printf("Flags: COMPRESSED\n");
1713
cec736d2
LP
1714 if (p == le64toh(f->header->tail_object_offset))
1715 p = 0;
1716 else
1717 p = p + ALIGN64(le64toh(o->object.size));
1718 }
1719
1720 return;
1721fail:
1722 log_error("File corrupt");
1723}
1724
1725int journal_file_open(
1726 const char *fname,
1727 int flags,
1728 mode_t mode,
0ac38b70 1729 JournalFile *template,
cec736d2
LP
1730 JournalFile **ret) {
1731
1732 JournalFile *f;
1733 int r;
1734 bool newly_created = false;
1735
1736 assert(fname);
1737
1738 if ((flags & O_ACCMODE) != O_RDONLY &&
1739 (flags & O_ACCMODE) != O_RDWR)
1740 return -EINVAL;
1741
9447a7f1
LP
1742 if (!endswith(fname, ".journal"))
1743 return -EINVAL;
1744
cec736d2
LP
1745 f = new0(JournalFile, 1);
1746 if (!f)
1747 return -ENOMEM;
1748
0ac38b70
LP
1749 f->fd = -1;
1750 f->flags = flags;
1751 f->mode = mode;
cec736d2
LP
1752 f->writable = (flags & O_ACCMODE) != O_RDONLY;
1753 f->prot = prot_from_flags(flags);
1754
15944db8
LP
1755 if (template) {
1756 f->metrics = template->metrics;
1757 f->compress = template->compress;
1758 }
1759
cec736d2
LP
1760 f->path = strdup(fname);
1761 if (!f->path) {
1762 r = -ENOMEM;
1763 goto fail;
1764 }
1765
0ac38b70
LP
1766 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1767 if (f->fd < 0) {
1768 r = -errno;
1769 goto fail;
1770 }
1771
cec736d2
LP
1772 if (fstat(f->fd, &f->last_stat) < 0) {
1773 r = -errno;
1774 goto fail;
1775 }
1776
1777 if (f->last_stat.st_size == 0 && f->writable) {
1778 newly_created = true;
1779
0ac38b70 1780 r = journal_file_init_header(f, template);
cec736d2
LP
1781 if (r < 0)
1782 goto fail;
1783
1784 if (fstat(f->fd, &f->last_stat) < 0) {
1785 r = -errno;
1786 goto fail;
1787 }
1788 }
1789
1790 if (f->last_stat.st_size < (off_t) sizeof(Header)) {
1791 r = -EIO;
1792 goto fail;
1793 }
1794
1795 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
1796 if (f->header == MAP_FAILED) {
1797 f->header = NULL;
1798 r = -errno;
1799 goto fail;
1800 }
1801
1802 if (!newly_created) {
1803 r = journal_file_verify_header(f);
1804 if (r < 0)
1805 goto fail;
1806 }
1807
1808 if (f->writable) {
1809 r = journal_file_refresh_header(f);
1810 if (r < 0)
1811 goto fail;
1812 }
1813
1814 if (newly_created) {
1815
de190aef 1816 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
1817 if (r < 0)
1818 goto fail;
1819
de190aef 1820 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
1821 if (r < 0)
1822 goto fail;
1823 }
1824
de190aef 1825 r = journal_file_map_field_hash_table(f);
cec736d2
LP
1826 if (r < 0)
1827 goto fail;
1828
de190aef 1829 r = journal_file_map_data_hash_table(f);
cec736d2
LP
1830 if (r < 0)
1831 goto fail;
1832
1833 if (ret)
1834 *ret = f;
1835
1836 return 0;
1837
1838fail:
1839 journal_file_close(f);
1840
1841 return r;
1842}
0ac38b70
LP
1843
1844int journal_file_rotate(JournalFile **f) {
1845 char *p;
1846 size_t l;
1847 JournalFile *old_file, *new_file = NULL;
1848 int r;
1849
1850 assert(f);
1851 assert(*f);
1852
1853 old_file = *f;
1854
1855 if (!old_file->writable)
1856 return -EINVAL;
1857
1858 if (!endswith(old_file->path, ".journal"))
1859 return -EINVAL;
1860
1861 l = strlen(old_file->path);
1862
9447a7f1 1863 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
1864 if (!p)
1865 return -ENOMEM;
1866
1867 memcpy(p, old_file->path, l - 8);
1868 p[l-8] = '@';
1869 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
1870 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
1871 "-%016llx-%016llx.journal",
1872 (unsigned long long) le64toh((*f)->header->seqnum),
1873 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
1874
1875 r = rename(old_file->path, p);
1876 free(p);
1877
1878 if (r < 0)
1879 return -errno;
1880
ccdbaf91 1881 old_file->header->state = STATE_ARCHIVED;
0ac38b70
LP
1882
1883 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file);
1884 journal_file_close(old_file);
1885
1886 *f = new_file;
1887 return r;
1888}
1889
9447a7f1
LP
1890int journal_file_open_reliably(
1891 const char *fname,
1892 int flags,
1893 mode_t mode,
1894 JournalFile *template,
1895 JournalFile **ret) {
1896
1897 int r;
1898 size_t l;
1899 char *p;
1900
1901 r = journal_file_open(fname, flags, mode, template, ret);
0071d9f1
LP
1902 if (r != -EBADMSG && /* corrupted */
1903 r != -ENODATA && /* truncated */
1904 r != -EHOSTDOWN && /* other machine */
1905 r != -EPROTONOSUPPORT) /* incompatible feature */
9447a7f1
LP
1906 return r;
1907
1908 if ((flags & O_ACCMODE) == O_RDONLY)
1909 return r;
1910
1911 if (!(flags & O_CREAT))
1912 return r;
1913
5c70eab4
LP
1914 /* The file is corrupted. Rotate it away and try it again (but only once) */
1915
9447a7f1
LP
1916 l = strlen(fname);
1917 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
1918 (int) (l-8), fname,
1919 (unsigned long long) now(CLOCK_REALTIME),
1920 random_ull()) < 0)
1921 return -ENOMEM;
1922
1923 r = rename(fname, p);
1924 free(p);
1925 if (r < 0)
1926 return -errno;
1927
1928 log_warning("File %s corrupted, renaming and replacing.", fname);
1929
1930 return journal_file_open(fname, flags, mode, template, ret);
1931}
1932
0ac38b70
LP
1933struct vacuum_info {
1934 off_t usage;
1935 char *filename;
1936
1937 uint64_t realtime;
1938 sd_id128_t seqnum_id;
1939 uint64_t seqnum;
5c70eab4
LP
1940
1941 bool have_seqnum;
0ac38b70
LP
1942};
1943
1944static int vacuum_compare(const void *_a, const void *_b) {
1945 const struct vacuum_info *a, *b;
1946
1947 a = _a;
1948 b = _b;
1949
5c70eab4
LP
1950 if (a->have_seqnum && b->have_seqnum &&
1951 sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
0ac38b70
LP
1952 if (a->seqnum < b->seqnum)
1953 return -1;
1954 else if (a->seqnum > b->seqnum)
1955 return 1;
1956 else
1957 return 0;
1958 }
1959
1960 if (a->realtime < b->realtime)
1961 return -1;
1962 else if (a->realtime > b->realtime)
1963 return 1;
5c70eab4 1964 else if (a->have_seqnum && b->have_seqnum)
0ac38b70 1965 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
5c70eab4
LP
1966 else
1967 return strcmp(a->filename, b->filename);
0ac38b70
LP
1968}
1969
1970int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
1971 DIR *d;
1972 int r = 0;
1973 struct vacuum_info *list = NULL;
1974 unsigned n_list = 0, n_allocated = 0, i;
1975 uint64_t sum = 0;
1976
1977 assert(directory);
1978
1979 if (max_use <= 0)
babfc091 1980 return 0;
0ac38b70
LP
1981
1982 d = opendir(directory);
1983 if (!d)
1984 return -errno;
1985
1986 for (;;) {
1987 int k;
1988 struct dirent buf, *de;
1989 size_t q;
1990 struct stat st;
1991 char *p;
7ea07dcd 1992 unsigned long long seqnum = 0, realtime;
0ac38b70 1993 sd_id128_t seqnum_id;
5c70eab4 1994 bool have_seqnum;
0ac38b70
LP
1995
1996 k = readdir_r(d, &buf, &de);
1997 if (k != 0) {
1998 r = -k;
1999 goto finish;
2000 }
2001
2002 if (!de)
2003 break;
2004
5c70eab4
LP
2005 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2006 continue;
2007
2008 if (!S_ISREG(st.st_mode))
0ac38b70
LP
2009 continue;
2010
2011 q = strlen(de->d_name);
2012
5c70eab4 2013 if (endswith(de->d_name, ".journal")) {
0ac38b70 2014
5c70eab4 2015 /* Vacuum archived files */
0ac38b70 2016
5c70eab4
LP
2017 if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2018 continue;
0ac38b70 2019
5c70eab4
LP
2020 if (de->d_name[q-8-16-1] != '-' ||
2021 de->d_name[q-8-16-1-16-1] != '-' ||
2022 de->d_name[q-8-16-1-16-1-32-1] != '@')
2023 continue;
0ac38b70 2024
5c70eab4
LP
2025 p = strdup(de->d_name);
2026 if (!p) {
2027 r = -ENOMEM;
2028 goto finish;
2029 }
0ac38b70 2030
5c70eab4
LP
2031 de->d_name[q-8-16-1-16-1] = 0;
2032 if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2033 free(p);
2034 continue;
2035 }
2036
2037 if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2038 free(p);
2039 continue;
2040 }
2041
2042 have_seqnum = true;
2043
2044 } else if (endswith(de->d_name, ".journal~")) {
2045 unsigned long long tmp;
2046
2047 /* Vacuum corrupted files */
2048
2049 if (q < 1 + 16 + 1 + 16 + 8 + 1)
2050 continue;
0ac38b70 2051
5c70eab4
LP
2052 if (de->d_name[q-1-8-16-1] != '-' ||
2053 de->d_name[q-1-8-16-1-16-1] != '@')
2054 continue;
2055
2056 p = strdup(de->d_name);
2057 if (!p) {
2058 r = -ENOMEM;
2059 goto finish;
2060 }
2061
2062 if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2063 free(p);
2064 continue;
2065 }
2066
2067 have_seqnum = false;
2068 } else
0ac38b70 2069 continue;
0ac38b70
LP
2070
2071 if (n_list >= n_allocated) {
2072 struct vacuum_info *j;
2073
2074 n_allocated = MAX(n_allocated * 2U, 8U);
2075 j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2076 if (!j) {
2077 free(p);
2078 r = -ENOMEM;
2079 goto finish;
2080 }
2081
2082 list = j;
2083 }
2084
2085 list[n_list].filename = p;
a3a52c0f 2086 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
0ac38b70
LP
2087 list[n_list].seqnum = seqnum;
2088 list[n_list].realtime = realtime;
2089 list[n_list].seqnum_id = seqnum_id;
5c70eab4 2090 list[n_list].have_seqnum = have_seqnum;
0ac38b70
LP
2091
2092 sum += list[n_list].usage;
2093
2094 n_list ++;
2095 }
2096
2097 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
2098
2099 for(i = 0; i < n_list; i++) {
2100 struct statvfs ss;
2101
2102 if (fstatvfs(dirfd(d), &ss) < 0) {
2103 r = -errno;
2104 goto finish;
2105 }
2106
2107 if (sum <= max_use &&
2108 (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2109 break;
2110
2111 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
e7bf07b3 2112 log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
0ac38b70
LP
2113 sum -= list[i].usage;
2114 } else if (errno != ENOENT)
2115 log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2116 }
2117
2118finish:
2119 for (i = 0; i < n_list; i++)
2120 free(list[i].filename);
2121
2122 free(list);
2123
de190aef
LP
2124 if (d)
2125 closedir(d);
2126
0ac38b70
LP
2127 return r;
2128}
cf244689
LP
2129
2130int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2131 uint64_t i, n;
2132 uint64_t q, xor_hash = 0;
2133 int r;
2134 EntryItem *items;
2135 dual_timestamp ts;
2136
2137 assert(from);
2138 assert(to);
2139 assert(o);
2140 assert(p);
2141
2142 if (!to->writable)
2143 return -EPERM;
2144
2145 ts.monotonic = le64toh(o->entry.monotonic);
2146 ts.realtime = le64toh(o->entry.realtime);
2147
2148 if (to->tail_entry_monotonic_valid &&
2149 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2150 return -EINVAL;
2151
2152 if (ts.realtime < le64toh(to->header->tail_entry_realtime))
2153 return -EINVAL;
2154
2155 n = journal_file_entry_n_items(o);
2156 items = alloca(sizeof(EntryItem) * n);
2157
2158 for (i = 0; i < n; i++) {
4fd052ae
FC
2159 uint64_t l, h;
2160 le64_t le_hash;
cf244689
LP
2161 size_t t;
2162 void *data;
2163 Object *u;
2164
2165 q = le64toh(o->entry.items[i].object_offset);
2166 le_hash = o->entry.items[i].hash;
2167
2168 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2169 if (r < 0)
2170 return r;
2171
2172 if (le_hash != o->data.hash)
2173 return -EBADMSG;
2174
2175 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2176 t = (size_t) l;
2177
2178 /* We hit the limit on 32bit machines */
2179 if ((uint64_t) t != l)
2180 return -E2BIG;
2181
2182 if (o->object.flags & OBJECT_COMPRESSED) {
2183#ifdef HAVE_XZ
2184 uint64_t rsize;
2185
2186 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2187 return -EBADMSG;
2188
2189 data = from->compress_buffer;
2190 l = rsize;
2191#else
2192 return -EPROTONOSUPPORT;
2193#endif
2194 } else
2195 data = o->data.payload;
2196
2197 r = journal_file_append_data(to, data, l, &u, &h);
2198 if (r < 0)
2199 return r;
2200
2201 xor_hash ^= le64toh(u->data.hash);
2202 items[i].object_offset = htole64(h);
2203 items[i].hash = u->data.hash;
2204
2205 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2206 if (r < 0)
2207 return r;
2208 }
2209
2210 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2211}
babfc091
LP
2212
2213void journal_default_metrics(JournalMetrics *m, int fd) {
2214 uint64_t fs_size = 0;
2215 struct statvfs ss;
a7bc2c2a 2216 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2217
2218 assert(m);
2219 assert(fd >= 0);
2220
2221 if (fstatvfs(fd, &ss) >= 0)
2222 fs_size = ss.f_frsize * ss.f_blocks;
2223
2224 if (m->max_use == (uint64_t) -1) {
2225
2226 if (fs_size > 0) {
2227 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2228
2229 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2230 m->max_use = DEFAULT_MAX_USE_UPPER;
2231
2232 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2233 m->max_use = DEFAULT_MAX_USE_LOWER;
2234 } else
2235 m->max_use = DEFAULT_MAX_USE_LOWER;
2236 } else {
2237 m->max_use = PAGE_ALIGN(m->max_use);
2238
2239 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2240 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2241 }
2242
2243 if (m->max_size == (uint64_t) -1) {
2244 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2245
2246 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2247 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2248 } else
2249 m->max_size = PAGE_ALIGN(m->max_size);
2250
2251 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2252 m->max_size = JOURNAL_FILE_SIZE_MIN;
2253
2254 if (m->max_size*2 > m->max_use)
2255 m->max_use = m->max_size*2;
2256
2257 if (m->min_size == (uint64_t) -1)
2258 m->min_size = JOURNAL_FILE_SIZE_MIN;
2259 else {
2260 m->min_size = PAGE_ALIGN(m->min_size);
2261
2262 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2263 m->min_size = JOURNAL_FILE_SIZE_MIN;
2264
2265 if (m->min_size > m->max_size)
2266 m->max_size = m->min_size;
2267 }
2268
2269 if (m->keep_free == (uint64_t) -1) {
2270
2271 if (fs_size > 0) {
2272 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2273
2274 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2275 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2276
2277 } else
2278 m->keep_free = DEFAULT_KEEP_FREE;
2279 }
2280
e7bf07b3
LP
2281 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2282 format_bytes(a, sizeof(a), m->max_use),
2283 format_bytes(b, sizeof(b), m->max_size),
2284 format_bytes(c, sizeof(c), m->min_size),
2285 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2286}