]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
journald: don't enforce monotonicity of realtime clocks when copying entries
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
32#include "lookup3.h"
807e17f0 33#include "compress.h"
cec736d2 34
de190aef
LP
/* Sizes, in bytes, of the item arrays of the data and field hash
 * tables that are set up in newly created journal files */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL * 16ULL)
#define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL * 16ULL)

/* Requests smaller than this are widened to this many bytes when a
 * new mmap() window is established */
#define DEFAULT_WINDOW_SIZE (8ULL * 1024ULL * 1024ULL)

/* Data payloads shorter than this are stored uncompressed */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL * 1024ULL)                          /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL * 1024ULL * 1024ULL)                 /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)       /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL * 1024ULL * 1024ULL)              /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)     /* 4 GiB */

/* This is the keep_free value when we can't determine the system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL * 1024ULL)                            /* 1 MiB */

/* Magic bytes every journal file header starts with */
static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };

/* Rounds x up to the next multiple of 8 */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
64
65void journal_file_close(JournalFile *f) {
de190aef 66 int t;
cec736d2 67
de190aef 68 assert(f);
cec736d2 69
d384c7a8
MS
70 if (f->header) {
71 if (f->writable)
72 f->header->state = STATE_OFFLINE;
cec736d2 73
d384c7a8
MS
74 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
75 }
cec736d2 76
de190aef
LP
77 for (t = 0; t < _WINDOW_MAX; t++)
78 if (f->windows[t].ptr)
79 munmap(f->windows[t].ptr, f->windows[t].size);
cec736d2 80
0ac38b70
LP
81 if (f->fd >= 0)
82 close_nointr_nofail(f->fd);
83
cec736d2 84 free(f->path);
807e17f0
LP
85
86#ifdef HAVE_XZ
87 free(f->compress_buffer);
88#endif
89
cec736d2
LP
90 free(f);
91}
92
0ac38b70 93static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
94 Header h;
95 ssize_t k;
96 int r;
97
98 assert(f);
99
100 zero(h);
101 memcpy(h.signature, signature, 8);
23b0b2b2 102 h.header_size = htole64(ALIGN64(sizeof(h)));
cec736d2
LP
103
104 r = sd_id128_randomize(&h.file_id);
105 if (r < 0)
106 return r;
107
0ac38b70
LP
108 if (template) {
109 h.seqnum_id = template->header->seqnum_id;
110 h.seqnum = template->header->seqnum;
111 } else
112 h.seqnum_id = h.file_id;
cec736d2
LP
113
114 k = pwrite(f->fd, &h, sizeof(h), 0);
115 if (k < 0)
116 return -errno;
117
118 if (k != sizeof(h))
119 return -EIO;
120
121 return 0;
122}
123
124static int journal_file_refresh_header(JournalFile *f) {
125 int r;
de190aef 126 sd_id128_t boot_id;
cec736d2
LP
127
128 assert(f);
129
130 r = sd_id128_get_machine(&f->header->machine_id);
131 if (r < 0)
132 return r;
133
de190aef 134 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
135 if (r < 0)
136 return r;
137
de190aef
LP
138 if (sd_id128_equal(boot_id, f->header->boot_id))
139 f->tail_entry_monotonic_valid = true;
140
141 f->header->boot_id = boot_id;
142
143 f->header->state = STATE_ONLINE;
b788cc23
LP
144
145 __sync_synchronize();
146
cec736d2
LP
147 return 0;
148}
149
150static int journal_file_verify_header(JournalFile *f) {
151 assert(f);
152
153 if (memcmp(f->header, signature, 8))
154 return -EBADMSG;
155
807e17f0
LP
156#ifdef HAVE_XZ
157 if ((le64toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
158 return -EPROTONOSUPPORT;
159#else
cec736d2
LP
160 if (f->header->incompatible_flags != 0)
161 return -EPROTONOSUPPORT;
807e17f0 162#endif
cec736d2 163
23b0b2b2
LP
164 if (f->header->header_size != htole64(ALIGN64(sizeof(*(f->header)))))
165 return -EBADMSG;
166
167 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
cec736d2
LP
168 return -ENODATA;
169
170 if (f->writable) {
ccdbaf91 171 uint8_t state;
cec736d2
LP
172 sd_id128_t machine_id;
173 int r;
174
175 r = sd_id128_get_machine(&machine_id);
176 if (r < 0)
177 return r;
178
179 if (!sd_id128_equal(machine_id, f->header->machine_id))
180 return -EHOSTDOWN;
181
de190aef 182 state = f->header->state;
cec736d2
LP
183
184 if (state == STATE_ONLINE)
185 log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path);
08984293 186 /* FIXME: immediately rotate */
cec736d2
LP
187 else if (state == STATE_ARCHIVED)
188 return -ESHUTDOWN;
189 else if (state != STATE_OFFLINE)
190 log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state);
191 }
192
193 return 0;
194}
195
196static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2 197 uint64_t old_size, new_size;
fec2aa2f 198 int r;
cec736d2
LP
199
200 assert(f);
201
cec736d2 202 /* We assume that this file is not sparse, and we know that
38ac38b2 203 * for sure, since we always call posix_fallocate()
cec736d2
LP
204 * ourselves */
205
206 old_size =
23b0b2b2 207 le64toh(f->header->header_size) +
cec736d2
LP
208 le64toh(f->header->arena_size);
209
bc85bfee 210 new_size = PAGE_ALIGN(offset + size);
23b0b2b2
LP
211 if (new_size < le64toh(f->header->header_size))
212 new_size = le64toh(f->header->header_size);
bc85bfee
LP
213
214 if (new_size <= old_size)
cec736d2
LP
215 return 0;
216
bc85bfee
LP
217 if (f->metrics.max_size > 0 &&
218 new_size > f->metrics.max_size)
219 return -E2BIG;
cec736d2 220
bc85bfee
LP
221 if (new_size > f->metrics.min_size &&
222 f->metrics.keep_free > 0) {
cec736d2
LP
223 struct statvfs svfs;
224
225 if (fstatvfs(f->fd, &svfs) >= 0) {
226 uint64_t available;
227
228 available = svfs.f_bfree * svfs.f_bsize;
229
bc85bfee
LP
230 if (available >= f->metrics.keep_free)
231 available -= f->metrics.keep_free;
cec736d2
LP
232 else
233 available = 0;
234
235 if (new_size - old_size > available)
236 return -E2BIG;
237 }
238 }
239
bc85bfee
LP
240 /* Note that the glibc fallocate() fallback is very
241 inefficient, hence we try to minimize the allocation area
242 as we can. */
fec2aa2f
GV
243 r = posix_fallocate(f->fd, old_size, new_size - old_size);
244 if (r != 0)
245 return -r;
cec736d2
LP
246
247 if (fstat(f->fd, &f->last_stat) < 0)
248 return -errno;
249
23b0b2b2 250 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
cec736d2
LP
251
252 return 0;
253}
254
255static int journal_file_map(
256 JournalFile *f,
257 uint64_t offset,
258 uint64_t size,
259 void **_window,
260 uint64_t *_woffset,
261 uint64_t *_wsize,
262 void **ret) {
263
264 uint64_t woffset, wsize;
265 void *window;
266
267 assert(f);
268 assert(size > 0);
269 assert(ret);
270
271 woffset = offset & ~((uint64_t) page_size() - 1ULL);
272 wsize = size + (offset - woffset);
273 wsize = PAGE_ALIGN(wsize);
274
2a59ea54
LP
275 /* Avoid SIGBUS on invalid accesses */
276 if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
277 return -EADDRNOTAVAIL;
278
cec736d2
LP
279 window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
280 if (window == MAP_FAILED)
281 return -errno;
282
283 if (_window)
284 *_window = window;
285
286 if (_woffset)
287 *_woffset = woffset;
288
289 if (_wsize)
290 *_wsize = wsize;
291
292 *ret = (uint8_t*) window + (offset - woffset);
293
294 return 0;
295}
296
de190aef 297static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
6c8a39b8 298 void *p = NULL;
cec736d2
LP
299 uint64_t delta;
300 int r;
de190aef 301 Window *w;
cec736d2
LP
302
303 assert(f);
304 assert(ret);
de190aef
LP
305 assert(wt >= 0);
306 assert(wt < _WINDOW_MAX);
cec736d2 307
4bbdcdb3
LP
308 if (offset + size > (uint64_t) f->last_stat.st_size) {
309 /* Hmm, out of range? Let's refresh the fstat() data
310 * first, before we trust that check. */
311
312 if (fstat(f->fd, &f->last_stat) < 0 ||
313 offset + size > (uint64_t) f->last_stat.st_size)
314 return -EADDRNOTAVAIL;
315 }
316
de190aef 317 w = f->windows + wt;
cec736d2 318
de190aef
LP
319 if (_likely_(w->ptr &&
320 w->offset <= offset &&
321 w->offset + w->size >= offset + size)) {
322
323 *ret = (uint8_t*) w->ptr + (offset - w->offset);
cec736d2
LP
324 return 0;
325 }
326
de190aef
LP
327 if (w->ptr) {
328 if (munmap(w->ptr, w->size) < 0)
cec736d2
LP
329 return -errno;
330
de190aef
LP
331 w->ptr = NULL;
332 w->size = w->offset = 0;
cec736d2
LP
333 }
334
335 if (size < DEFAULT_WINDOW_SIZE) {
336 /* If the default window size is larger then what was
337 * asked for extend the mapping a bit in the hope to
338 * minimize needed remappings later on. We add half
339 * the window space before and half behind the
340 * requested mapping */
341
1921a5cb 342 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
cec736d2 343
a99c349d 344 if (delta > offset)
cec736d2
LP
345 delta = offset;
346
347 offset -= delta;
a99c349d 348 size = DEFAULT_WINDOW_SIZE;
cec736d2
LP
349 } else
350 delta = 0;
351
2a59ea54 352 if (offset + size > (uint64_t) f->last_stat.st_size)
1921a5cb 353 size = (uint64_t) f->last_stat.st_size - offset;
2a59ea54
LP
354
355 if (size <= 0)
356 return -EADDRNOTAVAIL;
357
cec736d2
LP
358 r = journal_file_map(f,
359 offset, size,
de190aef
LP
360 &w->ptr, &w->offset, &w->size,
361 &p);
cec736d2
LP
362
363 if (r < 0)
364 return r;
365
366 *ret = (uint8_t*) p + delta;
367 return 0;
368}
369
370static bool verify_hash(Object *o) {
de190aef 371 uint64_t h1, h2;
cec736d2
LP
372
373 assert(o);
374
807e17f0 375 if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
cec736d2 376 h1 = le64toh(o->data.hash);
de190aef
LP
377 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
378 } else if (o->object.type == OBJECT_FIELD) {
379 h1 = le64toh(o->field.hash);
380 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
381 } else
382 return true;
cec736d2 383
de190aef 384 return h1 == h2;
cec736d2
LP
385}
386
de190aef 387int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
388 int r;
389 void *t;
390 Object *o;
391 uint64_t s;
392
393 assert(f);
394 assert(ret);
de190aef 395 assert(type < _OBJECT_TYPE_MAX);
cec736d2 396
de190aef 397 r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
398 if (r < 0)
399 return r;
400
401 o = (Object*) t;
402 s = le64toh(o->object.size);
403
404 if (s < sizeof(ObjectHeader))
405 return -EBADMSG;
406
de190aef 407 if (type >= 0 && o->object.type != type)
cec736d2
LP
408 return -EBADMSG;
409
410 if (s > sizeof(ObjectHeader)) {
de190aef 411 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
412 if (r < 0)
413 return r;
414
415 o = (Object*) t;
416 }
417
418 if (!verify_hash(o))
419 return -EBADMSG;
420
421 *ret = o;
422 return 0;
423}
424
c2373f84 425static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
426 uint64_t r;
427
428 assert(f);
429
430 r = le64toh(f->header->seqnum) + 1;
c2373f84
LP
431
432 if (seqnum) {
de190aef 433 /* If an external seqnum counter was passed, we update
c2373f84
LP
434 * both the local and the external one, and set it to
435 * the maximum of both */
436
437 if (*seqnum + 1 > r)
438 r = *seqnum + 1;
439
440 *seqnum = r;
441 }
442
cec736d2
LP
443 f->header->seqnum = htole64(r);
444
de190aef
LP
445 if (f->header->first_seqnum == 0)
446 f->header->first_seqnum = htole64(r);
447
cec736d2
LP
448 return r;
449}
450
de190aef 451static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
452 int r;
453 uint64_t p;
454 Object *tail, *o;
455 void *t;
456
457 assert(f);
458 assert(size >= sizeof(ObjectHeader));
459 assert(offset);
460 assert(ret);
461
462 p = le64toh(f->header->tail_object_offset);
cec736d2 463 if (p == 0)
23b0b2b2 464 p = le64toh(f->header->header_size);
cec736d2 465 else {
de190aef 466 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
467 if (r < 0)
468 return r;
469
470 p += ALIGN64(le64toh(tail->object.size));
471 }
472
473 r = journal_file_allocate(f, p, size);
474 if (r < 0)
475 return r;
476
de190aef 477 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
478 if (r < 0)
479 return r;
480
481 o = (Object*) t;
482
483 zero(o->object);
de190aef 484 o->object.type = type;
cec736d2
LP
485 o->object.size = htole64(size);
486
487 f->header->tail_object_offset = htole64(p);
cec736d2
LP
488 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
489
490 *ret = o;
491 *offset = p;
492
493 return 0;
494}
495
de190aef 496static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
497 uint64_t s, p;
498 Object *o;
499 int r;
500
501 assert(f);
502
de190aef
LP
503 s = DEFAULT_DATA_HASH_TABLE_SIZE;
504 r = journal_file_append_object(f,
505 OBJECT_DATA_HASH_TABLE,
506 offsetof(Object, hash_table.items) + s,
507 &o, &p);
cec736d2
LP
508 if (r < 0)
509 return r;
510
de190aef 511 memset(o->hash_table.items, 0, s);
cec736d2 512
de190aef
LP
513 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
514 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
515
516 return 0;
517}
518
de190aef 519static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
520 uint64_t s, p;
521 Object *o;
522 int r;
523
524 assert(f);
525
de190aef
LP
526 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
527 r = journal_file_append_object(f,
528 OBJECT_FIELD_HASH_TABLE,
529 offsetof(Object, hash_table.items) + s,
530 &o, &p);
cec736d2
LP
531 if (r < 0)
532 return r;
533
de190aef 534 memset(o->hash_table.items, 0, s);
cec736d2 535
de190aef
LP
536 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
537 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
538
539 return 0;
540}
541
de190aef 542static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
543 uint64_t s, p;
544 void *t;
545 int r;
546
547 assert(f);
548
de190aef
LP
549 p = le64toh(f->header->data_hash_table_offset);
550 s = le64toh(f->header->data_hash_table_size);
cec736d2 551
de190aef
LP
552 r = journal_file_move_to(f,
553 WINDOW_DATA_HASH_TABLE,
554 p, s,
555 &t);
cec736d2
LP
556 if (r < 0)
557 return r;
558
de190aef 559 f->data_hash_table = t;
cec736d2
LP
560 return 0;
561}
562
de190aef 563static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
564 uint64_t s, p;
565 void *t;
566 int r;
567
568 assert(f);
569
de190aef
LP
570 p = le64toh(f->header->field_hash_table_offset);
571 s = le64toh(f->header->field_hash_table_size);
cec736d2 572
de190aef
LP
573 r = journal_file_move_to(f,
574 WINDOW_FIELD_HASH_TABLE,
575 p, s,
576 &t);
cec736d2
LP
577 if (r < 0)
578 return r;
579
de190aef 580 f->field_hash_table = t;
cec736d2
LP
581 return 0;
582}
583
de190aef
LP
584static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
585 uint64_t p, h;
cec736d2
LP
586 int r;
587
588 assert(f);
589 assert(o);
590 assert(offset > 0);
de190aef 591 assert(o->object.type == OBJECT_DATA);
cec736d2 592
48496df6
LP
593 /* This might alter the window we are looking at */
594
de190aef
LP
595 o->data.next_hash_offset = o->data.next_field_offset = 0;
596 o->data.entry_offset = o->data.entry_array_offset = 0;
597 o->data.n_entries = 0;
cec736d2 598
de190aef 599 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
8db4213e 600 p = le64toh(f->data_hash_table[h].tail_hash_offset);
cec736d2
LP
601 if (p == 0) {
602 /* Only entry in the hash table is easy */
de190aef 603 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 604 } else {
48496df6
LP
605 /* Move back to the previous data object, to patch in
606 * pointer */
cec736d2 607
de190aef 608 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
609 if (r < 0)
610 return r;
611
de190aef 612 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
613 }
614
de190aef 615 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2
LP
616
617 return 0;
618}
619
de190aef
LP
620int journal_file_find_data_object_with_hash(
621 JournalFile *f,
622 const void *data, uint64_t size, uint64_t hash,
623 Object **ret, uint64_t *offset) {
48496df6 624
de190aef 625 uint64_t p, osize, h;
cec736d2
LP
626 int r;
627
628 assert(f);
629 assert(data || size == 0);
630
631 osize = offsetof(Object, data.payload) + size;
632
bc85bfee
LP
633 if (f->header->data_hash_table_size == 0)
634 return -EBADMSG;
635
de190aef
LP
636 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
637 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 638
de190aef
LP
639 while (p > 0) {
640 Object *o;
cec736d2 641
de190aef 642 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
643 if (r < 0)
644 return r;
645
807e17f0 646 if (le64toh(o->data.hash) != hash)
85a131e8 647 goto next;
807e17f0
LP
648
649 if (o->object.flags & OBJECT_COMPRESSED) {
650#ifdef HAVE_XZ
b785c858 651 uint64_t l, rsize;
cec736d2 652
807e17f0
LP
653 l = le64toh(o->object.size);
654 if (l <= offsetof(Object, data.payload))
cec736d2
LP
655 return -EBADMSG;
656
807e17f0
LP
657 l -= offsetof(Object, data.payload);
658
659 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
660 return -EBADMSG;
661
b785c858 662 if (rsize == size &&
807e17f0
LP
663 memcmp(f->compress_buffer, data, size) == 0) {
664
665 if (ret)
666 *ret = o;
667
668 if (offset)
669 *offset = p;
670
671 return 1;
672 }
673#else
674 return -EPROTONOSUPPORT;
675#endif
676
677 } else if (le64toh(o->object.size) == osize &&
678 memcmp(o->data.payload, data, size) == 0) {
679
cec736d2
LP
680 if (ret)
681 *ret = o;
682
683 if (offset)
684 *offset = p;
685
de190aef 686 return 1;
cec736d2
LP
687 }
688
85a131e8 689 next:
cec736d2
LP
690 p = le64toh(o->data.next_hash_offset);
691 }
692
de190aef
LP
693 return 0;
694}
695
696int journal_file_find_data_object(
697 JournalFile *f,
698 const void *data, uint64_t size,
699 Object **ret, uint64_t *offset) {
700
701 uint64_t hash;
702
703 assert(f);
704 assert(data || size == 0);
705
706 hash = hash64(data, size);
707
708 return journal_file_find_data_object_with_hash(f,
709 data, size, hash,
710 ret, offset);
711}
712
48496df6
LP
713static int journal_file_append_data(
714 JournalFile *f,
715 const void *data, uint64_t size,
716 Object **ret, uint64_t *offset) {
717
de190aef
LP
718 uint64_t hash, p;
719 uint64_t osize;
720 Object *o;
721 int r;
807e17f0 722 bool compressed = false;
de190aef
LP
723
724 assert(f);
725 assert(data || size == 0);
726
727 hash = hash64(data, size);
728
729 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
730 if (r < 0)
731 return r;
732 else if (r > 0) {
733
734 if (ret)
735 *ret = o;
736
737 if (offset)
738 *offset = p;
739
740 return 0;
741 }
742
743 osize = offsetof(Object, data.payload) + size;
744 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
745 if (r < 0)
746 return r;
747
cec736d2 748 o->data.hash = htole64(hash);
807e17f0
LP
749
750#ifdef HAVE_XZ
751 if (f->compress &&
752 size >= COMPRESSION_SIZE_THRESHOLD) {
753 uint64_t rsize;
754
755 compressed = compress_blob(data, size, o->data.payload, &rsize);
756
757 if (compressed) {
758 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
759 o->object.flags |= OBJECT_COMPRESSED;
760
761 f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED);
762
763 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
764 }
765 }
766#endif
767
768 if (!compressed)
769 memcpy(o->data.payload, data, size);
cec736d2 770
de190aef 771 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
772 if (r < 0)
773 return r;
774
48496df6
LP
775 /* The linking might have altered the window, so let's
776 * refresh our pointer */
777 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
778 if (r < 0)
779 return r;
780
cec736d2
LP
781 if (ret)
782 *ret = o;
783
784 if (offset)
de190aef 785 *offset = p;
cec736d2
LP
786
787 return 0;
788}
789
790uint64_t journal_file_entry_n_items(Object *o) {
791 assert(o);
7be3aa17 792 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
793
794 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
795}
796
de190aef
LP
797static uint64_t journal_file_entry_array_n_items(Object *o) {
798 assert(o);
7be3aa17 799 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
800
801 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
802}
803
804static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
805 le64_t *first,
806 le64_t *idx,
de190aef 807 uint64_t p) {
cec736d2 808 int r;
de190aef
LP
809 uint64_t n = 0, ap = 0, q, i, a, hidx;
810 Object *o;
811
cec736d2 812 assert(f);
de190aef
LP
813 assert(first);
814 assert(idx);
815 assert(p > 0);
cec736d2 816
de190aef
LP
817 a = le64toh(*first);
818 i = hidx = le64toh(*idx);
819 while (a > 0) {
820
821 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
822 if (r < 0)
823 return r;
cec736d2 824
de190aef
LP
825 n = journal_file_entry_array_n_items(o);
826 if (i < n) {
827 o->entry_array.items[i] = htole64(p);
828 *idx = htole64(hidx + 1);
829 return 0;
830 }
cec736d2 831
de190aef
LP
832 i -= n;
833 ap = a;
834 a = le64toh(o->entry_array.next_entry_array_offset);
835 }
836
837 if (hidx > n)
838 n = (hidx+1) * 2;
839 else
840 n = n * 2;
841
842 if (n < 4)
843 n = 4;
844
845 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
846 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
847 &o, &q);
cec736d2
LP
848 if (r < 0)
849 return r;
850
de190aef 851 o->entry_array.items[i] = htole64(p);
cec736d2 852
de190aef 853 if (ap == 0)
7be3aa17 854 *first = htole64(q);
cec736d2 855 else {
de190aef 856 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
857 if (r < 0)
858 return r;
859
de190aef
LP
860 o->entry_array.next_entry_array_offset = htole64(q);
861 }
cec736d2 862
de190aef
LP
863 *idx = htole64(hidx + 1);
864
865 return 0;
866}
cec736d2 867
de190aef 868static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
869 le64_t *extra,
870 le64_t *first,
871 le64_t *idx,
de190aef
LP
872 uint64_t p) {
873
874 int r;
875
876 assert(f);
877 assert(extra);
878 assert(first);
879 assert(idx);
880 assert(p > 0);
881
882 if (*idx == 0)
883 *extra = htole64(p);
884 else {
4fd052ae 885 le64_t i;
de190aef 886
7be3aa17 887 i = htole64(le64toh(*idx) - 1);
de190aef
LP
888 r = link_entry_into_array(f, first, &i, p);
889 if (r < 0)
890 return r;
cec736d2
LP
891 }
892
de190aef
LP
893 *idx = htole64(le64toh(*idx) + 1);
894 return 0;
895}
896
897static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
898 uint64_t p;
899 int r;
900 assert(f);
901 assert(o);
902 assert(offset > 0);
903
904 p = le64toh(o->entry.items[i].object_offset);
905 if (p == 0)
906 return -EINVAL;
907
908 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
909 if (r < 0)
910 return r;
911
de190aef
LP
912 return link_entry_into_array_plus_one(f,
913 &o->data.entry_offset,
914 &o->data.entry_array_offset,
915 &o->data.n_entries,
916 offset);
cec736d2
LP
917}
918
919static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 920 uint64_t n, i;
cec736d2
LP
921 int r;
922
923 assert(f);
924 assert(o);
925 assert(offset > 0);
de190aef 926 assert(o->object.type == OBJECT_ENTRY);
cec736d2 927
b788cc23
LP
928 __sync_synchronize();
929
cec736d2 930 /* Link up the entry itself */
de190aef
LP
931 r = link_entry_into_array(f,
932 &f->header->entry_array_offset,
933 &f->header->n_entries,
934 offset);
935 if (r < 0)
936 return r;
cec736d2 937
aaf53376 938 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 939
de190aef 940 if (f->header->head_entry_realtime == 0)
0ac38b70 941 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 942
0ac38b70 943 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
944 f->header->tail_entry_monotonic = o->entry.monotonic;
945
946 f->tail_entry_monotonic_valid = true;
cec736d2
LP
947
948 /* Link up the items */
949 n = journal_file_entry_n_items(o);
950 for (i = 0; i < n; i++) {
951 r = journal_file_link_entry_item(f, o, offset, i);
952 if (r < 0)
953 return r;
954 }
955
cec736d2
LP
956 return 0;
957}
958
959static int journal_file_append_entry_internal(
960 JournalFile *f,
961 const dual_timestamp *ts,
962 uint64_t xor_hash,
963 const EntryItem items[], unsigned n_items,
de190aef 964 uint64_t *seqnum,
cec736d2
LP
965 Object **ret, uint64_t *offset) {
966 uint64_t np;
967 uint64_t osize;
968 Object *o;
969 int r;
970
971 assert(f);
972 assert(items || n_items == 0);
de190aef 973 assert(ts);
cec736d2
LP
974
975 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
976
de190aef 977 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
978 if (r < 0)
979 return r;
980
de190aef 981 o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
cec736d2 982 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
983 o->entry.realtime = htole64(ts->realtime);
984 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
985 o->entry.xor_hash = htole64(xor_hash);
986 o->entry.boot_id = f->header->boot_id;
987
988 r = journal_file_link_entry(f, o, np);
989 if (r < 0)
990 return r;
991
992 if (ret)
993 *ret = o;
994
995 if (offset)
996 *offset = np;
997
998 return 0;
999}
1000
cf244689 1001void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
1002 assert(f);
1003
1004 /* inotify() does not receive IN_MODIFY events from file
1005 * accesses done via mmap(). After each access we hence
1006 * trigger IN_MODIFY by truncating the journal file to its
1007 * current size which triggers IN_MODIFY. */
1008
bc85bfee
LP
1009 __sync_synchronize();
1010
50f20cfd
LP
1011 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1012 log_error("Failed to to truncate file to its own size: %m");
1013}
1014
de190aef 1015int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1016 unsigned i;
1017 EntryItem *items;
1018 int r;
1019 uint64_t xor_hash = 0;
de190aef 1020 struct dual_timestamp _ts;
cec736d2
LP
1021
1022 assert(f);
1023 assert(iovec || n_iovec == 0);
1024
de190aef
LP
1025 if (!f->writable)
1026 return -EPERM;
1027
1028 if (!ts) {
1029 dual_timestamp_get(&_ts);
1030 ts = &_ts;
1031 }
1032
1033 if (f->tail_entry_monotonic_valid &&
1034 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1035 return -EINVAL;
1036
cf244689 1037 items = alloca(sizeof(EntryItem) * n_iovec);
cec736d2
LP
1038
1039 for (i = 0; i < n_iovec; i++) {
1040 uint64_t p;
1041 Object *o;
1042
1043 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1044 if (r < 0)
cf244689 1045 return r;
cec736d2
LP
1046
1047 xor_hash ^= le64toh(o->data.hash);
1048 items[i].object_offset = htole64(p);
de7b95cd 1049 items[i].hash = o->data.hash;
cec736d2
LP
1050 }
1051
de190aef 1052 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1053
50f20cfd
LP
1054 journal_file_post_change(f);
1055
cec736d2
LP
1056 return r;
1057}
1058
de190aef
LP
1059static int generic_array_get(JournalFile *f,
1060 uint64_t first,
1061 uint64_t i,
1062 Object **ret, uint64_t *offset) {
1063
cec736d2 1064 Object *o;
6c8a39b8 1065 uint64_t p = 0, a;
cec736d2
LP
1066 int r;
1067
1068 assert(f);
1069
de190aef
LP
1070 a = first;
1071 while (a > 0) {
1072 uint64_t n;
cec736d2 1073
de190aef
LP
1074 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1075 if (r < 0)
1076 return r;
cec736d2 1077
de190aef
LP
1078 n = journal_file_entry_array_n_items(o);
1079 if (i < n) {
1080 p = le64toh(o->entry_array.items[i]);
1081 break;
cec736d2
LP
1082 }
1083
de190aef
LP
1084 i -= n;
1085 a = le64toh(o->entry_array.next_entry_array_offset);
1086 }
1087
1088 if (a <= 0 || p <= 0)
1089 return 0;
1090
1091 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1092 if (r < 0)
1093 return r;
1094
1095 if (ret)
1096 *ret = o;
1097
1098 if (offset)
1099 *offset = p;
1100
1101 return 1;
1102}
1103
1104static int generic_array_get_plus_one(JournalFile *f,
1105 uint64_t extra,
1106 uint64_t first,
1107 uint64_t i,
1108 Object **ret, uint64_t *offset) {
1109
1110 Object *o;
1111
1112 assert(f);
1113
1114 if (i == 0) {
1115 int r;
1116
1117 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1118 if (r < 0)
1119 return r;
1120
de190aef
LP
1121 if (ret)
1122 *ret = o;
cec736d2 1123
de190aef
LP
1124 if (offset)
1125 *offset = extra;
cec736d2 1126
de190aef 1127 return 1;
cec736d2
LP
1128 }
1129
de190aef
LP
1130 return generic_array_get(f, first, i-1, ret, offset);
1131}
cec736d2 1132
de190aef
LP
/* Results of the test_object() callbacks passed to the bisection
 * helpers. generic_array_bisect() continues in the lower indexes on
 * TEST_RIGHT and in the higher indexes otherwise; TEST_FOUND is mapped
 * to one of the other two depending on the search direction. */
enum {
        TEST_FOUND = 0,
        TEST_LEFT = 1,
        TEST_RIGHT = 2
};
cec736d2 1138
de190aef
LP
1139static int generic_array_bisect(JournalFile *f,
1140 uint64_t first,
1141 uint64_t n,
1142 uint64_t needle,
1143 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1144 direction_t direction,
1145 Object **ret,
1146 uint64_t *offset,
1147 uint64_t *idx) {
1148
1149 uint64_t a, p, t = 0, i = 0, last_p = 0;
1150 bool subtract_one = false;
1151 Object *o, *array = NULL;
1152 int r;
cec736d2 1153
de190aef
LP
1154 assert(f);
1155 assert(test_object);
cec736d2 1156
de190aef
LP
1157 a = first;
1158 while (a > 0) {
1159 uint64_t left, right, k, lp;
1160
1161 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1162 if (r < 0)
1163 return r;
1164
de190aef
LP
1165 k = journal_file_entry_array_n_items(array);
1166 right = MIN(k, n);
1167 if (right <= 0)
1168 return 0;
cec736d2 1169
de190aef
LP
1170 i = right - 1;
1171 lp = p = le64toh(array->entry_array.items[i]);
1172 if (p <= 0)
1173 return -EBADMSG;
cec736d2 1174
de190aef
LP
1175 r = test_object(f, p, needle);
1176 if (r < 0)
1177 return r;
cec736d2 1178
de190aef
LP
1179 if (r == TEST_FOUND)
1180 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1181
1182 if (r == TEST_RIGHT) {
1183 left = 0;
1184 right -= 1;
1185 for (;;) {
1186 if (left == right) {
1187 if (direction == DIRECTION_UP)
1188 subtract_one = true;
1189
1190 i = left;
1191 goto found;
1192 }
1193
1194 assert(left < right);
1195
1196 i = (left + right) / 2;
1197 p = le64toh(array->entry_array.items[i]);
1198 if (p <= 0)
1199 return -EBADMSG;
1200
1201 r = test_object(f, p, needle);
1202 if (r < 0)
1203 return r;
cec736d2 1204
de190aef
LP
1205 if (r == TEST_FOUND)
1206 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1207
1208 if (r == TEST_RIGHT)
1209 right = i;
1210 else
1211 left = i + 1;
1212 }
1213 }
1214
cbdca852
LP
1215 if (k > n) {
1216 if (direction == DIRECTION_UP) {
1217 i = n;
1218 subtract_one = true;
1219 goto found;
1220 }
1221
cec736d2 1222 return 0;
cbdca852 1223 }
cec736d2 1224
de190aef
LP
1225 last_p = lp;
1226
1227 n -= k;
1228 t += k;
1229 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1230 }
1231
1232 return 0;
de190aef
LP
1233
1234found:
1235 if (subtract_one && t == 0 && i == 0)
1236 return 0;
1237
1238 if (subtract_one && i == 0)
1239 p = last_p;
1240 else if (subtract_one)
1241 p = le64toh(array->entry_array.items[i-1]);
1242 else
1243 p = le64toh(array->entry_array.items[i]);
1244
1245 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1246 if (r < 0)
1247 return r;
1248
1249 if (ret)
1250 *ret = o;
1251
1252 if (offset)
1253 *offset = p;
1254
1255 if (idx)
cbdca852 1256 *idx = t + i + (subtract_one ? -1 : 0);
de190aef
LP
1257
1258 return 1;
cec736d2
LP
1259}
1260
de190aef
LP
1261static int generic_array_bisect_plus_one(JournalFile *f,
1262 uint64_t extra,
1263 uint64_t first,
1264 uint64_t n,
1265 uint64_t needle,
1266 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1267 direction_t direction,
1268 Object **ret,
1269 uint64_t *offset,
1270 uint64_t *idx) {
1271
cec736d2 1272 int r;
cbdca852
LP
1273 bool step_back = false;
1274 Object *o;
cec736d2
LP
1275
1276 assert(f);
de190aef 1277 assert(test_object);
cec736d2 1278
de190aef
LP
1279 if (n <= 0)
1280 return 0;
cec736d2 1281
de190aef
LP
1282 /* This bisects the array in object 'first', but first checks
1283 * an extra */
de190aef
LP
1284 r = test_object(f, extra, needle);
1285 if (r < 0)
1286 return r;
a536e261
LP
1287
1288 if (r == TEST_FOUND)
1289 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1290
cbdca852
LP
1291 /* if we are looking with DIRECTION_UP then we need to first
1292 see if in the actual array there is a matching entry, and
1293 return the last one of that. But if there isn't any we need
1294 to return this one. Hence remember this, and return it
1295 below. */
1296 if (r == TEST_LEFT)
1297 step_back = direction == DIRECTION_UP;
de190aef 1298
cbdca852
LP
1299 if (r == TEST_RIGHT) {
1300 if (direction == DIRECTION_DOWN)
1301 goto found;
1302 else
1303 return 0;
a536e261 1304 }
cec736d2 1305
de190aef
LP
1306 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1307
cbdca852
LP
1308 if (r == 0 && step_back)
1309 goto found;
1310
ecf68b1d 1311 if (r > 0 && idx)
de190aef
LP
1312 (*idx) ++;
1313
1314 return r;
cbdca852
LP
1315
1316found:
1317 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1318 if (r < 0)
1319 return r;
1320
1321 if (ret)
1322 *ret = o;
1323
1324 if (offset)
1325 *offset = extra;
1326
1327 if (idx)
1328 *idx = 0;
1329
1330 return 1;
1331}
1332
1333static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1334 assert(f);
1335 assert(p > 0);
1336
1337 if (p == needle)
1338 return TEST_FOUND;
1339 else if (p < needle)
1340 return TEST_LEFT;
1341 else
1342 return TEST_RIGHT;
1343}
1344
1345int journal_file_move_to_entry_by_offset(
1346 JournalFile *f,
1347 uint64_t p,
1348 direction_t direction,
1349 Object **ret,
1350 uint64_t *offset) {
1351
1352 return generic_array_bisect(f,
1353 le64toh(f->header->entry_array_offset),
1354 le64toh(f->header->n_entries),
1355 p,
1356 test_object_offset,
1357 direction,
1358 ret, offset, NULL);
de190aef
LP
1359}
1360
cbdca852 1361
de190aef
LP
1362static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1363 Object *o;
1364 int r;
1365
1366 assert(f);
1367 assert(p > 0);
1368
1369 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1370 if (r < 0)
1371 return r;
1372
de190aef
LP
1373 if (le64toh(o->entry.seqnum) == needle)
1374 return TEST_FOUND;
1375 else if (le64toh(o->entry.seqnum) < needle)
1376 return TEST_LEFT;
1377 else
1378 return TEST_RIGHT;
1379}
cec736d2 1380
de190aef
LP
1381int journal_file_move_to_entry_by_seqnum(
1382 JournalFile *f,
1383 uint64_t seqnum,
1384 direction_t direction,
1385 Object **ret,
1386 uint64_t *offset) {
1387
1388 return generic_array_bisect(f,
1389 le64toh(f->header->entry_array_offset),
1390 le64toh(f->header->n_entries),
1391 seqnum,
1392 test_object_seqnum,
1393 direction,
1394 ret, offset, NULL);
1395}
cec736d2 1396
de190aef
LP
1397static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1398 Object *o;
1399 int r;
1400
1401 assert(f);
1402 assert(p > 0);
1403
1404 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1405 if (r < 0)
1406 return r;
1407
1408 if (le64toh(o->entry.realtime) == needle)
1409 return TEST_FOUND;
1410 else if (le64toh(o->entry.realtime) < needle)
1411 return TEST_LEFT;
1412 else
1413 return TEST_RIGHT;
cec736d2
LP
1414}
1415
de190aef
LP
1416int journal_file_move_to_entry_by_realtime(
1417 JournalFile *f,
1418 uint64_t realtime,
1419 direction_t direction,
1420 Object **ret,
1421 uint64_t *offset) {
1422
1423 return generic_array_bisect(f,
1424 le64toh(f->header->entry_array_offset),
1425 le64toh(f->header->n_entries),
1426 realtime,
1427 test_object_realtime,
1428 direction,
1429 ret, offset, NULL);
1430}
1431
1432static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1433 Object *o;
1434 int r;
1435
1436 assert(f);
1437 assert(p > 0);
1438
1439 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1440 if (r < 0)
1441 return r;
1442
1443 if (le64toh(o->entry.monotonic) == needle)
1444 return TEST_FOUND;
1445 else if (le64toh(o->entry.monotonic) < needle)
1446 return TEST_LEFT;
1447 else
1448 return TEST_RIGHT;
1449}
1450
1451int journal_file_move_to_entry_by_monotonic(
1452 JournalFile *f,
1453 sd_id128_t boot_id,
1454 uint64_t monotonic,
1455 direction_t direction,
1456 Object **ret,
1457 uint64_t *offset) {
1458
10b6f904 1459 char t[9+32+1] = "_BOOT_ID=";
de190aef
LP
1460 Object *o;
1461 int r;
1462
cbdca852 1463 assert(f);
de190aef 1464
cbdca852 1465 sd_id128_to_string(boot_id, t + 9);
de190aef
LP
1466 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1467 if (r < 0)
1468 return r;
cbdca852 1469 if (r == 0)
de190aef
LP
1470 return -ENOENT;
1471
1472 return generic_array_bisect_plus_one(f,
1473 le64toh(o->data.entry_offset),
1474 le64toh(o->data.entry_array_offset),
1475 le64toh(o->data.n_entries),
1476 monotonic,
1477 test_object_monotonic,
1478 direction,
1479 ret, offset, NULL);
1480}
1481
de190aef
LP
1482int journal_file_next_entry(
1483 JournalFile *f,
1484 Object *o, uint64_t p,
1485 direction_t direction,
1486 Object **ret, uint64_t *offset) {
1487
1488 uint64_t i, n;
cec736d2
LP
1489 int r;
1490
1491 assert(f);
de190aef
LP
1492 assert(p > 0 || !o);
1493
1494 n = le64toh(f->header->n_entries);
1495 if (n <= 0)
1496 return 0;
cec736d2
LP
1497
1498 if (!o)
de190aef 1499 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1500 else {
de190aef 1501 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1502 return -EINVAL;
1503
de190aef
LP
1504 r = generic_array_bisect(f,
1505 le64toh(f->header->entry_array_offset),
1506 le64toh(f->header->n_entries),
1507 p,
1508 test_object_offset,
1509 DIRECTION_DOWN,
1510 NULL, NULL,
1511 &i);
1512 if (r <= 0)
1513 return r;
1514
1515 if (direction == DIRECTION_DOWN) {
1516 if (i >= n - 1)
1517 return 0;
1518
1519 i++;
1520 } else {
1521 if (i <= 0)
1522 return 0;
1523
1524 i--;
1525 }
cec736d2
LP
1526 }
1527
de190aef
LP
1528 /* And jump to it */
1529 return generic_array_get(f,
1530 le64toh(f->header->entry_array_offset),
1531 i,
1532 ret, offset);
1533}
cec736d2 1534
de190aef
LP
1535int journal_file_skip_entry(
1536 JournalFile *f,
1537 Object *o, uint64_t p,
1538 int64_t skip,
1539 Object **ret, uint64_t *offset) {
1540
1541 uint64_t i, n;
1542 int r;
1543
1544 assert(f);
1545 assert(o);
1546 assert(p > 0);
1547
1548 if (o->object.type != OBJECT_ENTRY)
1549 return -EINVAL;
1550
1551 r = generic_array_bisect(f,
1552 le64toh(f->header->entry_array_offset),
1553 le64toh(f->header->n_entries),
1554 p,
1555 test_object_offset,
1556 DIRECTION_DOWN,
1557 NULL, NULL,
1558 &i);
1559 if (r <= 0)
cec736d2
LP
1560 return r;
1561
de190aef
LP
1562 /* Calculate new index */
1563 if (skip < 0) {
1564 if ((uint64_t) -skip >= i)
1565 i = 0;
1566 else
1567 i = i - (uint64_t) -skip;
1568 } else
1569 i += (uint64_t) skip;
cec736d2 1570
de190aef
LP
1571 n = le64toh(f->header->n_entries);
1572 if (n <= 0)
1573 return -EBADMSG;
cec736d2 1574
de190aef
LP
1575 if (i >= n)
1576 i = n-1;
1577
1578 return generic_array_get(f,
1579 le64toh(f->header->entry_array_offset),
1580 i,
1581 ret, offset);
cec736d2
LP
1582}
1583
de190aef
LP
1584int journal_file_next_entry_for_data(
1585 JournalFile *f,
1586 Object *o, uint64_t p,
1587 uint64_t data_offset,
1588 direction_t direction,
1589 Object **ret, uint64_t *offset) {
1590
1591 uint64_t n, i;
cec736d2 1592 int r;
de190aef 1593 Object *d;
cec736d2
LP
1594
1595 assert(f);
de190aef 1596 assert(p > 0 || !o);
cec736d2 1597
de190aef 1598 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1599 if (r < 0)
de190aef 1600 return r;
cec736d2 1601
de190aef
LP
1602 n = le64toh(d->data.n_entries);
1603 if (n <= 0)
1604 return n;
cec736d2 1605
de190aef
LP
1606 if (!o)
1607 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1608 else {
1609 if (o->object.type != OBJECT_ENTRY)
1610 return -EINVAL;
cec736d2 1611
de190aef
LP
1612 r = generic_array_bisect_plus_one(f,
1613 le64toh(d->data.entry_offset),
1614 le64toh(d->data.entry_array_offset),
1615 le64toh(d->data.n_entries),
1616 p,
1617 test_object_offset,
1618 DIRECTION_DOWN,
1619 NULL, NULL,
1620 &i);
1621
1622 if (r <= 0)
cec736d2
LP
1623 return r;
1624
de190aef
LP
1625 if (direction == DIRECTION_DOWN) {
1626 if (i >= n - 1)
1627 return 0;
cec736d2 1628
de190aef
LP
1629 i++;
1630 } else {
1631 if (i <= 0)
1632 return 0;
cec736d2 1633
de190aef
LP
1634 i--;
1635 }
cec736d2 1636
de190aef 1637 }
cec736d2 1638
de190aef
LP
1639 return generic_array_get_plus_one(f,
1640 le64toh(d->data.entry_offset),
1641 le64toh(d->data.entry_array_offset),
1642 i,
1643 ret, offset);
1644}
cec736d2 1645
cbdca852
LP
1646int journal_file_move_to_entry_by_offset_for_data(
1647 JournalFile *f,
1648 uint64_t data_offset,
1649 uint64_t p,
1650 direction_t direction,
1651 Object **ret, uint64_t *offset) {
1652
1653 int r;
1654 Object *d;
1655
1656 assert(f);
1657
1658 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1659 if (r < 0)
1660 return r;
1661
1662 return generic_array_bisect_plus_one(f,
1663 le64toh(d->data.entry_offset),
1664 le64toh(d->data.entry_array_offset),
1665 le64toh(d->data.n_entries),
1666 p,
1667 test_object_offset,
1668 direction,
1669 ret, offset, NULL);
1670}
1671
1672int journal_file_move_to_entry_by_monotonic_for_data(
1673 JournalFile *f,
1674 uint64_t data_offset,
1675 sd_id128_t boot_id,
1676 uint64_t monotonic,
1677 direction_t direction,
1678 Object **ret, uint64_t *offset) {
1679
1680 char t[9+32+1] = "_BOOT_ID=";
1681 Object *o, *d;
1682 int r;
1683 uint64_t b, z;
1684
1685 assert(f);
1686
1687 /* First, seek by time */
1688 sd_id128_to_string(boot_id, t + 9);
1689 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1690 if (r < 0)
1691 return r;
1692 if (r == 0)
1693 return -ENOENT;
1694
1695 r = generic_array_bisect_plus_one(f,
1696 le64toh(o->data.entry_offset),
1697 le64toh(o->data.entry_array_offset),
1698 le64toh(o->data.n_entries),
1699 monotonic,
1700 test_object_monotonic,
1701 direction,
1702 NULL, &z, NULL);
1703 if (r <= 0)
1704 return r;
1705
1706 /* And now, continue seeking until we find an entry that
1707 * exists in both bisection arrays */
1708
1709 for (;;) {
1710 Object *qo;
1711 uint64_t p, q;
1712
1713 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1714 if (r < 0)
1715 return r;
1716
1717 r = generic_array_bisect_plus_one(f,
1718 le64toh(d->data.entry_offset),
1719 le64toh(d->data.entry_array_offset),
1720 le64toh(d->data.n_entries),
1721 z,
1722 test_object_offset,
1723 direction,
1724 NULL, &p, NULL);
1725 if (r <= 0)
1726 return r;
1727
1728 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1729 if (r < 0)
1730 return r;
1731
1732 r = generic_array_bisect_plus_one(f,
1733 le64toh(o->data.entry_offset),
1734 le64toh(o->data.entry_array_offset),
1735 le64toh(o->data.n_entries),
1736 p,
1737 test_object_offset,
1738 direction,
1739 &qo, &q, NULL);
1740
1741 if (r <= 0)
1742 return r;
1743
1744 if (p == q) {
1745 if (ret)
1746 *ret = qo;
1747 if (offset)
1748 *offset = q;
1749
1750 return 1;
1751 }
1752
1753 z = q;
1754 }
1755
1756 return 0;
1757}
1758
de190aef
LP
1759int journal_file_move_to_entry_by_seqnum_for_data(
1760 JournalFile *f,
1761 uint64_t data_offset,
1762 uint64_t seqnum,
1763 direction_t direction,
1764 Object **ret, uint64_t *offset) {
cec736d2 1765
de190aef
LP
1766 Object *d;
1767 int r;
cec736d2 1768
91a31dde
LP
1769 assert(f);
1770
de190aef 1771 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1772 if (r < 0)
de190aef 1773 return r;
cec736d2 1774
de190aef
LP
1775 return generic_array_bisect_plus_one(f,
1776 le64toh(d->data.entry_offset),
1777 le64toh(d->data.entry_array_offset),
1778 le64toh(d->data.n_entries),
1779 seqnum,
1780 test_object_seqnum,
1781 direction,
1782 ret, offset, NULL);
1783}
cec736d2 1784
de190aef
LP
1785int journal_file_move_to_entry_by_realtime_for_data(
1786 JournalFile *f,
1787 uint64_t data_offset,
1788 uint64_t realtime,
1789 direction_t direction,
1790 Object **ret, uint64_t *offset) {
1791
1792 Object *d;
1793 int r;
1794
91a31dde
LP
1795 assert(f);
1796
de190aef 1797 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
91a31dde 1798 if (r < 0)
de190aef
LP
1799 return r;
1800
1801 return generic_array_bisect_plus_one(f,
1802 le64toh(d->data.entry_offset),
1803 le64toh(d->data.entry_array_offset),
1804 le64toh(d->data.n_entries),
1805 realtime,
1806 test_object_realtime,
1807 direction,
1808 ret, offset, NULL);
cec736d2
LP
1809}
1810
1811void journal_file_dump(JournalFile *f) {
1812 char a[33], b[33], c[33];
1813 Object *o;
1814 int r;
1815 uint64_t p;
1816
1817 assert(f);
1818
de190aef
LP
1819 printf("File Path: %s\n"
1820 "File ID: %s\n"
cec736d2
LP
1821 "Machine ID: %s\n"
1822 "Boot ID: %s\n"
de190aef
LP
1823 "Arena size: %llu\n"
1824 "Objects: %lu\n"
1825 "Entries: %lu\n",
1826 f->path,
cec736d2
LP
1827 sd_id128_to_string(f->header->file_id, a),
1828 sd_id128_to_string(f->header->machine_id, b),
1829 sd_id128_to_string(f->header->boot_id, c),
de190aef
LP
1830 (unsigned long long) le64toh(f->header->arena_size),
1831 (unsigned long) le64toh(f->header->n_objects),
1832 (unsigned long) le64toh(f->header->n_entries));
cec736d2 1833
23b0b2b2 1834 p = le64toh(f->header->header_size);
cec736d2 1835 while (p != 0) {
de190aef 1836 r = journal_file_move_to_object(f, -1, p, &o);
cec736d2
LP
1837 if (r < 0)
1838 goto fail;
1839
1840 switch (o->object.type) {
1841
1842 case OBJECT_UNUSED:
1843 printf("Type: OBJECT_UNUSED\n");
1844 break;
1845
1846 case OBJECT_DATA:
1847 printf("Type: OBJECT_DATA\n");
1848 break;
1849
1850 case OBJECT_ENTRY:
3fbf9cbb
LP
1851 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1852 (unsigned long long) le64toh(o->entry.seqnum),
1853 (unsigned long long) le64toh(o->entry.monotonic),
1854 (unsigned long long) le64toh(o->entry.realtime));
cec736d2
LP
1855 break;
1856
de190aef
LP
1857 case OBJECT_FIELD_HASH_TABLE:
1858 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
cec736d2
LP
1859 break;
1860
de190aef
LP
1861 case OBJECT_DATA_HASH_TABLE:
1862 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1863 break;
1864
1865 case OBJECT_ENTRY_ARRAY:
1866 printf("Type: OBJECT_ENTRY_ARRAY\n");
cec736d2 1867 break;
8144056f
LP
1868
1869 case OBJECT_SIGNATURE:
1870 printf("Type: OBJECT_SIGNATURE\n");
1871 break;
cec736d2
LP
1872 }
1873
807e17f0
LP
1874 if (o->object.flags & OBJECT_COMPRESSED)
1875 printf("Flags: COMPRESSED\n");
1876
cec736d2
LP
1877 if (p == le64toh(f->header->tail_object_offset))
1878 p = 0;
1879 else
1880 p = p + ALIGN64(le64toh(o->object.size));
1881 }
1882
1883 return;
1884fail:
1885 log_error("File corrupt");
1886}
1887
1888int journal_file_open(
1889 const char *fname,
1890 int flags,
1891 mode_t mode,
0ac38b70 1892 JournalFile *template,
cec736d2
LP
1893 JournalFile **ret) {
1894
1895 JournalFile *f;
1896 int r;
1897 bool newly_created = false;
1898
1899 assert(fname);
1900
1901 if ((flags & O_ACCMODE) != O_RDONLY &&
1902 (flags & O_ACCMODE) != O_RDWR)
1903 return -EINVAL;
1904
9447a7f1
LP
1905 if (!endswith(fname, ".journal"))
1906 return -EINVAL;
1907
cec736d2
LP
1908 f = new0(JournalFile, 1);
1909 if (!f)
1910 return -ENOMEM;
1911
0ac38b70
LP
1912 f->fd = -1;
1913 f->flags = flags;
1914 f->mode = mode;
cec736d2
LP
1915 f->writable = (flags & O_ACCMODE) != O_RDONLY;
1916 f->prot = prot_from_flags(flags);
1917
15944db8
LP
1918 if (template) {
1919 f->metrics = template->metrics;
1920 f->compress = template->compress;
1921 }
1922
cec736d2
LP
1923 f->path = strdup(fname);
1924 if (!f->path) {
1925 r = -ENOMEM;
1926 goto fail;
1927 }
1928
0ac38b70
LP
1929 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1930 if (f->fd < 0) {
1931 r = -errno;
1932 goto fail;
1933 }
1934
cec736d2
LP
1935 if (fstat(f->fd, &f->last_stat) < 0) {
1936 r = -errno;
1937 goto fail;
1938 }
1939
1940 if (f->last_stat.st_size == 0 && f->writable) {
1941 newly_created = true;
1942
0ac38b70 1943 r = journal_file_init_header(f, template);
cec736d2
LP
1944 if (r < 0)
1945 goto fail;
1946
1947 if (fstat(f->fd, &f->last_stat) < 0) {
1948 r = -errno;
1949 goto fail;
1950 }
1951 }
1952
1953 if (f->last_stat.st_size < (off_t) sizeof(Header)) {
1954 r = -EIO;
1955 goto fail;
1956 }
1957
1958 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
1959 if (f->header == MAP_FAILED) {
1960 f->header = NULL;
1961 r = -errno;
1962 goto fail;
1963 }
1964
1965 if (!newly_created) {
1966 r = journal_file_verify_header(f);
1967 if (r < 0)
1968 goto fail;
1969 }
1970
1971 if (f->writable) {
1972 r = journal_file_refresh_header(f);
1973 if (r < 0)
1974 goto fail;
1975 }
1976
1977 if (newly_created) {
1978
de190aef 1979 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
1980 if (r < 0)
1981 goto fail;
1982
de190aef 1983 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
1984 if (r < 0)
1985 goto fail;
1986 }
1987
de190aef 1988 r = journal_file_map_field_hash_table(f);
cec736d2
LP
1989 if (r < 0)
1990 goto fail;
1991
de190aef 1992 r = journal_file_map_data_hash_table(f);
cec736d2
LP
1993 if (r < 0)
1994 goto fail;
1995
1996 if (ret)
1997 *ret = f;
1998
1999 return 0;
2000
2001fail:
2002 journal_file_close(f);
2003
2004 return r;
2005}
0ac38b70
LP
2006
2007int journal_file_rotate(JournalFile **f) {
2008 char *p;
2009 size_t l;
2010 JournalFile *old_file, *new_file = NULL;
2011 int r;
2012
2013 assert(f);
2014 assert(*f);
2015
2016 old_file = *f;
2017
2018 if (!old_file->writable)
2019 return -EINVAL;
2020
2021 if (!endswith(old_file->path, ".journal"))
2022 return -EINVAL;
2023
2024 l = strlen(old_file->path);
2025
9447a7f1 2026 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
2027 if (!p)
2028 return -ENOMEM;
2029
2030 memcpy(p, old_file->path, l - 8);
2031 p[l-8] = '@';
2032 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2033 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2034 "-%016llx-%016llx.journal",
2035 (unsigned long long) le64toh((*f)->header->seqnum),
2036 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2037
2038 r = rename(old_file->path, p);
2039 free(p);
2040
2041 if (r < 0)
2042 return -errno;
2043
ccdbaf91 2044 old_file->header->state = STATE_ARCHIVED;
0ac38b70
LP
2045
2046 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file);
2047 journal_file_close(old_file);
2048
2049 *f = new_file;
2050 return r;
2051}
2052
9447a7f1
LP
2053int journal_file_open_reliably(
2054 const char *fname,
2055 int flags,
2056 mode_t mode,
2057 JournalFile *template,
2058 JournalFile **ret) {
2059
2060 int r;
2061 size_t l;
2062 char *p;
2063
2064 r = journal_file_open(fname, flags, mode, template, ret);
0071d9f1
LP
2065 if (r != -EBADMSG && /* corrupted */
2066 r != -ENODATA && /* truncated */
2067 r != -EHOSTDOWN && /* other machine */
2068 r != -EPROTONOSUPPORT) /* incompatible feature */
9447a7f1
LP
2069 return r;
2070
2071 if ((flags & O_ACCMODE) == O_RDONLY)
2072 return r;
2073
2074 if (!(flags & O_CREAT))
2075 return r;
2076
5c70eab4
LP
2077 /* The file is corrupted. Rotate it away and try it again (but only once) */
2078
9447a7f1
LP
2079 l = strlen(fname);
2080 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2081 (int) (l-8), fname,
2082 (unsigned long long) now(CLOCK_REALTIME),
2083 random_ull()) < 0)
2084 return -ENOMEM;
2085
2086 r = rename(fname, p);
2087 free(p);
2088 if (r < 0)
2089 return -errno;
2090
2091 log_warning("File %s corrupted, renaming and replacing.", fname);
2092
2093 return journal_file_open(fname, flags, mode, template, ret);
2094}
2095
0ac38b70
LP
2096struct vacuum_info {
2097 off_t usage;
2098 char *filename;
2099
2100 uint64_t realtime;
2101 sd_id128_t seqnum_id;
2102 uint64_t seqnum;
5c70eab4
LP
2103
2104 bool have_seqnum;
0ac38b70
LP
2105};
2106
2107static int vacuum_compare(const void *_a, const void *_b) {
2108 const struct vacuum_info *a, *b;
2109
2110 a = _a;
2111 b = _b;
2112
5c70eab4
LP
2113 if (a->have_seqnum && b->have_seqnum &&
2114 sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
0ac38b70
LP
2115 if (a->seqnum < b->seqnum)
2116 return -1;
2117 else if (a->seqnum > b->seqnum)
2118 return 1;
2119 else
2120 return 0;
2121 }
2122
2123 if (a->realtime < b->realtime)
2124 return -1;
2125 else if (a->realtime > b->realtime)
2126 return 1;
5c70eab4 2127 else if (a->have_seqnum && b->have_seqnum)
0ac38b70 2128 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
5c70eab4
LP
2129 else
2130 return strcmp(a->filename, b->filename);
0ac38b70
LP
2131}
2132
2133int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
2134 DIR *d;
2135 int r = 0;
2136 struct vacuum_info *list = NULL;
2137 unsigned n_list = 0, n_allocated = 0, i;
2138 uint64_t sum = 0;
2139
2140 assert(directory);
2141
2142 if (max_use <= 0)
babfc091 2143 return 0;
0ac38b70
LP
2144
2145 d = opendir(directory);
2146 if (!d)
2147 return -errno;
2148
2149 for (;;) {
2150 int k;
2151 struct dirent buf, *de;
2152 size_t q;
2153 struct stat st;
2154 char *p;
7ea07dcd 2155 unsigned long long seqnum = 0, realtime;
0ac38b70 2156 sd_id128_t seqnum_id;
5c70eab4 2157 bool have_seqnum;
0ac38b70
LP
2158
2159 k = readdir_r(d, &buf, &de);
2160 if (k != 0) {
2161 r = -k;
2162 goto finish;
2163 }
2164
2165 if (!de)
2166 break;
2167
5c70eab4
LP
2168 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2169 continue;
2170
2171 if (!S_ISREG(st.st_mode))
0ac38b70
LP
2172 continue;
2173
2174 q = strlen(de->d_name);
2175
5c70eab4 2176 if (endswith(de->d_name, ".journal")) {
0ac38b70 2177
5c70eab4 2178 /* Vacuum archived files */
0ac38b70 2179
5c70eab4
LP
2180 if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2181 continue;
0ac38b70 2182
5c70eab4
LP
2183 if (de->d_name[q-8-16-1] != '-' ||
2184 de->d_name[q-8-16-1-16-1] != '-' ||
2185 de->d_name[q-8-16-1-16-1-32-1] != '@')
2186 continue;
0ac38b70 2187
5c70eab4
LP
2188 p = strdup(de->d_name);
2189 if (!p) {
2190 r = -ENOMEM;
2191 goto finish;
2192 }
0ac38b70 2193
5c70eab4
LP
2194 de->d_name[q-8-16-1-16-1] = 0;
2195 if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2196 free(p);
2197 continue;
2198 }
2199
2200 if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2201 free(p);
2202 continue;
2203 }
2204
2205 have_seqnum = true;
2206
2207 } else if (endswith(de->d_name, ".journal~")) {
2208 unsigned long long tmp;
2209
2210 /* Vacuum corrupted files */
2211
2212 if (q < 1 + 16 + 1 + 16 + 8 + 1)
2213 continue;
0ac38b70 2214
5c70eab4
LP
2215 if (de->d_name[q-1-8-16-1] != '-' ||
2216 de->d_name[q-1-8-16-1-16-1] != '@')
2217 continue;
2218
2219 p = strdup(de->d_name);
2220 if (!p) {
2221 r = -ENOMEM;
2222 goto finish;
2223 }
2224
2225 if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2226 free(p);
2227 continue;
2228 }
2229
2230 have_seqnum = false;
2231 } else
0ac38b70 2232 continue;
0ac38b70
LP
2233
2234 if (n_list >= n_allocated) {
2235 struct vacuum_info *j;
2236
2237 n_allocated = MAX(n_allocated * 2U, 8U);
2238 j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2239 if (!j) {
2240 free(p);
2241 r = -ENOMEM;
2242 goto finish;
2243 }
2244
2245 list = j;
2246 }
2247
2248 list[n_list].filename = p;
a3a52c0f 2249 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
0ac38b70
LP
2250 list[n_list].seqnum = seqnum;
2251 list[n_list].realtime = realtime;
2252 list[n_list].seqnum_id = seqnum_id;
5c70eab4 2253 list[n_list].have_seqnum = have_seqnum;
0ac38b70
LP
2254
2255 sum += list[n_list].usage;
2256
2257 n_list ++;
2258 }
2259
2260 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
2261
2262 for(i = 0; i < n_list; i++) {
2263 struct statvfs ss;
2264
2265 if (fstatvfs(dirfd(d), &ss) < 0) {
2266 r = -errno;
2267 goto finish;
2268 }
2269
2270 if (sum <= max_use &&
2271 (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2272 break;
2273
2274 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
e7bf07b3 2275 log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
0ac38b70
LP
2276 sum -= list[i].usage;
2277 } else if (errno != ENOENT)
2278 log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2279 }
2280
2281finish:
2282 for (i = 0; i < n_list; i++)
2283 free(list[i].filename);
2284
2285 free(list);
2286
de190aef
LP
2287 if (d)
2288 closedir(d);
2289
0ac38b70
LP
2290 return r;
2291}
cf244689
LP
2292
2293int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2294 uint64_t i, n;
2295 uint64_t q, xor_hash = 0;
2296 int r;
2297 EntryItem *items;
2298 dual_timestamp ts;
2299
2300 assert(from);
2301 assert(to);
2302 assert(o);
2303 assert(p);
2304
2305 if (!to->writable)
2306 return -EPERM;
2307
2308 ts.monotonic = le64toh(o->entry.monotonic);
2309 ts.realtime = le64toh(o->entry.realtime);
2310
2311 if (to->tail_entry_monotonic_valid &&
2312 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2313 return -EINVAL;
2314
cf244689
LP
2315 n = journal_file_entry_n_items(o);
2316 items = alloca(sizeof(EntryItem) * n);
2317
2318 for (i = 0; i < n; i++) {
4fd052ae
FC
2319 uint64_t l, h;
2320 le64_t le_hash;
cf244689
LP
2321 size_t t;
2322 void *data;
2323 Object *u;
2324
2325 q = le64toh(o->entry.items[i].object_offset);
2326 le_hash = o->entry.items[i].hash;
2327
2328 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2329 if (r < 0)
2330 return r;
2331
2332 if (le_hash != o->data.hash)
2333 return -EBADMSG;
2334
2335 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2336 t = (size_t) l;
2337
2338 /* We hit the limit on 32bit machines */
2339 if ((uint64_t) t != l)
2340 return -E2BIG;
2341
2342 if (o->object.flags & OBJECT_COMPRESSED) {
2343#ifdef HAVE_XZ
2344 uint64_t rsize;
2345
2346 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2347 return -EBADMSG;
2348
2349 data = from->compress_buffer;
2350 l = rsize;
2351#else
2352 return -EPROTONOSUPPORT;
2353#endif
2354 } else
2355 data = o->data.payload;
2356
2357 r = journal_file_append_data(to, data, l, &u, &h);
2358 if (r < 0)
2359 return r;
2360
2361 xor_hash ^= le64toh(u->data.hash);
2362 items[i].object_offset = htole64(h);
2363 items[i].hash = u->data.hash;
2364
2365 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2366 if (r < 0)
2367 return r;
2368 }
2369
2370 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2371}
babfc091
LP
2372
2373void journal_default_metrics(JournalMetrics *m, int fd) {
2374 uint64_t fs_size = 0;
2375 struct statvfs ss;
a7bc2c2a 2376 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2377
2378 assert(m);
2379 assert(fd >= 0);
2380
2381 if (fstatvfs(fd, &ss) >= 0)
2382 fs_size = ss.f_frsize * ss.f_blocks;
2383
2384 if (m->max_use == (uint64_t) -1) {
2385
2386 if (fs_size > 0) {
2387 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2388
2389 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2390 m->max_use = DEFAULT_MAX_USE_UPPER;
2391
2392 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2393 m->max_use = DEFAULT_MAX_USE_LOWER;
2394 } else
2395 m->max_use = DEFAULT_MAX_USE_LOWER;
2396 } else {
2397 m->max_use = PAGE_ALIGN(m->max_use);
2398
2399 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2400 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2401 }
2402
2403 if (m->max_size == (uint64_t) -1) {
2404 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2405
2406 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2407 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2408 } else
2409 m->max_size = PAGE_ALIGN(m->max_size);
2410
2411 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2412 m->max_size = JOURNAL_FILE_SIZE_MIN;
2413
2414 if (m->max_size*2 > m->max_use)
2415 m->max_use = m->max_size*2;
2416
2417 if (m->min_size == (uint64_t) -1)
2418 m->min_size = JOURNAL_FILE_SIZE_MIN;
2419 else {
2420 m->min_size = PAGE_ALIGN(m->min_size);
2421
2422 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2423 m->min_size = JOURNAL_FILE_SIZE_MIN;
2424
2425 if (m->min_size > m->max_size)
2426 m->max_size = m->min_size;
2427 }
2428
2429 if (m->keep_free == (uint64_t) -1) {
2430
2431 if (fs_size > 0) {
2432 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2433
2434 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2435 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2436
2437 } else
2438 m->keep_free = DEFAULT_KEEP_FREE;
2439 }
2440
e7bf07b3
LP
2441 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2442 format_bytes(a, sizeof(a), m->max_use),
2443 format_bytes(b, sizeof(b), m->max_size),
2444 format_bytes(c, sizeof(c), m->min_size),
2445 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2446}
08984293
LP
2447
2448int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2449 Object *o;
2450 int r;
2451
2452 assert(f);
2453 assert(from || to);
2454
2455 if (from) {
2456 r = journal_file_next_entry(f, NULL, 0, DIRECTION_DOWN, &o, NULL);
2457 if (r <= 0)
2458 return r;
2459
2460 *from = le64toh(o->entry.realtime);
2461 }
2462
2463 if (to) {
2464 r = journal_file_next_entry(f, NULL, 0, DIRECTION_UP, &o, NULL);
2465 if (r <= 0)
2466 return r;
2467
2468 *to = le64toh(o->entry.realtime);
2469 }
2470
2471 return 1;
2472}
2473
2474int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2475 char t[9+32+1] = "_BOOT_ID=";
2476 Object *o;
2477 uint64_t p;
2478 int r;
2479
2480 assert(f);
2481 assert(from || to);
2482
2483 sd_id128_to_string(boot_id, t + 9);
2484
2485 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2486 if (r <= 0)
2487 return r;
2488
2489 if (le64toh(o->data.n_entries) <= 0)
2490 return 0;
2491
2492 if (from) {
2493 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2494 if (r < 0)
2495 return r;
2496
2497 *from = le64toh(o->entry.monotonic);
2498 }
2499
2500 if (to) {
2501 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2502 if (r < 0)
2503 return r;
2504
2505 r = generic_array_get_plus_one(f,
2506 le64toh(o->data.entry_offset),
2507 le64toh(o->data.entry_array_offset),
2508 le64toh(o->data.n_entries)-1,
2509 &o, NULL);
2510 if (r <= 0)
2511 return r;
2512
2513 *to = le64toh(o->entry.monotonic);
2514 }
2515
2516 return 1;
2517}