]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journal-file.c
relicense to LGPLv2.1 (with exceptions)
[thirdparty/systemd.git] / src / journal / journal-file.c
CommitLineData
cec736d2
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
cec736d2
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
cec736d2 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
cec736d2
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/mman.h>
23#include <errno.h>
24#include <sys/uio.h>
25#include <unistd.h>
26#include <sys/statvfs.h>
27#include <fcntl.h>
28#include <stddef.h>
29
30#include "journal-def.h"
31#include "journal-file.h"
32#include "lookup3.h"
807e17f0 33#include "compress.h"
cec736d2 34
de190aef
LP
/* Default sizes, in bytes, of the data and field hash tables that are
 * appended to a freshly created journal file */
#define DEFAULT_DATA_HASH_TABLE_SIZE (16ULL*2047ULL)
#define DEFAULT_FIELD_HASH_TABLE_SIZE (16ULL*2047ULL)

/* Size we try to give every mmap() window (8 MiB) */
#define DEFAULT_WINDOW_SIZE (8ULL << 20)

/* Payloads smaller than this many bytes are never compressed */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* Minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL << 10)             /* 64 KiB */

/* Lower and upper bounds applied when the max_use value is deduced
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL << 20)              /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL << 30)              /* 4 GiB */

/* Upper bound applied when max_size is deduced from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL << 20)           /* 128 MiB */

/* Upper bound applied when the keep_free value is deduced from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL << 30)            /* 4 GiB */

/* keep_free value used when the file system size cannot be
 * determined */
#define DEFAULT_KEEP_FREE (1ULL << 20)                  /* 1 MiB */

/* Magic bytes found at the very start of every journal file */
static const char signature[8] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };

/* Round x up to the next multiple of 8 */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
64
65void journal_file_close(JournalFile *f) {
de190aef 66 int t;
cec736d2 67
de190aef 68 assert(f);
cec736d2 69
de190aef
LP
70 if (f->header && f->writable)
71 f->header->state = STATE_OFFLINE;
cec736d2 72
cec736d2 73
de190aef
LP
74 for (t = 0; t < _WINDOW_MAX; t++)
75 if (f->windows[t].ptr)
76 munmap(f->windows[t].ptr, f->windows[t].size);
cec736d2 77
0ac38b70
LP
78 if (f->fd >= 0)
79 close_nointr_nofail(f->fd);
80
cec736d2 81 free(f->path);
807e17f0
LP
82
83#ifdef HAVE_XZ
84 free(f->compress_buffer);
85#endif
86
cec736d2
LP
87 free(f);
88}
89
0ac38b70 90static int journal_file_init_header(JournalFile *f, JournalFile *template) {
cec736d2
LP
91 Header h;
92 ssize_t k;
93 int r;
94
95 assert(f);
96
97 zero(h);
98 memcpy(h.signature, signature, 8);
99 h.arena_offset = htole64(ALIGN64(sizeof(h)));
cec736d2
LP
100
101 r = sd_id128_randomize(&h.file_id);
102 if (r < 0)
103 return r;
104
0ac38b70
LP
105 if (template) {
106 h.seqnum_id = template->header->seqnum_id;
107 h.seqnum = template->header->seqnum;
108 } else
109 h.seqnum_id = h.file_id;
cec736d2
LP
110
111 k = pwrite(f->fd, &h, sizeof(h), 0);
112 if (k < 0)
113 return -errno;
114
115 if (k != sizeof(h))
116 return -EIO;
117
118 return 0;
119}
120
121static int journal_file_refresh_header(JournalFile *f) {
122 int r;
de190aef 123 sd_id128_t boot_id;
cec736d2
LP
124
125 assert(f);
126
127 r = sd_id128_get_machine(&f->header->machine_id);
128 if (r < 0)
129 return r;
130
de190aef 131 r = sd_id128_get_boot(&boot_id);
cec736d2
LP
132 if (r < 0)
133 return r;
134
de190aef
LP
135 if (sd_id128_equal(boot_id, f->header->boot_id))
136 f->tail_entry_monotonic_valid = true;
137
138 f->header->boot_id = boot_id;
139
140 f->header->state = STATE_ONLINE;
b788cc23
LP
141
142 __sync_synchronize();
143
cec736d2
LP
144 return 0;
145}
146
147static int journal_file_verify_header(JournalFile *f) {
148 assert(f);
149
150 if (memcmp(f->header, signature, 8))
151 return -EBADMSG;
152
807e17f0
LP
153#ifdef HAVE_XZ
154 if ((le64toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
155 return -EPROTONOSUPPORT;
156#else
cec736d2
LP
157 if (f->header->incompatible_flags != 0)
158 return -EPROTONOSUPPORT;
807e17f0 159#endif
cec736d2
LP
160
161 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->arena_offset) + le64toh(f->header->arena_size)))
162 return -ENODATA;
163
164 if (f->writable) {
ccdbaf91 165 uint8_t state;
cec736d2
LP
166 sd_id128_t machine_id;
167 int r;
168
169 r = sd_id128_get_machine(&machine_id);
170 if (r < 0)
171 return r;
172
173 if (!sd_id128_equal(machine_id, f->header->machine_id))
174 return -EHOSTDOWN;
175
de190aef 176 state = f->header->state;
cec736d2
LP
177
178 if (state == STATE_ONLINE)
179 log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path);
180 else if (state == STATE_ARCHIVED)
181 return -ESHUTDOWN;
182 else if (state != STATE_OFFLINE)
183 log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state);
184 }
185
186 return 0;
187}
188
189static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
cec736d2
LP
190 uint64_t old_size, new_size;
191
192 assert(f);
193
cec736d2 194 /* We assume that this file is not sparse, and we know that
38ac38b2 195 * for sure, since we always call posix_fallocate()
cec736d2
LP
196 * ourselves */
197
198 old_size =
199 le64toh(f->header->arena_offset) +
200 le64toh(f->header->arena_size);
201
bc85bfee
LP
202 new_size = PAGE_ALIGN(offset + size);
203 if (new_size < le64toh(f->header->arena_offset))
204 new_size = le64toh(f->header->arena_offset);
205
206 if (new_size <= old_size)
cec736d2
LP
207 return 0;
208
bc85bfee
LP
209 if (f->metrics.max_size > 0 &&
210 new_size > f->metrics.max_size)
211 return -E2BIG;
cec736d2 212
bc85bfee
LP
213 if (new_size > f->metrics.min_size &&
214 f->metrics.keep_free > 0) {
cec736d2
LP
215 struct statvfs svfs;
216
217 if (fstatvfs(f->fd, &svfs) >= 0) {
218 uint64_t available;
219
220 available = svfs.f_bfree * svfs.f_bsize;
221
bc85bfee
LP
222 if (available >= f->metrics.keep_free)
223 available -= f->metrics.keep_free;
cec736d2
LP
224 else
225 available = 0;
226
227 if (new_size - old_size > available)
228 return -E2BIG;
229 }
230 }
231
bc85bfee
LP
232 /* Note that the glibc fallocate() fallback is very
233 inefficient, hence we try to minimize the allocation area
234 as we can. */
38ac38b2 235 if (posix_fallocate(f->fd, old_size, new_size - old_size) < 0)
cec736d2
LP
236 return -errno;
237
238 if (fstat(f->fd, &f->last_stat) < 0)
239 return -errno;
240
8dc6b88f 241 f->header->arena_size = htole64(new_size - le64toh(f->header->arena_offset));
cec736d2
LP
242
243 return 0;
244}
245
246static int journal_file_map(
247 JournalFile *f,
248 uint64_t offset,
249 uint64_t size,
250 void **_window,
251 uint64_t *_woffset,
252 uint64_t *_wsize,
253 void **ret) {
254
255 uint64_t woffset, wsize;
256 void *window;
257
258 assert(f);
259 assert(size > 0);
260 assert(ret);
261
262 woffset = offset & ~((uint64_t) page_size() - 1ULL);
263 wsize = size + (offset - woffset);
264 wsize = PAGE_ALIGN(wsize);
265
2a59ea54
LP
266 /* Avoid SIGBUS on invalid accesses */
267 if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
268 return -EADDRNOTAVAIL;
269
cec736d2
LP
270 window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
271 if (window == MAP_FAILED)
272 return -errno;
273
274 if (_window)
275 *_window = window;
276
277 if (_woffset)
278 *_woffset = woffset;
279
280 if (_wsize)
281 *_wsize = wsize;
282
283 *ret = (uint8_t*) window + (offset - woffset);
284
285 return 0;
286}
287
de190aef 288static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
6c8a39b8 289 void *p = NULL;
cec736d2
LP
290 uint64_t delta;
291 int r;
de190aef 292 Window *w;
cec736d2
LP
293
294 assert(f);
295 assert(ret);
de190aef
LP
296 assert(wt >= 0);
297 assert(wt < _WINDOW_MAX);
cec736d2 298
4bbdcdb3
LP
299 if (offset + size > (uint64_t) f->last_stat.st_size) {
300 /* Hmm, out of range? Let's refresh the fstat() data
301 * first, before we trust that check. */
302
303 if (fstat(f->fd, &f->last_stat) < 0 ||
304 offset + size > (uint64_t) f->last_stat.st_size)
305 return -EADDRNOTAVAIL;
306 }
307
de190aef 308 w = f->windows + wt;
cec736d2 309
de190aef
LP
310 if (_likely_(w->ptr &&
311 w->offset <= offset &&
312 w->offset + w->size >= offset + size)) {
313
314 *ret = (uint8_t*) w->ptr + (offset - w->offset);
cec736d2
LP
315 return 0;
316 }
317
de190aef
LP
318 if (w->ptr) {
319 if (munmap(w->ptr, w->size) < 0)
cec736d2
LP
320 return -errno;
321
de190aef
LP
322 w->ptr = NULL;
323 w->size = w->offset = 0;
cec736d2
LP
324 }
325
326 if (size < DEFAULT_WINDOW_SIZE) {
327 /* If the default window size is larger then what was
328 * asked for extend the mapping a bit in the hope to
329 * minimize needed remappings later on. We add half
330 * the window space before and half behind the
331 * requested mapping */
332
1921a5cb 333 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
cec736d2 334
a99c349d 335 if (delta > offset)
cec736d2
LP
336 delta = offset;
337
338 offset -= delta;
a99c349d 339 size = DEFAULT_WINDOW_SIZE;
cec736d2
LP
340 } else
341 delta = 0;
342
2a59ea54 343 if (offset + size > (uint64_t) f->last_stat.st_size)
1921a5cb 344 size = (uint64_t) f->last_stat.st_size - offset;
2a59ea54
LP
345
346 if (size <= 0)
347 return -EADDRNOTAVAIL;
348
cec736d2
LP
349 r = journal_file_map(f,
350 offset, size,
de190aef
LP
351 &w->ptr, &w->offset, &w->size,
352 &p);
cec736d2
LP
353
354 if (r < 0)
355 return r;
356
357 *ret = (uint8_t*) p + delta;
358 return 0;
359}
360
361static bool verify_hash(Object *o) {
de190aef 362 uint64_t h1, h2;
cec736d2
LP
363
364 assert(o);
365
807e17f0 366 if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
cec736d2 367 h1 = le64toh(o->data.hash);
de190aef
LP
368 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
369 } else if (o->object.type == OBJECT_FIELD) {
370 h1 = le64toh(o->field.hash);
371 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
372 } else
373 return true;
cec736d2 374
de190aef 375 return h1 == h2;
cec736d2
LP
376}
377
de190aef 378int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
cec736d2
LP
379 int r;
380 void *t;
381 Object *o;
382 uint64_t s;
383
384 assert(f);
385 assert(ret);
de190aef 386 assert(type < _OBJECT_TYPE_MAX);
cec736d2 387
de190aef 388 r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
cec736d2
LP
389 if (r < 0)
390 return r;
391
392 o = (Object*) t;
393 s = le64toh(o->object.size);
394
395 if (s < sizeof(ObjectHeader))
396 return -EBADMSG;
397
de190aef 398 if (type >= 0 && o->object.type != type)
cec736d2
LP
399 return -EBADMSG;
400
401 if (s > sizeof(ObjectHeader)) {
de190aef 402 r = journal_file_move_to(f, o->object.type, offset, s, &t);
cec736d2
LP
403 if (r < 0)
404 return r;
405
406 o = (Object*) t;
407 }
408
409 if (!verify_hash(o))
410 return -EBADMSG;
411
412 *ret = o;
413 return 0;
414}
415
c2373f84 416static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
cec736d2
LP
417 uint64_t r;
418
419 assert(f);
420
421 r = le64toh(f->header->seqnum) + 1;
c2373f84
LP
422
423 if (seqnum) {
de190aef 424 /* If an external seqnum counter was passed, we update
c2373f84
LP
425 * both the local and the external one, and set it to
426 * the maximum of both */
427
428 if (*seqnum + 1 > r)
429 r = *seqnum + 1;
430
431 *seqnum = r;
432 }
433
cec736d2
LP
434 f->header->seqnum = htole64(r);
435
de190aef
LP
436 if (f->header->first_seqnum == 0)
437 f->header->first_seqnum = htole64(r);
438
cec736d2
LP
439 return r;
440}
441
de190aef 442static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
cec736d2
LP
443 int r;
444 uint64_t p;
445 Object *tail, *o;
446 void *t;
447
448 assert(f);
449 assert(size >= sizeof(ObjectHeader));
450 assert(offset);
451 assert(ret);
452
453 p = le64toh(f->header->tail_object_offset);
cec736d2
LP
454 if (p == 0)
455 p = le64toh(f->header->arena_offset);
456 else {
de190aef 457 r = journal_file_move_to_object(f, -1, p, &tail);
cec736d2
LP
458 if (r < 0)
459 return r;
460
461 p += ALIGN64(le64toh(tail->object.size));
462 }
463
464 r = journal_file_allocate(f, p, size);
465 if (r < 0)
466 return r;
467
de190aef 468 r = journal_file_move_to(f, type, p, size, &t);
cec736d2
LP
469 if (r < 0)
470 return r;
471
472 o = (Object*) t;
473
474 zero(o->object);
de190aef 475 o->object.type = type;
cec736d2
LP
476 o->object.size = htole64(size);
477
478 f->header->tail_object_offset = htole64(p);
cec736d2
LP
479 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
480
481 *ret = o;
482 *offset = p;
483
484 return 0;
485}
486
de190aef 487static int journal_file_setup_data_hash_table(JournalFile *f) {
cec736d2
LP
488 uint64_t s, p;
489 Object *o;
490 int r;
491
492 assert(f);
493
de190aef
LP
494 s = DEFAULT_DATA_HASH_TABLE_SIZE;
495 r = journal_file_append_object(f,
496 OBJECT_DATA_HASH_TABLE,
497 offsetof(Object, hash_table.items) + s,
498 &o, &p);
cec736d2
LP
499 if (r < 0)
500 return r;
501
de190aef 502 memset(o->hash_table.items, 0, s);
cec736d2 503
de190aef
LP
504 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
505 f->header->data_hash_table_size = htole64(s);
cec736d2
LP
506
507 return 0;
508}
509
de190aef 510static int journal_file_setup_field_hash_table(JournalFile *f) {
cec736d2
LP
511 uint64_t s, p;
512 Object *o;
513 int r;
514
515 assert(f);
516
de190aef
LP
517 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
518 r = journal_file_append_object(f,
519 OBJECT_FIELD_HASH_TABLE,
520 offsetof(Object, hash_table.items) + s,
521 &o, &p);
cec736d2
LP
522 if (r < 0)
523 return r;
524
de190aef 525 memset(o->hash_table.items, 0, s);
cec736d2 526
de190aef
LP
527 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
528 f->header->field_hash_table_size = htole64(s);
cec736d2
LP
529
530 return 0;
531}
532
de190aef 533static int journal_file_map_data_hash_table(JournalFile *f) {
cec736d2
LP
534 uint64_t s, p;
535 void *t;
536 int r;
537
538 assert(f);
539
de190aef
LP
540 p = le64toh(f->header->data_hash_table_offset);
541 s = le64toh(f->header->data_hash_table_size);
cec736d2 542
de190aef
LP
543 r = journal_file_move_to(f,
544 WINDOW_DATA_HASH_TABLE,
545 p, s,
546 &t);
cec736d2
LP
547 if (r < 0)
548 return r;
549
de190aef 550 f->data_hash_table = t;
cec736d2
LP
551 return 0;
552}
553
de190aef 554static int journal_file_map_field_hash_table(JournalFile *f) {
cec736d2
LP
555 uint64_t s, p;
556 void *t;
557 int r;
558
559 assert(f);
560
de190aef
LP
561 p = le64toh(f->header->field_hash_table_offset);
562 s = le64toh(f->header->field_hash_table_size);
cec736d2 563
de190aef
LP
564 r = journal_file_move_to(f,
565 WINDOW_FIELD_HASH_TABLE,
566 p, s,
567 &t);
cec736d2
LP
568 if (r < 0)
569 return r;
570
de190aef 571 f->field_hash_table = t;
cec736d2
LP
572 return 0;
573}
574
de190aef
LP
575static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
576 uint64_t p, h;
cec736d2
LP
577 int r;
578
579 assert(f);
580 assert(o);
581 assert(offset > 0);
de190aef 582 assert(o->object.type == OBJECT_DATA);
cec736d2 583
48496df6
LP
584 /* This might alter the window we are looking at */
585
de190aef
LP
586 o->data.next_hash_offset = o->data.next_field_offset = 0;
587 o->data.entry_offset = o->data.entry_array_offset = 0;
588 o->data.n_entries = 0;
cec736d2 589
de190aef
LP
590 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
591 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2
LP
592 if (p == 0) {
593 /* Only entry in the hash table is easy */
de190aef 594 f->data_hash_table[h].head_hash_offset = htole64(offset);
cec736d2 595 } else {
48496df6
LP
596 /* Move back to the previous data object, to patch in
597 * pointer */
cec736d2 598
de190aef 599 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
600 if (r < 0)
601 return r;
602
de190aef 603 o->data.next_hash_offset = htole64(offset);
cec736d2
LP
604 }
605
de190aef 606 f->data_hash_table[h].tail_hash_offset = htole64(offset);
cec736d2
LP
607
608 return 0;
609}
610
de190aef
LP
611int journal_file_find_data_object_with_hash(
612 JournalFile *f,
613 const void *data, uint64_t size, uint64_t hash,
614 Object **ret, uint64_t *offset) {
48496df6 615
de190aef 616 uint64_t p, osize, h;
cec736d2
LP
617 int r;
618
619 assert(f);
620 assert(data || size == 0);
621
622 osize = offsetof(Object, data.payload) + size;
623
bc85bfee
LP
624 if (f->header->data_hash_table_size == 0)
625 return -EBADMSG;
626
de190aef
LP
627 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
628 p = le64toh(f->data_hash_table[h].head_hash_offset);
cec736d2 629
de190aef
LP
630 while (p > 0) {
631 Object *o;
cec736d2 632
de190aef 633 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
634 if (r < 0)
635 return r;
636
807e17f0 637 if (le64toh(o->data.hash) != hash)
85a131e8 638 goto next;
807e17f0
LP
639
640 if (o->object.flags & OBJECT_COMPRESSED) {
641#ifdef HAVE_XZ
b785c858 642 uint64_t l, rsize;
cec736d2 643
807e17f0
LP
644 l = le64toh(o->object.size);
645 if (l <= offsetof(Object, data.payload))
cec736d2
LP
646 return -EBADMSG;
647
807e17f0
LP
648 l -= offsetof(Object, data.payload);
649
650 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
651 return -EBADMSG;
652
b785c858 653 if (rsize == size &&
807e17f0
LP
654 memcmp(f->compress_buffer, data, size) == 0) {
655
656 if (ret)
657 *ret = o;
658
659 if (offset)
660 *offset = p;
661
662 return 1;
663 }
664#else
665 return -EPROTONOSUPPORT;
666#endif
667
668 } else if (le64toh(o->object.size) == osize &&
669 memcmp(o->data.payload, data, size) == 0) {
670
cec736d2
LP
671 if (ret)
672 *ret = o;
673
674 if (offset)
675 *offset = p;
676
de190aef 677 return 1;
cec736d2
LP
678 }
679
85a131e8 680 next:
cec736d2
LP
681 p = le64toh(o->data.next_hash_offset);
682 }
683
de190aef
LP
684 return 0;
685}
686
687int journal_file_find_data_object(
688 JournalFile *f,
689 const void *data, uint64_t size,
690 Object **ret, uint64_t *offset) {
691
692 uint64_t hash;
693
694 assert(f);
695 assert(data || size == 0);
696
697 hash = hash64(data, size);
698
699 return journal_file_find_data_object_with_hash(f,
700 data, size, hash,
701 ret, offset);
702}
703
48496df6
LP
704static int journal_file_append_data(
705 JournalFile *f,
706 const void *data, uint64_t size,
707 Object **ret, uint64_t *offset) {
708
de190aef
LP
709 uint64_t hash, p;
710 uint64_t osize;
711 Object *o;
712 int r;
807e17f0 713 bool compressed = false;
de190aef
LP
714
715 assert(f);
716 assert(data || size == 0);
717
718 hash = hash64(data, size);
719
720 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
721 if (r < 0)
722 return r;
723 else if (r > 0) {
724
725 if (ret)
726 *ret = o;
727
728 if (offset)
729 *offset = p;
730
731 return 0;
732 }
733
734 osize = offsetof(Object, data.payload) + size;
735 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
cec736d2
LP
736 if (r < 0)
737 return r;
738
cec736d2 739 o->data.hash = htole64(hash);
807e17f0
LP
740
741#ifdef HAVE_XZ
742 if (f->compress &&
743 size >= COMPRESSION_SIZE_THRESHOLD) {
744 uint64_t rsize;
745
746 compressed = compress_blob(data, size, o->data.payload, &rsize);
747
748 if (compressed) {
749 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
750 o->object.flags |= OBJECT_COMPRESSED;
751
752 f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED);
753
754 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
755 }
756 }
757#endif
758
759 if (!compressed)
760 memcpy(o->data.payload, data, size);
cec736d2 761
de190aef 762 r = journal_file_link_data(f, o, p, hash);
cec736d2
LP
763 if (r < 0)
764 return r;
765
48496df6
LP
766 /* The linking might have altered the window, so let's
767 * refresh our pointer */
768 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
769 if (r < 0)
770 return r;
771
cec736d2
LP
772 if (ret)
773 *ret = o;
774
775 if (offset)
de190aef 776 *offset = p;
cec736d2
LP
777
778 return 0;
779}
780
781uint64_t journal_file_entry_n_items(Object *o) {
782 assert(o);
7be3aa17 783 assert(o->object.type == OBJECT_ENTRY);
cec736d2
LP
784
785 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
786}
787
de190aef
LP
788static uint64_t journal_file_entry_array_n_items(Object *o) {
789 assert(o);
7be3aa17 790 assert(o->object.type == OBJECT_ENTRY_ARRAY);
de190aef
LP
791
792 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
793}
794
795static int link_entry_into_array(JournalFile *f,
4fd052ae
FC
796 le64_t *first,
797 le64_t *idx,
de190aef 798 uint64_t p) {
cec736d2 799 int r;
de190aef
LP
800 uint64_t n = 0, ap = 0, q, i, a, hidx;
801 Object *o;
802
cec736d2 803 assert(f);
de190aef
LP
804 assert(first);
805 assert(idx);
806 assert(p > 0);
cec736d2 807
de190aef
LP
808 a = le64toh(*first);
809 i = hidx = le64toh(*idx);
810 while (a > 0) {
811
812 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
813 if (r < 0)
814 return r;
cec736d2 815
de190aef
LP
816 n = journal_file_entry_array_n_items(o);
817 if (i < n) {
818 o->entry_array.items[i] = htole64(p);
819 *idx = htole64(hidx + 1);
820 return 0;
821 }
cec736d2 822
de190aef
LP
823 i -= n;
824 ap = a;
825 a = le64toh(o->entry_array.next_entry_array_offset);
826 }
827
828 if (hidx > n)
829 n = (hidx+1) * 2;
830 else
831 n = n * 2;
832
833 if (n < 4)
834 n = 4;
835
836 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
837 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
838 &o, &q);
cec736d2
LP
839 if (r < 0)
840 return r;
841
de190aef 842 o->entry_array.items[i] = htole64(p);
cec736d2 843
de190aef 844 if (ap == 0)
7be3aa17 845 *first = htole64(q);
cec736d2 846 else {
de190aef 847 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
cec736d2
LP
848 if (r < 0)
849 return r;
850
de190aef
LP
851 o->entry_array.next_entry_array_offset = htole64(q);
852 }
cec736d2 853
de190aef
LP
854 *idx = htole64(hidx + 1);
855
856 return 0;
857}
cec736d2 858
de190aef 859static int link_entry_into_array_plus_one(JournalFile *f,
4fd052ae
FC
860 le64_t *extra,
861 le64_t *first,
862 le64_t *idx,
de190aef
LP
863 uint64_t p) {
864
865 int r;
866
867 assert(f);
868 assert(extra);
869 assert(first);
870 assert(idx);
871 assert(p > 0);
872
873 if (*idx == 0)
874 *extra = htole64(p);
875 else {
4fd052ae 876 le64_t i;
de190aef 877
7be3aa17 878 i = htole64(le64toh(*idx) - 1);
de190aef
LP
879 r = link_entry_into_array(f, first, &i, p);
880 if (r < 0)
881 return r;
cec736d2
LP
882 }
883
de190aef
LP
884 *idx = htole64(le64toh(*idx) + 1);
885 return 0;
886}
887
888static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
889 uint64_t p;
890 int r;
891 assert(f);
892 assert(o);
893 assert(offset > 0);
894
895 p = le64toh(o->entry.items[i].object_offset);
896 if (p == 0)
897 return -EINVAL;
898
899 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
cec736d2
LP
900 if (r < 0)
901 return r;
902
de190aef
LP
903 return link_entry_into_array_plus_one(f,
904 &o->data.entry_offset,
905 &o->data.entry_array_offset,
906 &o->data.n_entries,
907 offset);
cec736d2
LP
908}
909
910static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
de190aef 911 uint64_t n, i;
cec736d2
LP
912 int r;
913
914 assert(f);
915 assert(o);
916 assert(offset > 0);
de190aef 917 assert(o->object.type == OBJECT_ENTRY);
cec736d2 918
b788cc23
LP
919 __sync_synchronize();
920
cec736d2 921 /* Link up the entry itself */
de190aef
LP
922 r = link_entry_into_array(f,
923 &f->header->entry_array_offset,
924 &f->header->n_entries,
925 offset);
926 if (r < 0)
927 return r;
cec736d2 928
aaf53376 929 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
cec736d2 930
de190aef 931 if (f->header->head_entry_realtime == 0)
0ac38b70 932 f->header->head_entry_realtime = o->entry.realtime;
cec736d2 933
0ac38b70 934 f->header->tail_entry_realtime = o->entry.realtime;
de190aef
LP
935 f->header->tail_entry_monotonic = o->entry.monotonic;
936
937 f->tail_entry_monotonic_valid = true;
cec736d2
LP
938
939 /* Link up the items */
940 n = journal_file_entry_n_items(o);
941 for (i = 0; i < n; i++) {
942 r = journal_file_link_entry_item(f, o, offset, i);
943 if (r < 0)
944 return r;
945 }
946
cec736d2
LP
947 return 0;
948}
949
950static int journal_file_append_entry_internal(
951 JournalFile *f,
952 const dual_timestamp *ts,
953 uint64_t xor_hash,
954 const EntryItem items[], unsigned n_items,
de190aef 955 uint64_t *seqnum,
cec736d2
LP
956 Object **ret, uint64_t *offset) {
957 uint64_t np;
958 uint64_t osize;
959 Object *o;
960 int r;
961
962 assert(f);
963 assert(items || n_items == 0);
de190aef 964 assert(ts);
cec736d2
LP
965
966 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
967
de190aef 968 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
cec736d2
LP
969 if (r < 0)
970 return r;
971
de190aef 972 o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
cec736d2 973 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
de190aef
LP
974 o->entry.realtime = htole64(ts->realtime);
975 o->entry.monotonic = htole64(ts->monotonic);
cec736d2
LP
976 o->entry.xor_hash = htole64(xor_hash);
977 o->entry.boot_id = f->header->boot_id;
978
979 r = journal_file_link_entry(f, o, np);
980 if (r < 0)
981 return r;
982
983 if (ret)
984 *ret = o;
985
986 if (offset)
987 *offset = np;
988
989 return 0;
990}
991
cf244689 992void journal_file_post_change(JournalFile *f) {
50f20cfd
LP
993 assert(f);
994
995 /* inotify() does not receive IN_MODIFY events from file
996 * accesses done via mmap(). After each access we hence
997 * trigger IN_MODIFY by truncating the journal file to its
998 * current size which triggers IN_MODIFY. */
999
bc85bfee
LP
1000 __sync_synchronize();
1001
50f20cfd
LP
1002 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1003 log_error("Failed to to truncate file to its own size: %m");
1004}
1005
de190aef 1006int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
cec736d2
LP
1007 unsigned i;
1008 EntryItem *items;
1009 int r;
1010 uint64_t xor_hash = 0;
de190aef 1011 struct dual_timestamp _ts;
cec736d2
LP
1012
1013 assert(f);
1014 assert(iovec || n_iovec == 0);
1015
de190aef
LP
1016 if (!f->writable)
1017 return -EPERM;
1018
1019 if (!ts) {
1020 dual_timestamp_get(&_ts);
1021 ts = &_ts;
1022 }
1023
1024 if (f->tail_entry_monotonic_valid &&
1025 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1026 return -EINVAL;
1027
cf244689 1028 items = alloca(sizeof(EntryItem) * n_iovec);
cec736d2
LP
1029
1030 for (i = 0; i < n_iovec; i++) {
1031 uint64_t p;
1032 Object *o;
1033
1034 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1035 if (r < 0)
cf244689 1036 return r;
cec736d2
LP
1037
1038 xor_hash ^= le64toh(o->data.hash);
1039 items[i].object_offset = htole64(p);
de7b95cd 1040 items[i].hash = o->data.hash;
cec736d2
LP
1041 }
1042
de190aef 1043 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
cec736d2 1044
50f20cfd
LP
1045 journal_file_post_change(f);
1046
cec736d2
LP
1047 return r;
1048}
1049
de190aef
LP
1050static int generic_array_get(JournalFile *f,
1051 uint64_t first,
1052 uint64_t i,
1053 Object **ret, uint64_t *offset) {
1054
cec736d2 1055 Object *o;
6c8a39b8 1056 uint64_t p = 0, a;
cec736d2
LP
1057 int r;
1058
1059 assert(f);
1060
de190aef
LP
1061 a = first;
1062 while (a > 0) {
1063 uint64_t n;
cec736d2 1064
de190aef
LP
1065 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1066 if (r < 0)
1067 return r;
cec736d2 1068
de190aef
LP
1069 n = journal_file_entry_array_n_items(o);
1070 if (i < n) {
1071 p = le64toh(o->entry_array.items[i]);
1072 break;
cec736d2
LP
1073 }
1074
de190aef
LP
1075 i -= n;
1076 a = le64toh(o->entry_array.next_entry_array_offset);
1077 }
1078
1079 if (a <= 0 || p <= 0)
1080 return 0;
1081
1082 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1083 if (r < 0)
1084 return r;
1085
1086 if (ret)
1087 *ret = o;
1088
1089 if (offset)
1090 *offset = p;
1091
1092 return 1;
1093}
1094
1095static int generic_array_get_plus_one(JournalFile *f,
1096 uint64_t extra,
1097 uint64_t first,
1098 uint64_t i,
1099 Object **ret, uint64_t *offset) {
1100
1101 Object *o;
1102
1103 assert(f);
1104
1105 if (i == 0) {
1106 int r;
1107
1108 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
cec736d2
LP
1109 if (r < 0)
1110 return r;
1111
de190aef
LP
1112 if (ret)
1113 *ret = o;
cec736d2 1114
de190aef
LP
1115 if (offset)
1116 *offset = extra;
cec736d2 1117
de190aef 1118 return 1;
cec736d2
LP
1119 }
1120
de190aef
LP
1121 return generic_array_get(f, first, i-1, ret, offset);
1122}
cec736d2 1123
de190aef
LP
/* Results of the test_object() callbacks used by the bisection code */
enum {
        TEST_FOUND = 0,
        TEST_LEFT  = 1,
        TEST_RIGHT = 2
};
cec736d2 1129
de190aef
LP
1130static int generic_array_bisect(JournalFile *f,
1131 uint64_t first,
1132 uint64_t n,
1133 uint64_t needle,
1134 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1135 direction_t direction,
1136 Object **ret,
1137 uint64_t *offset,
1138 uint64_t *idx) {
1139
1140 uint64_t a, p, t = 0, i = 0, last_p = 0;
1141 bool subtract_one = false;
1142 Object *o, *array = NULL;
1143 int r;
cec736d2 1144
de190aef
LP
1145 assert(f);
1146 assert(test_object);
cec736d2 1147
de190aef
LP
1148 a = first;
1149 while (a > 0) {
1150 uint64_t left, right, k, lp;
1151
1152 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
cec736d2
LP
1153 if (r < 0)
1154 return r;
1155
de190aef
LP
1156 k = journal_file_entry_array_n_items(array);
1157 right = MIN(k, n);
1158 if (right <= 0)
1159 return 0;
cec736d2 1160
de190aef
LP
1161 i = right - 1;
1162 lp = p = le64toh(array->entry_array.items[i]);
1163 if (p <= 0)
1164 return -EBADMSG;
cec736d2 1165
de190aef
LP
1166 r = test_object(f, p, needle);
1167 if (r < 0)
1168 return r;
cec736d2 1169
de190aef
LP
1170 if (r == TEST_FOUND)
1171 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1172
1173 if (r == TEST_RIGHT) {
1174 left = 0;
1175 right -= 1;
1176 for (;;) {
1177 if (left == right) {
1178 if (direction == DIRECTION_UP)
1179 subtract_one = true;
1180
1181 i = left;
1182 goto found;
1183 }
1184
1185 assert(left < right);
1186
1187 i = (left + right) / 2;
1188 p = le64toh(array->entry_array.items[i]);
1189 if (p <= 0)
1190 return -EBADMSG;
1191
1192 r = test_object(f, p, needle);
1193 if (r < 0)
1194 return r;
cec736d2 1195
de190aef
LP
1196 if (r == TEST_FOUND)
1197 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1198
1199 if (r == TEST_RIGHT)
1200 right = i;
1201 else
1202 left = i + 1;
1203 }
1204 }
1205
1206 if (k > n)
cec736d2
LP
1207 return 0;
1208
de190aef
LP
1209 last_p = lp;
1210
1211 n -= k;
1212 t += k;
1213 a = le64toh(array->entry_array.next_entry_array_offset);
cec736d2
LP
1214 }
1215
1216 return 0;
de190aef
LP
1217
1218found:
1219 if (subtract_one && t == 0 && i == 0)
1220 return 0;
1221
1222 if (subtract_one && i == 0)
1223 p = last_p;
1224 else if (subtract_one)
1225 p = le64toh(array->entry_array.items[i-1]);
1226 else
1227 p = le64toh(array->entry_array.items[i]);
1228
1229 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1230 if (r < 0)
1231 return r;
1232
1233 if (ret)
1234 *ret = o;
1235
1236 if (offset)
1237 *offset = p;
1238
1239 if (idx)
1240 *idx = t + i - (subtract_one ? 1 : 0);
1241
1242 return 1;
cec736d2
LP
1243}
1244
de190aef
LP
1245static int generic_array_bisect_plus_one(JournalFile *f,
1246 uint64_t extra,
1247 uint64_t first,
1248 uint64_t n,
1249 uint64_t needle,
1250 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1251 direction_t direction,
1252 Object **ret,
1253 uint64_t *offset,
1254 uint64_t *idx) {
1255
cec736d2
LP
1256 int r;
1257
1258 assert(f);
de190aef 1259 assert(test_object);
cec736d2 1260
de190aef
LP
1261 if (n <= 0)
1262 return 0;
cec736d2 1263
de190aef
LP
1264 /* This bisects the array in object 'first', but first checks
1265 * an extra */
de190aef
LP
1266 r = test_object(f, extra, needle);
1267 if (r < 0)
1268 return r;
1269 else if (r == TEST_FOUND) {
1270 Object *o;
1271
1272 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1273 if (r < 0)
1274 return r;
1275
1276 if (ret)
1277 *ret = o;
cec736d2 1278
de190aef
LP
1279 if (offset)
1280 *offset = extra;
440ee366
LP
1281
1282 if (idx)
1283 *idx = 0;
1284
1285 return 1;
de190aef 1286 } else if (r == TEST_RIGHT)
cec736d2
LP
1287 return 0;
1288
de190aef
LP
1289 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1290
1291 if (r > 0)
1292 (*idx) ++;
1293
1294 return r;
1295}
1296
1297static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1298 Object *o;
1299 int r;
1300
1301 assert(f);
1302 assert(p > 0);
1303
1304 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
cec736d2
LP
1305 if (r < 0)
1306 return r;
1307
de190aef
LP
1308 if (le64toh(o->entry.seqnum) == needle)
1309 return TEST_FOUND;
1310 else if (le64toh(o->entry.seqnum) < needle)
1311 return TEST_LEFT;
1312 else
1313 return TEST_RIGHT;
1314}
cec736d2 1315
de190aef
LP
1316int journal_file_move_to_entry_by_seqnum(
1317 JournalFile *f,
1318 uint64_t seqnum,
1319 direction_t direction,
1320 Object **ret,
1321 uint64_t *offset) {
1322
1323 return generic_array_bisect(f,
1324 le64toh(f->header->entry_array_offset),
1325 le64toh(f->header->n_entries),
1326 seqnum,
1327 test_object_seqnum,
1328 direction,
1329 ret, offset, NULL);
1330}
cec736d2 1331
de190aef
LP
1332static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1333 Object *o;
1334 int r;
1335
1336 assert(f);
1337 assert(p > 0);
1338
1339 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1340 if (r < 0)
1341 return r;
1342
1343 if (le64toh(o->entry.realtime) == needle)
1344 return TEST_FOUND;
1345 else if (le64toh(o->entry.realtime) < needle)
1346 return TEST_LEFT;
1347 else
1348 return TEST_RIGHT;
cec736d2
LP
1349}
1350
de190aef
LP
1351int journal_file_move_to_entry_by_realtime(
1352 JournalFile *f,
1353 uint64_t realtime,
1354 direction_t direction,
1355 Object **ret,
1356 uint64_t *offset) {
1357
1358 return generic_array_bisect(f,
1359 le64toh(f->header->entry_array_offset),
1360 le64toh(f->header->n_entries),
1361 realtime,
1362 test_object_realtime,
1363 direction,
1364 ret, offset, NULL);
1365}
1366
1367static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1368 Object *o;
1369 int r;
1370
1371 assert(f);
1372 assert(p > 0);
1373
1374 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1375 if (r < 0)
1376 return r;
1377
1378 if (le64toh(o->entry.monotonic) == needle)
1379 return TEST_FOUND;
1380 else if (le64toh(o->entry.monotonic) < needle)
1381 return TEST_LEFT;
1382 else
1383 return TEST_RIGHT;
1384}
1385
1386int journal_file_move_to_entry_by_monotonic(
1387 JournalFile *f,
1388 sd_id128_t boot_id,
1389 uint64_t monotonic,
1390 direction_t direction,
1391 Object **ret,
1392 uint64_t *offset) {
1393
1394 char t[8+32+1] = "_BOOT_ID=";
1395 Object *o;
1396 int r;
1397
1398 sd_id128_to_string(boot_id, t + 8);
1399
1400 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1401 if (r < 0)
1402 return r;
1403 else if (r == 0)
1404 return -ENOENT;
1405
1406 return generic_array_bisect_plus_one(f,
1407 le64toh(o->data.entry_offset),
1408 le64toh(o->data.entry_array_offset),
1409 le64toh(o->data.n_entries),
1410 monotonic,
1411 test_object_monotonic,
1412 direction,
1413 ret, offset, NULL);
1414}
1415
1416static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1417 assert(f);
1418 assert(p > 0);
1419
1420 if (p == needle)
1421 return TEST_FOUND;
1422 else if (p < needle)
1423 return TEST_LEFT;
1424 else
1425 return TEST_RIGHT;
1426}
1427
1428int journal_file_next_entry(
1429 JournalFile *f,
1430 Object *o, uint64_t p,
1431 direction_t direction,
1432 Object **ret, uint64_t *offset) {
1433
1434 uint64_t i, n;
cec736d2
LP
1435 int r;
1436
1437 assert(f);
de190aef
LP
1438 assert(p > 0 || !o);
1439
1440 n = le64toh(f->header->n_entries);
1441 if (n <= 0)
1442 return 0;
cec736d2
LP
1443
1444 if (!o)
de190aef 1445 i = direction == DIRECTION_DOWN ? 0 : n - 1;
cec736d2 1446 else {
de190aef 1447 if (o->object.type != OBJECT_ENTRY)
cec736d2
LP
1448 return -EINVAL;
1449
de190aef
LP
1450 r = generic_array_bisect(f,
1451 le64toh(f->header->entry_array_offset),
1452 le64toh(f->header->n_entries),
1453 p,
1454 test_object_offset,
1455 DIRECTION_DOWN,
1456 NULL, NULL,
1457 &i);
1458 if (r <= 0)
1459 return r;
1460
1461 if (direction == DIRECTION_DOWN) {
1462 if (i >= n - 1)
1463 return 0;
1464
1465 i++;
1466 } else {
1467 if (i <= 0)
1468 return 0;
1469
1470 i--;
1471 }
cec736d2
LP
1472 }
1473
de190aef
LP
1474 /* And jump to it */
1475 return generic_array_get(f,
1476 le64toh(f->header->entry_array_offset),
1477 i,
1478 ret, offset);
1479}
cec736d2 1480
de190aef
LP
1481int journal_file_skip_entry(
1482 JournalFile *f,
1483 Object *o, uint64_t p,
1484 int64_t skip,
1485 Object **ret, uint64_t *offset) {
1486
1487 uint64_t i, n;
1488 int r;
1489
1490 assert(f);
1491 assert(o);
1492 assert(p > 0);
1493
1494 if (o->object.type != OBJECT_ENTRY)
1495 return -EINVAL;
1496
1497 r = generic_array_bisect(f,
1498 le64toh(f->header->entry_array_offset),
1499 le64toh(f->header->n_entries),
1500 p,
1501 test_object_offset,
1502 DIRECTION_DOWN,
1503 NULL, NULL,
1504 &i);
1505 if (r <= 0)
cec736d2
LP
1506 return r;
1507
de190aef
LP
1508 /* Calculate new index */
1509 if (skip < 0) {
1510 if ((uint64_t) -skip >= i)
1511 i = 0;
1512 else
1513 i = i - (uint64_t) -skip;
1514 } else
1515 i += (uint64_t) skip;
cec736d2 1516
de190aef
LP
1517 n = le64toh(f->header->n_entries);
1518 if (n <= 0)
1519 return -EBADMSG;
cec736d2 1520
de190aef
LP
1521 if (i >= n)
1522 i = n-1;
1523
1524 return generic_array_get(f,
1525 le64toh(f->header->entry_array_offset),
1526 i,
1527 ret, offset);
cec736d2
LP
1528}
1529
de190aef
LP
1530int journal_file_next_entry_for_data(
1531 JournalFile *f,
1532 Object *o, uint64_t p,
1533 uint64_t data_offset,
1534 direction_t direction,
1535 Object **ret, uint64_t *offset) {
1536
1537 uint64_t n, i;
cec736d2 1538 int r;
de190aef 1539 Object *d;
cec736d2
LP
1540
1541 assert(f);
de190aef 1542 assert(p > 0 || !o);
cec736d2 1543
de190aef 1544 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
466ccd92 1545 if (r < 0)
de190aef 1546 return r;
cec736d2 1547
de190aef
LP
1548 n = le64toh(d->data.n_entries);
1549 if (n <= 0)
1550 return n;
cec736d2 1551
de190aef
LP
1552 if (!o)
1553 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1554 else {
1555 if (o->object.type != OBJECT_ENTRY)
1556 return -EINVAL;
cec736d2 1557
de190aef
LP
1558 r = generic_array_bisect_plus_one(f,
1559 le64toh(d->data.entry_offset),
1560 le64toh(d->data.entry_array_offset),
1561 le64toh(d->data.n_entries),
1562 p,
1563 test_object_offset,
1564 DIRECTION_DOWN,
1565 NULL, NULL,
1566 &i);
1567
1568 if (r <= 0)
cec736d2
LP
1569 return r;
1570
de190aef
LP
1571 if (direction == DIRECTION_DOWN) {
1572 if (i >= n - 1)
1573 return 0;
cec736d2 1574
de190aef
LP
1575 i++;
1576 } else {
1577 if (i <= 0)
1578 return 0;
cec736d2 1579
de190aef
LP
1580 i--;
1581 }
cec736d2 1582
de190aef 1583 }
cec736d2 1584
de190aef
LP
1585 return generic_array_get_plus_one(f,
1586 le64toh(d->data.entry_offset),
1587 le64toh(d->data.entry_array_offset),
1588 i,
1589 ret, offset);
1590}
cec736d2 1591
de190aef
LP
1592int journal_file_move_to_entry_by_seqnum_for_data(
1593 JournalFile *f,
1594 uint64_t data_offset,
1595 uint64_t seqnum,
1596 direction_t direction,
1597 Object **ret, uint64_t *offset) {
cec736d2 1598
de190aef
LP
1599 Object *d;
1600 int r;
cec736d2 1601
de190aef
LP
1602 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1603 if (r <= 0)
1604 return r;
cec736d2 1605
de190aef
LP
1606 return generic_array_bisect_plus_one(f,
1607 le64toh(d->data.entry_offset),
1608 le64toh(d->data.entry_array_offset),
1609 le64toh(d->data.n_entries),
1610 seqnum,
1611 test_object_seqnum,
1612 direction,
1613 ret, offset, NULL);
1614}
cec736d2 1615
de190aef
LP
1616int journal_file_move_to_entry_by_realtime_for_data(
1617 JournalFile *f,
1618 uint64_t data_offset,
1619 uint64_t realtime,
1620 direction_t direction,
1621 Object **ret, uint64_t *offset) {
1622
1623 Object *d;
1624 int r;
1625
1626 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1627 if (r <= 0)
1628 return r;
1629
1630 return generic_array_bisect_plus_one(f,
1631 le64toh(d->data.entry_offset),
1632 le64toh(d->data.entry_array_offset),
1633 le64toh(d->data.n_entries),
1634 realtime,
1635 test_object_realtime,
1636 direction,
1637 ret, offset, NULL);
cec736d2
LP
1638}
1639
1640void journal_file_dump(JournalFile *f) {
1641 char a[33], b[33], c[33];
1642 Object *o;
1643 int r;
1644 uint64_t p;
1645
1646 assert(f);
1647
de190aef
LP
1648 printf("File Path: %s\n"
1649 "File ID: %s\n"
cec736d2
LP
1650 "Machine ID: %s\n"
1651 "Boot ID: %s\n"
de190aef
LP
1652 "Arena size: %llu\n"
1653 "Objects: %lu\n"
1654 "Entries: %lu\n",
1655 f->path,
cec736d2
LP
1656 sd_id128_to_string(f->header->file_id, a),
1657 sd_id128_to_string(f->header->machine_id, b),
1658 sd_id128_to_string(f->header->boot_id, c),
de190aef
LP
1659 (unsigned long long) le64toh(f->header->arena_size),
1660 (unsigned long) le64toh(f->header->n_objects),
1661 (unsigned long) le64toh(f->header->n_entries));
cec736d2 1662
de190aef 1663 p = le64toh(f->header->arena_offset);
cec736d2 1664 while (p != 0) {
de190aef 1665 r = journal_file_move_to_object(f, -1, p, &o);
cec736d2
LP
1666 if (r < 0)
1667 goto fail;
1668
1669 switch (o->object.type) {
1670
1671 case OBJECT_UNUSED:
1672 printf("Type: OBJECT_UNUSED\n");
1673 break;
1674
1675 case OBJECT_DATA:
1676 printf("Type: OBJECT_DATA\n");
1677 break;
1678
1679 case OBJECT_ENTRY:
3fbf9cbb
LP
1680 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1681 (unsigned long long) le64toh(o->entry.seqnum),
1682 (unsigned long long) le64toh(o->entry.monotonic),
1683 (unsigned long long) le64toh(o->entry.realtime));
cec736d2
LP
1684 break;
1685
de190aef
LP
1686 case OBJECT_FIELD_HASH_TABLE:
1687 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
cec736d2
LP
1688 break;
1689
de190aef
LP
1690 case OBJECT_DATA_HASH_TABLE:
1691 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1692 break;
1693
1694 case OBJECT_ENTRY_ARRAY:
1695 printf("Type: OBJECT_ENTRY_ARRAY\n");
cec736d2
LP
1696 break;
1697 }
1698
807e17f0
LP
1699 if (o->object.flags & OBJECT_COMPRESSED)
1700 printf("Flags: COMPRESSED\n");
1701
cec736d2
LP
1702 if (p == le64toh(f->header->tail_object_offset))
1703 p = 0;
1704 else
1705 p = p + ALIGN64(le64toh(o->object.size));
1706 }
1707
1708 return;
1709fail:
1710 log_error("File corrupt");
1711}
1712
1713int journal_file_open(
1714 const char *fname,
1715 int flags,
1716 mode_t mode,
0ac38b70 1717 JournalFile *template,
cec736d2
LP
1718 JournalFile **ret) {
1719
1720 JournalFile *f;
1721 int r;
1722 bool newly_created = false;
1723
1724 assert(fname);
1725
1726 if ((flags & O_ACCMODE) != O_RDONLY &&
1727 (flags & O_ACCMODE) != O_RDWR)
1728 return -EINVAL;
1729
9447a7f1
LP
1730 if (!endswith(fname, ".journal"))
1731 return -EINVAL;
1732
cec736d2
LP
1733 f = new0(JournalFile, 1);
1734 if (!f)
1735 return -ENOMEM;
1736
0ac38b70
LP
1737 f->fd = -1;
1738 f->flags = flags;
1739 f->mode = mode;
cec736d2
LP
1740 f->writable = (flags & O_ACCMODE) != O_RDONLY;
1741 f->prot = prot_from_flags(flags);
1742
15944db8
LP
1743 if (template) {
1744 f->metrics = template->metrics;
1745 f->compress = template->compress;
1746 }
1747
cec736d2
LP
1748 f->path = strdup(fname);
1749 if (!f->path) {
1750 r = -ENOMEM;
1751 goto fail;
1752 }
1753
0ac38b70
LP
1754 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1755 if (f->fd < 0) {
1756 r = -errno;
1757 goto fail;
1758 }
1759
cec736d2
LP
1760 if (fstat(f->fd, &f->last_stat) < 0) {
1761 r = -errno;
1762 goto fail;
1763 }
1764
1765 if (f->last_stat.st_size == 0 && f->writable) {
1766 newly_created = true;
1767
0ac38b70 1768 r = journal_file_init_header(f, template);
cec736d2
LP
1769 if (r < 0)
1770 goto fail;
1771
1772 if (fstat(f->fd, &f->last_stat) < 0) {
1773 r = -errno;
1774 goto fail;
1775 }
1776 }
1777
1778 if (f->last_stat.st_size < (off_t) sizeof(Header)) {
1779 r = -EIO;
1780 goto fail;
1781 }
1782
1783 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
1784 if (f->header == MAP_FAILED) {
1785 f->header = NULL;
1786 r = -errno;
1787 goto fail;
1788 }
1789
1790 if (!newly_created) {
1791 r = journal_file_verify_header(f);
1792 if (r < 0)
1793 goto fail;
1794 }
1795
1796 if (f->writable) {
1797 r = journal_file_refresh_header(f);
1798 if (r < 0)
1799 goto fail;
1800 }
1801
1802 if (newly_created) {
1803
de190aef 1804 r = journal_file_setup_field_hash_table(f);
cec736d2
LP
1805 if (r < 0)
1806 goto fail;
1807
de190aef 1808 r = journal_file_setup_data_hash_table(f);
cec736d2
LP
1809 if (r < 0)
1810 goto fail;
1811 }
1812
de190aef 1813 r = journal_file_map_field_hash_table(f);
cec736d2
LP
1814 if (r < 0)
1815 goto fail;
1816
de190aef 1817 r = journal_file_map_data_hash_table(f);
cec736d2
LP
1818 if (r < 0)
1819 goto fail;
1820
1821 if (ret)
1822 *ret = f;
1823
1824 return 0;
1825
1826fail:
1827 journal_file_close(f);
1828
1829 return r;
1830}
0ac38b70
LP
1831
1832int journal_file_rotate(JournalFile **f) {
1833 char *p;
1834 size_t l;
1835 JournalFile *old_file, *new_file = NULL;
1836 int r;
1837
1838 assert(f);
1839 assert(*f);
1840
1841 old_file = *f;
1842
1843 if (!old_file->writable)
1844 return -EINVAL;
1845
1846 if (!endswith(old_file->path, ".journal"))
1847 return -EINVAL;
1848
1849 l = strlen(old_file->path);
1850
9447a7f1 1851 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
0ac38b70
LP
1852 if (!p)
1853 return -ENOMEM;
1854
1855 memcpy(p, old_file->path, l - 8);
1856 p[l-8] = '@';
1857 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
1858 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
1859 "-%016llx-%016llx.journal",
1860 (unsigned long long) le64toh((*f)->header->seqnum),
1861 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
1862
1863 r = rename(old_file->path, p);
1864 free(p);
1865
1866 if (r < 0)
1867 return -errno;
1868
ccdbaf91 1869 old_file->header->state = STATE_ARCHIVED;
0ac38b70
LP
1870
1871 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file);
1872 journal_file_close(old_file);
1873
1874 *f = new_file;
1875 return r;
1876}
1877
9447a7f1
LP
1878int journal_file_open_reliably(
1879 const char *fname,
1880 int flags,
1881 mode_t mode,
1882 JournalFile *template,
1883 JournalFile **ret) {
1884
1885 int r;
1886 size_t l;
1887 char *p;
1888
1889 r = journal_file_open(fname, flags, mode, template, ret);
0071d9f1
LP
1890 if (r != -EBADMSG && /* corrupted */
1891 r != -ENODATA && /* truncated */
1892 r != -EHOSTDOWN && /* other machine */
1893 r != -EPROTONOSUPPORT) /* incompatible feature */
9447a7f1
LP
1894 return r;
1895
1896 if ((flags & O_ACCMODE) == O_RDONLY)
1897 return r;
1898
1899 if (!(flags & O_CREAT))
1900 return r;
1901
5c70eab4
LP
1902 /* The file is corrupted. Rotate it away and try it again (but only once) */
1903
9447a7f1
LP
1904 l = strlen(fname);
1905 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
1906 (int) (l-8), fname,
1907 (unsigned long long) now(CLOCK_REALTIME),
1908 random_ull()) < 0)
1909 return -ENOMEM;
1910
1911 r = rename(fname, p);
1912 free(p);
1913 if (r < 0)
1914 return -errno;
1915
1916 log_warning("File %s corrupted, renaming and replacing.", fname);
1917
1918 return journal_file_open(fname, flags, mode, template, ret);
1919}
1920
0ac38b70
LP
1921struct vacuum_info {
1922 off_t usage;
1923 char *filename;
1924
1925 uint64_t realtime;
1926 sd_id128_t seqnum_id;
1927 uint64_t seqnum;
5c70eab4
LP
1928
1929 bool have_seqnum;
0ac38b70
LP
1930};
1931
1932static int vacuum_compare(const void *_a, const void *_b) {
1933 const struct vacuum_info *a, *b;
1934
1935 a = _a;
1936 b = _b;
1937
5c70eab4
LP
1938 if (a->have_seqnum && b->have_seqnum &&
1939 sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
0ac38b70
LP
1940 if (a->seqnum < b->seqnum)
1941 return -1;
1942 else if (a->seqnum > b->seqnum)
1943 return 1;
1944 else
1945 return 0;
1946 }
1947
1948 if (a->realtime < b->realtime)
1949 return -1;
1950 else if (a->realtime > b->realtime)
1951 return 1;
5c70eab4 1952 else if (a->have_seqnum && b->have_seqnum)
0ac38b70 1953 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
5c70eab4
LP
1954 else
1955 return strcmp(a->filename, b->filename);
0ac38b70
LP
1956}
1957
1958int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
1959 DIR *d;
1960 int r = 0;
1961 struct vacuum_info *list = NULL;
1962 unsigned n_list = 0, n_allocated = 0, i;
1963 uint64_t sum = 0;
1964
1965 assert(directory);
1966
1967 if (max_use <= 0)
babfc091 1968 return 0;
0ac38b70
LP
1969
1970 d = opendir(directory);
1971 if (!d)
1972 return -errno;
1973
1974 for (;;) {
1975 int k;
1976 struct dirent buf, *de;
1977 size_t q;
1978 struct stat st;
1979 char *p;
1980 unsigned long long seqnum, realtime;
1981 sd_id128_t seqnum_id;
5c70eab4 1982 bool have_seqnum;
0ac38b70
LP
1983
1984 k = readdir_r(d, &buf, &de);
1985 if (k != 0) {
1986 r = -k;
1987 goto finish;
1988 }
1989
1990 if (!de)
1991 break;
1992
5c70eab4
LP
1993 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
1994 continue;
1995
1996 if (!S_ISREG(st.st_mode))
0ac38b70
LP
1997 continue;
1998
1999 q = strlen(de->d_name);
2000
5c70eab4 2001 if (endswith(de->d_name, ".journal")) {
0ac38b70 2002
5c70eab4 2003 /* Vacuum archived files */
0ac38b70 2004
5c70eab4
LP
2005 if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2006 continue;
0ac38b70 2007
5c70eab4
LP
2008 if (de->d_name[q-8-16-1] != '-' ||
2009 de->d_name[q-8-16-1-16-1] != '-' ||
2010 de->d_name[q-8-16-1-16-1-32-1] != '@')
2011 continue;
0ac38b70 2012
5c70eab4
LP
2013 p = strdup(de->d_name);
2014 if (!p) {
2015 r = -ENOMEM;
2016 goto finish;
2017 }
0ac38b70 2018
5c70eab4
LP
2019 de->d_name[q-8-16-1-16-1] = 0;
2020 if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2021 free(p);
2022 continue;
2023 }
2024
2025 if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2026 free(p);
2027 continue;
2028 }
2029
2030 have_seqnum = true;
2031
2032 } else if (endswith(de->d_name, ".journal~")) {
2033 unsigned long long tmp;
2034
2035 /* Vacuum corrupted files */
2036
2037 if (q < 1 + 16 + 1 + 16 + 8 + 1)
2038 continue;
0ac38b70 2039
5c70eab4
LP
2040 if (de->d_name[q-1-8-16-1] != '-' ||
2041 de->d_name[q-1-8-16-1-16-1] != '@')
2042 continue;
2043
2044 p = strdup(de->d_name);
2045 if (!p) {
2046 r = -ENOMEM;
2047 goto finish;
2048 }
2049
2050 if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2051 free(p);
2052 continue;
2053 }
2054
2055 have_seqnum = false;
2056 } else
0ac38b70 2057 continue;
0ac38b70
LP
2058
2059 if (n_list >= n_allocated) {
2060 struct vacuum_info *j;
2061
2062 n_allocated = MAX(n_allocated * 2U, 8U);
2063 j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2064 if (!j) {
2065 free(p);
2066 r = -ENOMEM;
2067 goto finish;
2068 }
2069
2070 list = j;
2071 }
2072
2073 list[n_list].filename = p;
a3a52c0f 2074 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
0ac38b70
LP
2075 list[n_list].seqnum = seqnum;
2076 list[n_list].realtime = realtime;
2077 list[n_list].seqnum_id = seqnum_id;
5c70eab4 2078 list[n_list].have_seqnum = have_seqnum;
0ac38b70
LP
2079
2080 sum += list[n_list].usage;
2081
2082 n_list ++;
2083 }
2084
2085 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
2086
2087 for(i = 0; i < n_list; i++) {
2088 struct statvfs ss;
2089
2090 if (fstatvfs(dirfd(d), &ss) < 0) {
2091 r = -errno;
2092 goto finish;
2093 }
2094
2095 if (sum <= max_use &&
2096 (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2097 break;
2098
2099 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
e7bf07b3 2100 log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
0ac38b70
LP
2101 sum -= list[i].usage;
2102 } else if (errno != ENOENT)
2103 log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2104 }
2105
2106finish:
2107 for (i = 0; i < n_list; i++)
2108 free(list[i].filename);
2109
2110 free(list);
2111
de190aef
LP
2112 if (d)
2113 closedir(d);
2114
0ac38b70
LP
2115 return r;
2116}
cf244689
LP
2117
2118int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2119 uint64_t i, n;
2120 uint64_t q, xor_hash = 0;
2121 int r;
2122 EntryItem *items;
2123 dual_timestamp ts;
2124
2125 assert(from);
2126 assert(to);
2127 assert(o);
2128 assert(p);
2129
2130 if (!to->writable)
2131 return -EPERM;
2132
2133 ts.monotonic = le64toh(o->entry.monotonic);
2134 ts.realtime = le64toh(o->entry.realtime);
2135
2136 if (to->tail_entry_monotonic_valid &&
2137 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2138 return -EINVAL;
2139
2140 if (ts.realtime < le64toh(to->header->tail_entry_realtime))
2141 return -EINVAL;
2142
2143 n = journal_file_entry_n_items(o);
2144 items = alloca(sizeof(EntryItem) * n);
2145
2146 for (i = 0; i < n; i++) {
4fd052ae
FC
2147 uint64_t l, h;
2148 le64_t le_hash;
cf244689
LP
2149 size_t t;
2150 void *data;
2151 Object *u;
2152
2153 q = le64toh(o->entry.items[i].object_offset);
2154 le_hash = o->entry.items[i].hash;
2155
2156 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2157 if (r < 0)
2158 return r;
2159
2160 if (le_hash != o->data.hash)
2161 return -EBADMSG;
2162
2163 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2164 t = (size_t) l;
2165
2166 /* We hit the limit on 32bit machines */
2167 if ((uint64_t) t != l)
2168 return -E2BIG;
2169
2170 if (o->object.flags & OBJECT_COMPRESSED) {
2171#ifdef HAVE_XZ
2172 uint64_t rsize;
2173
2174 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2175 return -EBADMSG;
2176
2177 data = from->compress_buffer;
2178 l = rsize;
2179#else
2180 return -EPROTONOSUPPORT;
2181#endif
2182 } else
2183 data = o->data.payload;
2184
2185 r = journal_file_append_data(to, data, l, &u, &h);
2186 if (r < 0)
2187 return r;
2188
2189 xor_hash ^= le64toh(u->data.hash);
2190 items[i].object_offset = htole64(h);
2191 items[i].hash = u->data.hash;
2192
2193 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2194 if (r < 0)
2195 return r;
2196 }
2197
2198 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2199}
babfc091
LP
2200
2201void journal_default_metrics(JournalMetrics *m, int fd) {
2202 uint64_t fs_size = 0;
2203 struct statvfs ss;
a7bc2c2a 2204 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
babfc091
LP
2205
2206 assert(m);
2207 assert(fd >= 0);
2208
2209 if (fstatvfs(fd, &ss) >= 0)
2210 fs_size = ss.f_frsize * ss.f_blocks;
2211
2212 if (m->max_use == (uint64_t) -1) {
2213
2214 if (fs_size > 0) {
2215 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2216
2217 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2218 m->max_use = DEFAULT_MAX_USE_UPPER;
2219
2220 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2221 m->max_use = DEFAULT_MAX_USE_LOWER;
2222 } else
2223 m->max_use = DEFAULT_MAX_USE_LOWER;
2224 } else {
2225 m->max_use = PAGE_ALIGN(m->max_use);
2226
2227 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2228 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2229 }
2230
2231 if (m->max_size == (uint64_t) -1) {
2232 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2233
2234 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2235 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2236 } else
2237 m->max_size = PAGE_ALIGN(m->max_size);
2238
2239 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2240 m->max_size = JOURNAL_FILE_SIZE_MIN;
2241
2242 if (m->max_size*2 > m->max_use)
2243 m->max_use = m->max_size*2;
2244
2245 if (m->min_size == (uint64_t) -1)
2246 m->min_size = JOURNAL_FILE_SIZE_MIN;
2247 else {
2248 m->min_size = PAGE_ALIGN(m->min_size);
2249
2250 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2251 m->min_size = JOURNAL_FILE_SIZE_MIN;
2252
2253 if (m->min_size > m->max_size)
2254 m->max_size = m->min_size;
2255 }
2256
2257 if (m->keep_free == (uint64_t) -1) {
2258
2259 if (fs_size > 0) {
2260 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2261
2262 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2263 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2264
2265 } else
2266 m->keep_free = DEFAULT_KEEP_FREE;
2267 }
2268
e7bf07b3
LP
2269 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2270 format_bytes(a, sizeof(a), m->max_use),
2271 format_bytes(b, sizeof(b), m->max_size),
2272 format_bytes(c, sizeof(c), m->min_size),
2273 format_bytes(d, sizeof(d), m->keep_free));
babfc091 2274}