]>
Commit | Line | Data |
---|---|---|
cec736d2 LP |
1 | /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ |
2 | ||
3 | /*** | |
4 | This file is part of systemd. | |
5 | ||
6 | Copyright 2011 Lennart Poettering | |
7 | ||
8 | systemd is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 2 of the License, or | |
11 | (at your option) any later version. | |
12 | ||
13 | systemd is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
20 | ***/ | |
21 | ||
22 | #include <sys/mman.h> | |
23 | #include <errno.h> | |
24 | #include <sys/uio.h> | |
25 | #include <unistd.h> | |
26 | #include <sys/statvfs.h> | |
27 | #include <fcntl.h> | |
28 | #include <stddef.h> | |
29 | ||
30 | #include "journal-def.h" | |
31 | #include "journal-file.h" | |
32 | #include "lookup3.h" | |
33 | ||
/* Arena limits (in bytes) stored in the header of a newly created file */
#define DEFAULT_ARENA_MAX_SIZE (16ULL << 30)
#define DEFAULT_ARENA_MIN_SIZE (256ULL << 10)
#define DEFAULT_ARENA_KEEP_FREE (1ULL << 20)

/* Byte sizes of the hash table and bisect table objects of a new file */
#define DEFAULT_HASH_TABLE_SIZE (16ULL*2047ULL)
#define DEFAULT_BISECT_TABLE_SIZE (8ULL*(DEFAULT_ARENA_MAX_SIZE/(64ULL << 10)))

/* Size the data window is extended by when a smaller mapping is requested */
#define DEFAULT_WINDOW_SIZE (128ULL << 20)

/* The eight magic bytes at the very start of every journal file */
static const char signature[8] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };

/* Rounds x up to the next multiple of 8 */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
46 | ||
47 | void journal_file_close(JournalFile *f) { | |
48 | assert(f); | |
49 | ||
50 | if (f->fd >= 0) | |
51 | close_nointr_nofail(f->fd); | |
52 | ||
53 | if (f->header) | |
54 | munmap(f->header, PAGE_ALIGN(sizeof(Header))); | |
55 | ||
56 | if (f->hash_table_window) | |
57 | munmap(f->hash_table_window, f->hash_table_window_size); | |
58 | ||
59 | if (f->bisect_table_window) | |
60 | munmap(f->bisect_table_window, f->bisect_table_window_size); | |
61 | ||
62 | if (f->window) | |
63 | munmap(f->window, f->window_size); | |
64 | ||
65 | free(f->path); | |
66 | free(f); | |
67 | } | |
68 | ||
69 | static int journal_file_init_header(JournalFile *f) { | |
70 | Header h; | |
71 | ssize_t k; | |
72 | int r; | |
73 | ||
74 | assert(f); | |
75 | ||
76 | zero(h); | |
77 | memcpy(h.signature, signature, 8); | |
78 | h.arena_offset = htole64(ALIGN64(sizeof(h))); | |
79 | h.arena_max_size = htole64(DEFAULT_ARENA_MAX_SIZE); | |
80 | h.arena_min_size = htole64(DEFAULT_ARENA_MIN_SIZE); | |
81 | h.arena_keep_free = htole64(DEFAULT_ARENA_KEEP_FREE); | |
82 | ||
83 | r = sd_id128_randomize(&h.file_id); | |
84 | if (r < 0) | |
85 | return r; | |
86 | ||
87 | h.seqnum_id = h.file_id; | |
88 | ||
89 | k = pwrite(f->fd, &h, sizeof(h), 0); | |
90 | if (k < 0) | |
91 | return -errno; | |
92 | ||
93 | if (k != sizeof(h)) | |
94 | return -EIO; | |
95 | ||
96 | return 0; | |
97 | } | |
98 | ||
99 | static int journal_file_refresh_header(JournalFile *f) { | |
100 | int r; | |
101 | ||
102 | assert(f); | |
103 | ||
104 | r = sd_id128_get_machine(&f->header->machine_id); | |
105 | if (r < 0) | |
106 | return r; | |
107 | ||
108 | r = sd_id128_get_boot(&f->header->boot_id); | |
109 | if (r < 0) | |
110 | return r; | |
111 | ||
112 | f->header->state = htole32(STATE_ONLINE); | |
113 | return 0; | |
114 | } | |
115 | ||
116 | static int journal_file_verify_header(JournalFile *f) { | |
117 | assert(f); | |
118 | ||
119 | if (memcmp(f->header, signature, 8)) | |
120 | return -EBADMSG; | |
121 | ||
122 | if (f->header->incompatible_flags != 0) | |
123 | return -EPROTONOSUPPORT; | |
124 | ||
125 | if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->arena_offset) + le64toh(f->header->arena_size))) | |
126 | return -ENODATA; | |
127 | ||
128 | if (f->writable) { | |
129 | uint32_t state; | |
130 | sd_id128_t machine_id; | |
131 | int r; | |
132 | ||
133 | r = sd_id128_get_machine(&machine_id); | |
134 | if (r < 0) | |
135 | return r; | |
136 | ||
137 | if (!sd_id128_equal(machine_id, f->header->machine_id)) | |
138 | return -EHOSTDOWN; | |
139 | ||
140 | state = le32toh(f->header->state); | |
141 | ||
142 | if (state == STATE_ONLINE) | |
143 | log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path); | |
144 | else if (state == STATE_ARCHIVED) | |
145 | return -ESHUTDOWN; | |
146 | else if (state != STATE_OFFLINE) | |
147 | log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state); | |
148 | } | |
149 | ||
150 | return 0; | |
151 | } | |
152 | ||
153 | static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) { | |
154 | uint64_t asize; | |
155 | uint64_t old_size, new_size; | |
156 | ||
157 | assert(f); | |
158 | ||
159 | if (offset < le64toh(f->header->arena_offset)) | |
160 | return -EINVAL; | |
161 | ||
162 | new_size = PAGE_ALIGN(offset + size); | |
163 | ||
164 | /* We assume that this file is not sparse, and we know that | |
38ac38b2 | 165 | * for sure, since we always call posix_fallocate() |
cec736d2 LP |
166 | * ourselves */ |
167 | ||
168 | old_size = | |
169 | le64toh(f->header->arena_offset) + | |
170 | le64toh(f->header->arena_size); | |
171 | ||
172 | if (old_size >= new_size) | |
173 | return 0; | |
174 | ||
175 | asize = new_size - le64toh(f->header->arena_offset); | |
176 | ||
177 | if (asize > le64toh(f->header->arena_min_size)) { | |
178 | struct statvfs svfs; | |
179 | ||
180 | if (fstatvfs(f->fd, &svfs) >= 0) { | |
181 | uint64_t available; | |
182 | ||
183 | available = svfs.f_bfree * svfs.f_bsize; | |
184 | ||
185 | if (available >= f->header->arena_keep_free) | |
186 | available -= f->header->arena_keep_free; | |
187 | else | |
188 | available = 0; | |
189 | ||
190 | if (new_size - old_size > available) | |
191 | return -E2BIG; | |
192 | } | |
193 | } | |
194 | ||
195 | if (asize > le64toh(f->header->arena_max_size)) | |
196 | return -E2BIG; | |
197 | ||
38ac38b2 | 198 | if (posix_fallocate(f->fd, old_size, new_size - old_size) < 0) |
cec736d2 LP |
199 | return -errno; |
200 | ||
201 | if (fstat(f->fd, &f->last_stat) < 0) | |
202 | return -errno; | |
203 | ||
204 | f->header->arena_size = htole64(asize); | |
205 | ||
206 | return 0; | |
207 | } | |
208 | ||
209 | static int journal_file_map( | |
210 | JournalFile *f, | |
211 | uint64_t offset, | |
212 | uint64_t size, | |
213 | void **_window, | |
214 | uint64_t *_woffset, | |
215 | uint64_t *_wsize, | |
216 | void **ret) { | |
217 | ||
218 | uint64_t woffset, wsize; | |
219 | void *window; | |
220 | ||
221 | assert(f); | |
222 | assert(size > 0); | |
223 | assert(ret); | |
224 | ||
225 | woffset = offset & ~((uint64_t) page_size() - 1ULL); | |
226 | wsize = size + (offset - woffset); | |
227 | wsize = PAGE_ALIGN(wsize); | |
228 | ||
229 | window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset); | |
230 | if (window == MAP_FAILED) | |
231 | return -errno; | |
232 | ||
233 | if (_window) | |
234 | *_window = window; | |
235 | ||
236 | if (_woffset) | |
237 | *_woffset = woffset; | |
238 | ||
239 | if (_wsize) | |
240 | *_wsize = wsize; | |
241 | ||
242 | *ret = (uint8_t*) window + (offset - woffset); | |
243 | ||
244 | return 0; | |
245 | } | |
246 | ||
247 | static int journal_file_move_to(JournalFile *f, uint64_t offset, uint64_t size, void **ret) { | |
248 | void *p; | |
249 | uint64_t delta; | |
250 | int r; | |
251 | ||
252 | assert(f); | |
253 | assert(ret); | |
254 | ||
255 | if (_likely_(f->window && | |
256 | f->window_offset <= offset && | |
257 | f->window_offset+f->window_size >= offset + size)) { | |
258 | ||
259 | *ret = (uint8_t*) f->window + (offset - f->window_offset); | |
260 | return 0; | |
261 | } | |
262 | ||
263 | if (f->window) { | |
264 | if (munmap(f->window, f->window_size) < 0) | |
265 | return -errno; | |
266 | ||
267 | f->window = NULL; | |
268 | f->window_size = f->window_offset = 0; | |
269 | } | |
270 | ||
271 | if (size < DEFAULT_WINDOW_SIZE) { | |
272 | /* If the default window size is larger then what was | |
273 | * asked for extend the mapping a bit in the hope to | |
274 | * minimize needed remappings later on. We add half | |
275 | * the window space before and half behind the | |
276 | * requested mapping */ | |
277 | ||
278 | delta = PAGE_ALIGN((DEFAULT_WINDOW_SIZE - size) / 2); | |
279 | ||
280 | if (offset < delta) | |
281 | delta = offset; | |
282 | ||
283 | offset -= delta; | |
284 | size += (DEFAULT_WINDOW_SIZE - delta); | |
285 | } else | |
286 | delta = 0; | |
287 | ||
288 | r = journal_file_map(f, | |
289 | offset, size, | |
290 | &f->window, &f->window_offset, &f->window_size, | |
291 | & p); | |
292 | ||
293 | if (r < 0) | |
294 | return r; | |
295 | ||
296 | *ret = (uint8_t*) p + delta; | |
297 | return 0; | |
298 | } | |
299 | ||
300 | static bool verify_hash(Object *o) { | |
301 | uint64_t t; | |
302 | ||
303 | assert(o); | |
304 | ||
305 | t = le64toh(o->object.type); | |
306 | if (t == OBJECT_DATA) { | |
307 | uint64_t s, h1, h2; | |
308 | ||
309 | s = le64toh(o->object.size); | |
310 | ||
311 | h1 = le64toh(o->data.hash); | |
312 | h2 = hash64(o->data.payload, s - offsetof(Object, data.payload)); | |
313 | ||
314 | return h1 == h2; | |
315 | } | |
316 | ||
317 | return true; | |
318 | } | |
319 | ||
320 | int journal_file_move_to_object(JournalFile *f, uint64_t offset, int type, Object **ret) { | |
321 | int r; | |
322 | void *t; | |
323 | Object *o; | |
324 | uint64_t s; | |
325 | ||
326 | assert(f); | |
327 | assert(ret); | |
328 | ||
329 | r = journal_file_move_to(f, offset, sizeof(ObjectHeader), &t); | |
330 | if (r < 0) | |
331 | return r; | |
332 | ||
333 | o = (Object*) t; | |
334 | s = le64toh(o->object.size); | |
335 | ||
336 | if (s < sizeof(ObjectHeader)) | |
337 | return -EBADMSG; | |
338 | ||
339 | if (type >= 0 && le64toh(o->object.type) != type) | |
340 | return -EBADMSG; | |
341 | ||
342 | if (s > sizeof(ObjectHeader)) { | |
343 | r = journal_file_move_to(f, offset, s, &t); | |
344 | if (r < 0) | |
345 | return r; | |
346 | ||
347 | o = (Object*) t; | |
348 | } | |
349 | ||
350 | if (!verify_hash(o)) | |
351 | return -EBADMSG; | |
352 | ||
353 | *ret = o; | |
354 | return 0; | |
355 | } | |
356 | ||
357 | static uint64_t journal_file_seqnum(JournalFile *f) { | |
358 | uint64_t r; | |
359 | ||
360 | assert(f); | |
361 | ||
362 | r = le64toh(f->header->seqnum) + 1; | |
363 | f->header->seqnum = htole64(r); | |
364 | ||
365 | return r; | |
366 | } | |
367 | ||
368 | static int journal_file_append_object(JournalFile *f, uint64_t size, Object **ret, uint64_t *offset) { | |
369 | int r; | |
370 | uint64_t p; | |
371 | Object *tail, *o; | |
372 | void *t; | |
373 | ||
374 | assert(f); | |
375 | assert(size >= sizeof(ObjectHeader)); | |
376 | assert(offset); | |
377 | assert(ret); | |
378 | ||
379 | p = le64toh(f->header->tail_object_offset); | |
380 | ||
381 | if (p == 0) | |
382 | p = le64toh(f->header->arena_offset); | |
383 | else { | |
384 | r = journal_file_move_to_object(f, p, -1, &tail); | |
385 | if (r < 0) | |
386 | return r; | |
387 | ||
388 | p += ALIGN64(le64toh(tail->object.size)); | |
389 | } | |
390 | ||
391 | r = journal_file_allocate(f, p, size); | |
392 | if (r < 0) | |
393 | return r; | |
394 | ||
395 | r = journal_file_move_to(f, p, size, &t); | |
396 | if (r < 0) | |
397 | return r; | |
398 | ||
399 | o = (Object*) t; | |
400 | ||
401 | zero(o->object); | |
402 | o->object.type = htole64(OBJECT_UNUSED); | |
403 | zero(o->object.reserved); | |
404 | o->object.size = htole64(size); | |
405 | ||
406 | f->header->tail_object_offset = htole64(p); | |
407 | if (f->header->head_object_offset == 0) | |
408 | f->header->head_object_offset = htole64(p); | |
409 | ||
410 | f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1); | |
411 | ||
412 | *ret = o; | |
413 | *offset = p; | |
414 | ||
415 | return 0; | |
416 | } | |
417 | ||
418 | static int journal_file_setup_hash_table(JournalFile *f) { | |
419 | uint64_t s, p; | |
420 | Object *o; | |
421 | int r; | |
422 | ||
423 | assert(f); | |
424 | ||
425 | s = DEFAULT_HASH_TABLE_SIZE; | |
426 | r = journal_file_append_object(f, offsetof(Object, hash_table.table) + s, &o, &p); | |
427 | if (r < 0) | |
428 | return r; | |
429 | ||
430 | o->object.type = htole64(OBJECT_HASH_TABLE); | |
431 | memset(o->hash_table.table, 0, s); | |
432 | ||
433 | f->header->hash_table_offset = htole64(p + offsetof(Object, hash_table.table)); | |
434 | f->header->hash_table_size = htole64(s); | |
435 | ||
436 | return 0; | |
437 | } | |
438 | ||
439 | static int journal_file_setup_bisect_table(JournalFile *f) { | |
440 | uint64_t s, p; | |
441 | Object *o; | |
442 | int r; | |
443 | ||
444 | assert(f); | |
445 | ||
446 | s = DEFAULT_BISECT_TABLE_SIZE; | |
447 | r = journal_file_append_object(f, offsetof(Object, bisect_table.table) + s, &o, &p); | |
448 | if (r < 0) | |
449 | return r; | |
450 | ||
451 | o->object.type = htole64(OBJECT_BISECT_TABLE); | |
452 | memset(o->bisect_table.table, 0, s); | |
453 | ||
454 | f->header->bisect_table_offset = htole64(p + offsetof(Object, bisect_table.table)); | |
455 | f->header->bisect_table_size = htole64(s); | |
456 | ||
457 | return 0; | |
458 | } | |
459 | ||
460 | static int journal_file_map_hash_table(JournalFile *f) { | |
461 | uint64_t s, p; | |
462 | void *t; | |
463 | int r; | |
464 | ||
465 | assert(f); | |
466 | ||
467 | p = le64toh(f->header->hash_table_offset); | |
468 | s = le64toh(f->header->hash_table_size); | |
469 | ||
470 | r = journal_file_map(f, | |
471 | p, s, | |
472 | &f->hash_table_window, NULL, &f->hash_table_window_size, | |
473 | &t); | |
474 | if (r < 0) | |
475 | return r; | |
476 | ||
477 | f->hash_table = t; | |
478 | return 0; | |
479 | } | |
480 | ||
481 | static int journal_file_map_bisect_table(JournalFile *f) { | |
482 | uint64_t s, p; | |
483 | void *t; | |
484 | int r; | |
485 | ||
486 | assert(f); | |
487 | ||
488 | p = le64toh(f->header->bisect_table_offset); | |
489 | s = le64toh(f->header->bisect_table_size); | |
490 | ||
491 | r = journal_file_map(f, | |
492 | p, s, | |
493 | &f->bisect_table_window, NULL, &f->bisect_table_window_size, | |
494 | &t); | |
495 | ||
496 | if (r < 0) | |
497 | return r; | |
498 | ||
499 | f->bisect_table = t; | |
500 | return 0; | |
501 | } | |
502 | ||
503 | static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash_index) { | |
504 | uint64_t p; | |
505 | int r; | |
506 | ||
507 | assert(f); | |
508 | assert(o); | |
509 | assert(offset > 0); | |
510 | assert(o->object.type == htole64(OBJECT_DATA)); | |
511 | ||
512 | o->data.head_entry_offset = o->data.tail_entry_offset = 0; | |
513 | o->data.next_hash_offset = 0; | |
514 | ||
515 | p = le64toh(f->hash_table[hash_index].tail_hash_offset); | |
516 | if (p == 0) { | |
517 | /* Only entry in the hash table is easy */ | |
518 | ||
519 | o->data.prev_hash_offset = 0; | |
520 | f->hash_table[hash_index].head_hash_offset = htole64(offset); | |
521 | } else { | |
522 | o->data.prev_hash_offset = htole64(p); | |
523 | ||
524 | /* Temporarily move back to the previous data object, | |
525 | * to patch in pointer */ | |
526 | ||
527 | r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); | |
528 | if (r < 0) | |
529 | return r; | |
530 | ||
531 | o->data.next_hash_offset = offset; | |
532 | ||
533 | r = journal_file_move_to_object(f, offset, OBJECT_DATA, &o); | |
534 | if (r < 0) | |
535 | return r; | |
536 | } | |
537 | ||
538 | f->hash_table[hash_index].tail_hash_offset = htole64(offset); | |
539 | ||
540 | return 0; | |
541 | } | |
542 | ||
543 | static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { | |
544 | uint64_t hash, h, p, np; | |
545 | uint64_t osize; | |
546 | Object *o; | |
547 | int r; | |
548 | ||
549 | assert(f); | |
550 | assert(data || size == 0); | |
551 | ||
552 | osize = offsetof(Object, data.payload) + size; | |
553 | ||
554 | hash = hash64(data, size); | |
555 | h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); | |
556 | p = le64toh(f->hash_table[h].head_hash_offset); | |
557 | ||
558 | while (p != 0) { | |
559 | /* Look for this data object in the hash table */ | |
560 | ||
561 | r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); | |
562 | if (r < 0) | |
563 | return r; | |
564 | ||
565 | if (le64toh(o->object.size) == osize && | |
566 | memcmp(o->data.payload, data, size) == 0) { | |
567 | ||
568 | if (le64toh(o->data.hash) != hash) | |
569 | return -EBADMSG; | |
570 | ||
571 | if (ret) | |
572 | *ret = o; | |
573 | ||
574 | if (offset) | |
575 | *offset = p; | |
576 | ||
577 | return 0; | |
578 | } | |
579 | ||
580 | p = le64toh(o->data.next_hash_offset); | |
581 | } | |
582 | ||
583 | r = journal_file_append_object(f, osize, &o, &np); | |
584 | if (r < 0) | |
585 | return r; | |
586 | ||
587 | o->object.type = htole64(OBJECT_DATA); | |
588 | o->data.hash = htole64(hash); | |
589 | memcpy(o->data.payload, data, size); | |
590 | ||
591 | r = journal_file_link_data(f, o, np, h); | |
592 | if (r < 0) | |
593 | return r; | |
594 | ||
595 | if (ret) | |
596 | *ret = o; | |
597 | ||
598 | if (offset) | |
599 | *offset = np; | |
600 | ||
601 | return 0; | |
602 | } | |
603 | ||
604 | uint64_t journal_file_entry_n_items(Object *o) { | |
605 | assert(o); | |
606 | assert(o->object.type == htole64(OBJECT_ENTRY)); | |
607 | ||
608 | return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem); | |
609 | } | |
610 | ||
611 | static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) { | |
612 | uint64_t p, q; | |
613 | int r; | |
614 | assert(f); | |
615 | assert(o); | |
616 | assert(offset > 0); | |
617 | ||
618 | p = le64toh(o->entry.items[i].object_offset); | |
619 | if (p == 0) | |
620 | return -EINVAL; | |
621 | ||
622 | o->entry.items[i].next_entry_offset = 0; | |
623 | ||
624 | /* Move to the data object */ | |
625 | r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); | |
626 | if (r < 0) | |
627 | return r; | |
628 | ||
629 | q = le64toh(o->data.tail_entry_offset); | |
630 | o->data.tail_entry_offset = htole64(offset); | |
631 | ||
632 | if (q == 0) | |
633 | o->data.head_entry_offset = htole64(offset); | |
634 | else { | |
635 | uint64_t n, j; | |
636 | ||
637 | /* Move to previous entry */ | |
638 | r = journal_file_move_to_object(f, q, OBJECT_ENTRY, &o); | |
639 | if (r < 0) | |
640 | return r; | |
641 | ||
642 | n = journal_file_entry_n_items(o); | |
643 | for (j = 0; j < n; j++) | |
644 | if (le64toh(o->entry.items[j].object_offset) == p) | |
645 | break; | |
646 | ||
647 | if (j >= n) | |
648 | return -EBADMSG; | |
649 | ||
650 | o->entry.items[j].next_entry_offset = offset; | |
651 | } | |
652 | ||
653 | /* Move back to original entry */ | |
654 | r = journal_file_move_to_object(f, offset, OBJECT_ENTRY, &o); | |
655 | if (r < 0) | |
656 | return r; | |
657 | ||
658 | o->entry.items[i].prev_entry_offset = q; | |
659 | return 0; | |
660 | } | |
661 | ||
662 | static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { | |
663 | uint64_t p, i, n, k, a, b; | |
664 | int r; | |
665 | ||
666 | assert(f); | |
667 | assert(o); | |
668 | assert(offset > 0); | |
669 | assert(o->object.type == htole64(OBJECT_ENTRY)); | |
670 | ||
671 | /* Link up the entry itself */ | |
672 | p = le64toh(f->header->tail_entry_offset); | |
673 | ||
674 | o->entry.prev_entry_offset = f->header->tail_entry_offset; | |
675 | o->entry.next_entry_offset = 0; | |
676 | ||
677 | if (p == 0) | |
678 | f->header->head_entry_offset = htole64(offset); | |
679 | else { | |
680 | /* Temporarily move back to the previous entry, to | |
681 | * patch in pointer */ | |
682 | ||
683 | r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); | |
684 | if (r < 0) | |
685 | return r; | |
686 | ||
687 | o->entry.next_entry_offset = htole64(offset); | |
688 | ||
689 | r = journal_file_move_to_object(f, offset, OBJECT_ENTRY, &o); | |
690 | if (r < 0) | |
691 | return r; | |
692 | } | |
693 | ||
694 | f->header->tail_entry_offset = htole64(offset); | |
695 | ||
696 | /* Link up the items */ | |
697 | n = journal_file_entry_n_items(o); | |
698 | for (i = 0; i < n; i++) { | |
699 | r = journal_file_link_entry_item(f, o, offset, i); | |
700 | if (r < 0) | |
701 | return r; | |
702 | } | |
703 | ||
704 | /* Link up the entry in the bisect table */ | |
705 | n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); | |
706 | k = le64toh(f->header->arena_max_size) / n; | |
707 | ||
708 | a = (le64toh(f->header->last_bisect_offset) + k - 1) / k; | |
709 | b = offset / k; | |
710 | ||
711 | for (; a <= b; a++) | |
712 | f->bisect_table[a] = htole64(offset); | |
713 | ||
714 | f->header->last_bisect_offset = htole64(offset + le64toh(o->object.size)); | |
715 | ||
716 | return 0; | |
717 | } | |
718 | ||
719 | static int journal_file_append_entry_internal( | |
720 | JournalFile *f, | |
721 | const dual_timestamp *ts, | |
722 | uint64_t xor_hash, | |
723 | const EntryItem items[], unsigned n_items, | |
724 | Object **ret, uint64_t *offset) { | |
725 | uint64_t np; | |
726 | uint64_t osize; | |
727 | Object *o; | |
728 | int r; | |
729 | ||
730 | assert(f); | |
731 | assert(items || n_items == 0); | |
732 | ||
733 | osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem)); | |
734 | ||
735 | r = journal_file_append_object(f, osize, &o, &np); | |
736 | if (r < 0) | |
737 | return r; | |
738 | ||
739 | o->object.type = htole64(OBJECT_ENTRY); | |
740 | o->entry.seqnum = htole64(journal_file_seqnum(f)); | |
741 | memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); | |
742 | o->entry.realtime = ts ? htole64(ts->realtime) : 0; | |
743 | o->entry.monotonic = ts ? htole64(ts->monotonic) : 0; | |
744 | o->entry.xor_hash = htole64(xor_hash); | |
745 | o->entry.boot_id = f->header->boot_id; | |
746 | ||
747 | r = journal_file_link_entry(f, o, np); | |
748 | if (r < 0) | |
749 | return r; | |
750 | ||
751 | if (ret) | |
752 | *ret = o; | |
753 | ||
754 | if (offset) | |
755 | *offset = np; | |
756 | ||
757 | return 0; | |
758 | } | |
759 | ||
760 | int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset) { | |
761 | unsigned i; | |
762 | EntryItem *items; | |
763 | int r; | |
764 | uint64_t xor_hash = 0; | |
765 | ||
766 | assert(f); | |
767 | assert(iovec || n_iovec == 0); | |
768 | ||
769 | items = new(EntryItem, n_iovec); | |
770 | if (!items) | |
771 | return -ENOMEM; | |
772 | ||
773 | for (i = 0; i < n_iovec; i++) { | |
774 | uint64_t p; | |
775 | Object *o; | |
776 | ||
777 | r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p); | |
778 | if (r < 0) | |
779 | goto finish; | |
780 | ||
781 | xor_hash ^= le64toh(o->data.hash); | |
782 | items[i].object_offset = htole64(p); | |
783 | } | |
784 | ||
785 | r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, ret, offset); | |
786 | ||
787 | finish: | |
788 | free(items); | |
789 | ||
790 | return r; | |
791 | } | |
792 | ||
793 | int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset) { | |
794 | Object *o; | |
795 | uint64_t lower, upper, p, n, k; | |
796 | int r; | |
797 | ||
798 | assert(f); | |
799 | ||
800 | n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); | |
801 | k = le64toh(f->header->arena_max_size) / n; | |
802 | ||
803 | lower = 0; | |
804 | upper = le64toh(f->header->last_bisect_offset)/k+1; | |
805 | ||
806 | while (lower < upper) { | |
807 | k = (upper + lower) / 2; | |
808 | p = le64toh(f->bisect_table[k]); | |
809 | ||
810 | if (p == 0) { | |
811 | upper = k; | |
812 | continue; | |
813 | } | |
814 | ||
815 | r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); | |
816 | if (r < 0) | |
817 | return r; | |
818 | ||
819 | if (o->entry.seqnum == seqnum) { | |
820 | if (ret) | |
821 | *ret = o; | |
822 | ||
823 | if (offset) | |
824 | *offset = p; | |
825 | ||
826 | return 1; | |
827 | } else if (seqnum < o->entry.seqnum) | |
828 | upper = k; | |
829 | else if (seqnum > o->entry.seqnum) | |
830 | lower = k+1; | |
831 | } | |
832 | ||
833 | assert(lower == upper); | |
834 | ||
835 | if (lower <= 0) | |
836 | return 0; | |
837 | ||
838 | /* The object we are looking for is between | |
839 | * bisect_table[lower-1] and bisect_table[lower] */ | |
840 | ||
841 | p = le64toh(f->bisect_table[lower-1]); | |
842 | ||
843 | for (;;) { | |
844 | r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); | |
845 | if (r < 0) | |
846 | return r; | |
847 | ||
848 | if (o->entry.seqnum == seqnum) { | |
849 | if (ret) | |
850 | *ret = o; | |
851 | ||
852 | if (offset) | |
853 | *offset = p; | |
854 | ||
855 | return 1; | |
856 | ||
857 | } if (seqnum < o->entry.seqnum) | |
858 | return 0; | |
859 | ||
860 | if (o->entry.next_entry_offset == 0) | |
861 | return 0; | |
862 | ||
863 | p = le64toh(o->entry.next_entry_offset); | |
864 | } | |
865 | ||
866 | return 0; | |
867 | } | |
868 | ||
869 | int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { | |
870 | uint64_t np; | |
871 | int r; | |
872 | ||
873 | assert(f); | |
874 | ||
875 | if (!o) | |
876 | np = le64toh(f->header->head_entry_offset); | |
877 | else { | |
878 | if (le64toh(o->object.type) != OBJECT_ENTRY) | |
879 | return -EINVAL; | |
880 | ||
881 | np = le64toh(o->entry.next_entry_offset); | |
882 | } | |
883 | ||
884 | if (np == 0) | |
885 | return 0; | |
886 | ||
887 | r = journal_file_move_to_object(f, np, OBJECT_ENTRY, &o); | |
888 | if (r < 0) | |
889 | return r; | |
890 | ||
891 | if (ret) | |
892 | *ret = o; | |
893 | ||
894 | if (offset) | |
895 | *offset = np; | |
896 | ||
897 | return 1; | |
898 | } | |
899 | ||
900 | int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { | |
901 | uint64_t np; | |
902 | int r; | |
903 | ||
904 | assert(f); | |
905 | ||
906 | if (!o) | |
907 | np = le64toh(f->header->tail_entry_offset); | |
908 | else { | |
909 | if (le64toh(o->object.type) != OBJECT_ENTRY) | |
910 | return -EINVAL; | |
911 | ||
912 | np = le64toh(o->entry.prev_entry_offset); | |
913 | } | |
914 | ||
915 | if (np == 0) | |
916 | return 0; | |
917 | ||
918 | r = journal_file_move_to_object(f, np, OBJECT_ENTRY, &o); | |
919 | if (r < 0) | |
920 | return r; | |
921 | ||
922 | if (ret) | |
923 | *ret = o; | |
924 | ||
925 | if (offset) | |
926 | *offset = np; | |
927 | ||
928 | return 1; | |
929 | } | |
930 | ||
931 | int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { | |
932 | uint64_t p, osize, hash, h; | |
933 | int r; | |
934 | ||
935 | assert(f); | |
936 | assert(data || size == 0); | |
937 | ||
938 | osize = offsetof(Object, data.payload) + size; | |
939 | ||
940 | hash = hash64(data, size); | |
941 | h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); | |
942 | p = le64toh(f->hash_table[h].head_hash_offset); | |
943 | ||
944 | while (p != 0) { | |
945 | Object *o; | |
946 | ||
947 | r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); | |
948 | if (r < 0) | |
949 | return r; | |
950 | ||
951 | if (le64toh(o->object.size) == osize && | |
952 | memcmp(o->data.payload, data, size) == 0) { | |
953 | ||
954 | if (le64toh(o->data.hash) != hash) | |
955 | return -EBADMSG; | |
956 | ||
957 | if (o->data.head_entry_offset == 0) | |
958 | return 0; | |
959 | ||
960 | p = le64toh(o->data.head_entry_offset); | |
961 | r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); | |
962 | if (r < 0) | |
963 | return r; | |
964 | ||
965 | if (ret) | |
966 | *ret = o; | |
967 | ||
968 | if (offset) | |
969 | *offset = p; | |
970 | ||
971 | return 1; | |
972 | } | |
973 | ||
974 | p = le64toh(o->data.next_hash_offset); | |
975 | } | |
976 | ||
977 | return 0; | |
978 | } | |
979 | ||
980 | int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { | |
981 | uint64_t p, osize, hash, h; | |
982 | int r; | |
983 | ||
984 | assert(f); | |
985 | assert(data || size == 0); | |
986 | ||
987 | osize = offsetof(Object, data.payload) + size; | |
988 | ||
989 | hash = hash64(data, size); | |
990 | h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); | |
991 | p = le64toh(f->hash_table[h].tail_hash_offset); | |
992 | ||
993 | while (p != 0) { | |
994 | Object *o; | |
995 | ||
996 | r = journal_file_move_to_object(f, p, OBJECT_DATA, &o); | |
997 | if (r < 0) | |
998 | return r; | |
999 | ||
1000 | if (le64toh(o->object.size) == osize && | |
1001 | memcmp(o->data.payload, data, size) == 0) { | |
1002 | ||
1003 | if (le64toh(o->data.hash) != hash) | |
1004 | return -EBADMSG; | |
1005 | ||
1006 | if (o->data.tail_entry_offset == 0) | |
1007 | return 0; | |
1008 | ||
1009 | p = le64toh(o->data.tail_entry_offset); | |
1010 | r = journal_file_move_to_object(f, p, OBJECT_ENTRY, &o); | |
1011 | if (r < 0) | |
1012 | return r; | |
1013 | ||
1014 | if (ret) | |
1015 | *ret = o; | |
1016 | ||
1017 | if (offset) | |
1018 | *offset = p; | |
1019 | ||
1020 | return 1; | |
1021 | } | |
1022 | ||
1023 | p = le64toh(o->data.prev_hash_offset); | |
1024 | } | |
1025 | ||
1026 | return 0; | |
1027 | } | |
1028 | ||
1029 | void journal_file_dump(JournalFile *f) { | |
1030 | char a[33], b[33], c[33]; | |
1031 | Object *o; | |
1032 | int r; | |
1033 | uint64_t p; | |
1034 | ||
1035 | assert(f); | |
1036 | ||
1037 | printf("File ID: %s\n" | |
1038 | "Machine ID: %s\n" | |
1039 | "Boot ID: %s\n" | |
1040 | "Arena size: %llu\n", | |
1041 | sd_id128_to_string(f->header->file_id, a), | |
1042 | sd_id128_to_string(f->header->machine_id, b), | |
1043 | sd_id128_to_string(f->header->boot_id, c), | |
1044 | (unsigned long long) le64toh(f->header->arena_size)); | |
1045 | ||
1046 | p = le64toh(f->header->head_object_offset); | |
1047 | while (p != 0) { | |
1048 | r = journal_file_move_to_object(f, p, -1, &o); | |
1049 | if (r < 0) | |
1050 | goto fail; | |
1051 | ||
1052 | switch (o->object.type) { | |
1053 | ||
1054 | case OBJECT_UNUSED: | |
1055 | printf("Type: OBJECT_UNUSED\n"); | |
1056 | break; | |
1057 | ||
1058 | case OBJECT_DATA: | |
1059 | printf("Type: OBJECT_DATA\n"); | |
1060 | break; | |
1061 | ||
1062 | case OBJECT_ENTRY: | |
1063 | printf("Type: OBJECT_ENTRY %llu\n", (unsigned long long) le64toh(o->entry.seqnum)); | |
1064 | break; | |
1065 | ||
1066 | case OBJECT_HASH_TABLE: | |
1067 | printf("Type: OBJECT_HASH_TABLE\n"); | |
1068 | break; | |
1069 | ||
1070 | case OBJECT_BISECT_TABLE: | |
1071 | printf("Type: OBJECT_BISECT_TABLE\n"); | |
1072 | break; | |
1073 | } | |
1074 | ||
1075 | if (p == le64toh(f->header->tail_object_offset)) | |
1076 | p = 0; | |
1077 | else | |
1078 | p = p + ALIGN64(le64toh(o->object.size)); | |
1079 | } | |
1080 | ||
1081 | return; | |
1082 | fail: | |
1083 | log_error("File corrupt"); | |
1084 | } | |
1085 | ||
1086 | int journal_file_open( | |
1087 | const char *fname, | |
1088 | int flags, | |
1089 | mode_t mode, | |
1090 | JournalFile **ret) { | |
1091 | ||
1092 | JournalFile *f; | |
1093 | int r; | |
1094 | bool newly_created = false; | |
1095 | ||
1096 | assert(fname); | |
1097 | ||
1098 | if ((flags & O_ACCMODE) != O_RDONLY && | |
1099 | (flags & O_ACCMODE) != O_RDWR) | |
1100 | return -EINVAL; | |
1101 | ||
1102 | f = new0(JournalFile, 1); | |
1103 | if (!f) | |
1104 | return -ENOMEM; | |
1105 | ||
1106 | f->writable = (flags & O_ACCMODE) != O_RDONLY; | |
1107 | f->prot = prot_from_flags(flags); | |
1108 | ||
1109 | f->fd = open(fname, flags|O_CLOEXEC, mode); | |
1110 | if (f->fd < 0) { | |
1111 | r = -errno; | |
1112 | goto fail; | |
1113 | } | |
1114 | ||
1115 | f->path = strdup(fname); | |
1116 | if (!f->path) { | |
1117 | r = -ENOMEM; | |
1118 | goto fail; | |
1119 | } | |
1120 | ||
1121 | if (fstat(f->fd, &f->last_stat) < 0) { | |
1122 | r = -errno; | |
1123 | goto fail; | |
1124 | } | |
1125 | ||
1126 | if (f->last_stat.st_size == 0 && f->writable) { | |
1127 | newly_created = true; | |
1128 | ||
1129 | r = journal_file_init_header(f); | |
1130 | if (r < 0) | |
1131 | goto fail; | |
1132 | ||
1133 | if (fstat(f->fd, &f->last_stat) < 0) { | |
1134 | r = -errno; | |
1135 | goto fail; | |
1136 | } | |
1137 | } | |
1138 | ||
1139 | if (f->last_stat.st_size < (off_t) sizeof(Header)) { | |
1140 | r = -EIO; | |
1141 | goto fail; | |
1142 | } | |
1143 | ||
1144 | f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0); | |
1145 | if (f->header == MAP_FAILED) { | |
1146 | f->header = NULL; | |
1147 | r = -errno; | |
1148 | goto fail; | |
1149 | } | |
1150 | ||
1151 | if (!newly_created) { | |
1152 | r = journal_file_verify_header(f); | |
1153 | if (r < 0) | |
1154 | goto fail; | |
1155 | } | |
1156 | ||
1157 | if (f->writable) { | |
1158 | r = journal_file_refresh_header(f); | |
1159 | if (r < 0) | |
1160 | goto fail; | |
1161 | } | |
1162 | ||
1163 | if (newly_created) { | |
1164 | ||
1165 | r = journal_file_setup_hash_table(f); | |
1166 | if (r < 0) | |
1167 | goto fail; | |
1168 | ||
1169 | r = journal_file_setup_bisect_table(f); | |
1170 | if (r < 0) | |
1171 | goto fail; | |
1172 | } | |
1173 | ||
1174 | r = journal_file_map_hash_table(f); | |
1175 | if (r < 0) | |
1176 | goto fail; | |
1177 | ||
1178 | r = journal_file_map_bisect_table(f); | |
1179 | if (r < 0) | |
1180 | goto fail; | |
1181 | ||
1182 | if (ret) | |
1183 | *ret = f; | |
1184 | ||
1185 | return 0; | |
1186 | ||
1187 | fail: | |
1188 | journal_file_close(f); | |
1189 | ||
1190 | return r; | |
1191 | } |