]>
Commit | Line | Data |
---|---|---|
87d2c1ff LP |
1 | /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ |
2 | ||
3 | /*** | |
4 | This file is part of systemd. | |
5 | ||
6 | Copyright 2011 Lennart Poettering | |
7 | ||
8 | systemd is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 2 of the License, or | |
11 | (at your option) any later version. | |
12 | ||
13 | systemd is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
20 | ***/ | |
21 | ||
22 | #include <sys/mman.h> | |
23 | #include <errno.h> | |
24 | #include <sys/uio.h> | |
25 | #include <unistd.h> | |
26 | #include <sys/statvfs.h> | |
27 | #include <fcntl.h> | |
28 | #include <stddef.h> | |
29 | ||
30 | #include "sd-journal.h" | |
31 | #include "journal-def.h" | |
32 | #include "journal-private.h" | |
33 | #include "lookup3.h" | |
34 | #include "list.h" | |
35 | ||
/* Arena limits written into the header of every newly created file
 * (see journal_file_init_header()): hard upper bound, the size below which
 * the free-disk-space check is skipped, and the amount of disk space to
 * leave untouched. All values are in bytes. */
#define DEFAULT_ARENA_MAX_SIZE    (16ULL*1024ULL*1024ULL*1024ULL)
#define DEFAULT_ARENA_MIN_SIZE    (256ULL*1024ULL)
#define DEFAULT_ARENA_KEEP_FREE   (1ULL*1024ULL*1024ULL)

/* Size in bytes of the hash table object created for new files */
#define DEFAULT_HASH_TABLE_SIZE   (2047ULL*16ULL)

/* Size in bytes of the bisect table object created for new files: one
 * 8 byte slot per 64 KiB of the maximum arena size */
#define DEFAULT_BISECT_TABLE_SIZE ((DEFAULT_ARENA_MAX_SIZE/(64ULL*1024ULL))*8ULL)

/* Preferred size in bytes of the sliding mmap() window */
#define DEFAULT_WINDOW_SIZE       (128ULL*1024ULL*1024ULL)
44 | ||
45 | struct JournalFile { | |
46 | sd_journal *journal; | |
47 | ||
48 | int fd; | |
49 | char *path; | |
50 | struct stat last_stat; | |
51 | int prot; | |
52 | bool writable; | |
53 | ||
54 | Header *header; | |
55 | ||
56 | HashItem *hash_table; | |
57 | void *hash_table_window; | |
58 | uint64_t hash_table_window_size; | |
59 | ||
60 | uint64_t *bisect_table; | |
61 | void *bisect_table_window; | |
62 | uint64_t bisect_table_window_size; | |
63 | ||
64 | void *window; | |
65 | uint64_t window_offset; | |
66 | uint64_t window_size; | |
67 | ||
68 | Object *current; | |
69 | uint64_t current_offset; | |
70 | ||
71 | LIST_FIELDS(JournalFile, files); | |
72 | }; | |
73 | ||
74 | struct sd_journal { | |
75 | LIST_HEAD(JournalFile, files); | |
76 | }; | |
77 | ||
/* Magic bytes at the very start of every journal file: copied into new
 * headers and compared against when verifying an existing one. Note that
 * there is no trailing NUL; the array is exactly 8 bytes. */
static const char signature[] = {
        'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H'
};
79 | ||
/* Round x up to the next multiple of 8 (i.e. to a 64 bit boundary).
 * The result has type unsigned long long; x is evaluated once. */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
81 | ||
82 | void journal_file_close(JournalFile *f) { | |
83 | assert(f); | |
84 | ||
85 | if (f->journal) | |
86 | LIST_REMOVE(JournalFile, files, f->journal->files, f); | |
87 | ||
88 | if (f->fd >= 0) | |
89 | close_nointr_nofail(f->fd); | |
90 | ||
91 | if (f->header) | |
92 | munmap(f->header, PAGE_ALIGN(sizeof(Header))); | |
93 | ||
94 | if (f->hash_table_window) | |
95 | munmap(f->hash_table_window, f->hash_table_window_size); | |
96 | ||
97 | if (f->bisect_table_window) | |
98 | munmap(f->bisect_table_window, f->bisect_table_window_size); | |
99 | ||
100 | if (f->window) | |
101 | munmap(f->window, f->window_size); | |
102 | ||
103 | free(f->path); | |
104 | free(f); | |
105 | } | |
106 | ||
107 | static int journal_file_init_header(JournalFile *f) { | |
108 | Header h; | |
109 | ssize_t k; | |
110 | int r; | |
111 | ||
112 | assert(f); | |
113 | ||
114 | zero(h); | |
115 | memcpy(h.signature, signature, 8); | |
116 | h.arena_offset = htole64(ALIGN64(sizeof(h))); | |
117 | h.arena_max_size = htole64(DEFAULT_ARENA_MAX_SIZE); | |
118 | h.arena_min_size = htole64(DEFAULT_ARENA_MIN_SIZE); | |
119 | h.arena_keep_free = htole64(DEFAULT_ARENA_KEEP_FREE); | |
120 | ||
121 | r = sd_id128_randomize(&h.file_id); | |
122 | if (r < 0) | |
123 | return r; | |
124 | ||
125 | k = pwrite(f->fd, &h, sizeof(h), 0); | |
126 | if (k < 0) | |
127 | return -errno; | |
128 | ||
129 | if (k != sizeof(h)) | |
130 | return -EIO; | |
131 | ||
132 | return 0; | |
133 | } | |
134 | ||
135 | static int journal_file_refresh_header(JournalFile *f) { | |
136 | int r; | |
137 | ||
138 | assert(f); | |
139 | ||
140 | r = sd_id128_get_machine(&f->header->machine_id); | |
141 | if (r < 0) | |
142 | return r; | |
143 | ||
144 | r = sd_id128_get_boot(&f->header->boot_id); | |
145 | if (r < 0) | |
146 | return r; | |
147 | ||
148 | f->header->state = htole32(STATE_ONLINE); | |
149 | return 0; | |
150 | } | |
151 | ||
152 | static int journal_file_verify_header(JournalFile *f) { | |
153 | assert(f); | |
154 | ||
155 | if (memcmp(f->header, signature, 8)) | |
156 | return -EBADMSG; | |
157 | ||
158 | if (f->header->incompatible_flags != 0) | |
159 | return -EPROTONOSUPPORT; | |
160 | ||
161 | if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->arena_offset) + le64toh(f->header->arena_size))) | |
162 | return -ENODATA; | |
163 | ||
164 | if (f->writable) { | |
165 | uint32_t state; | |
166 | sd_id128_t machine_id; | |
167 | int r; | |
168 | ||
169 | r = sd_id128_get_machine(&machine_id); | |
170 | if (r < 0) | |
171 | return r; | |
172 | ||
173 | if (!sd_id128_equal(machine_id, f->header->machine_id)) | |
174 | return -EHOSTDOWN; | |
175 | ||
176 | state = le32toh(f->header->state); | |
177 | ||
178 | if (state == STATE_ONLINE) | |
179 | log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path); | |
180 | else if (state == STATE_ARCHIVED) | |
181 | return -ESHUTDOWN; | |
182 | else if (state != STATE_OFFLINE) | |
183 | log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state); | |
184 | } | |
185 | ||
186 | return 0; | |
187 | } | |
188 | ||
189 | static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) { | |
190 | uint64_t asize; | |
191 | uint64_t old_size, new_size; | |
192 | ||
193 | assert(f); | |
194 | ||
195 | if (offset < le64toh(f->header->arena_offset)) | |
196 | return -EINVAL; | |
197 | ||
198 | new_size = PAGE_ALIGN(offset + size); | |
199 | ||
200 | /* We assume that this file is not sparse, and we know that | |
201 | * for sure, since we alway call posix_fallocate() | |
202 | * ourselves */ | |
203 | ||
204 | old_size = | |
205 | le64toh(f->header->arena_offset) + | |
206 | le64toh(f->header->arena_size); | |
207 | ||
208 | if (old_size >= new_size) | |
209 | return 0; | |
210 | ||
211 | asize = new_size - le64toh(f->header->arena_offset); | |
212 | ||
213 | if (asize > le64toh(f->header->arena_min_size)) { | |
214 | struct statvfs svfs; | |
215 | ||
216 | if (fstatvfs(f->fd, &svfs) >= 0) { | |
217 | uint64_t available; | |
218 | ||
219 | available = svfs.f_bfree * svfs.f_bsize; | |
220 | ||
221 | if (available >= f->header->arena_keep_free) | |
222 | available -= f->header->arena_keep_free; | |
223 | else | |
224 | available = 0; | |
225 | ||
226 | if (new_size - old_size > available) | |
227 | return -E2BIG; | |
228 | } | |
229 | } | |
230 | ||
231 | if (asize > le64toh(f->header->arena_max_size)) | |
232 | return -E2BIG; | |
233 | ||
234 | if (posix_fallocate(f->fd, 0, new_size) < 0) | |
235 | return -errno; | |
236 | ||
237 | if (fstat(f->fd, &f->last_stat) < 0) | |
238 | return -errno; | |
239 | ||
240 | f->header->arena_size = htole64(asize); | |
241 | ||
242 | return 0; | |
243 | } | |
244 | ||
245 | static int journal_file_map( | |
246 | JournalFile *f, | |
247 | uint64_t offset, | |
248 | uint64_t size, | |
249 | void **_window, | |
250 | uint64_t *_woffset, | |
251 | uint64_t *_wsize, | |
252 | void **ret) { | |
253 | ||
254 | uint64_t woffset, wsize; | |
255 | void *window; | |
256 | ||
257 | assert(f); | |
258 | assert(size > 0); | |
259 | assert(ret); | |
260 | ||
261 | woffset = offset & ~((uint64_t) page_size() - 1ULL); | |
262 | wsize = size + (offset - woffset); | |
263 | wsize = PAGE_ALIGN(wsize); | |
264 | ||
265 | window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset); | |
266 | if (window == MAP_FAILED) | |
267 | return -errno; | |
268 | ||
269 | if (_window) | |
270 | *_window = window; | |
271 | ||
272 | if (_woffset) | |
273 | *_woffset = woffset; | |
274 | ||
275 | if (_wsize) | |
276 | *_wsize = wsize; | |
277 | ||
278 | *ret = (uint8_t*) window + (offset - woffset); | |
279 | ||
280 | return 0; | |
281 | } | |
282 | ||
283 | static int journal_file_move_to(JournalFile *f, uint64_t offset, uint64_t size, void **ret) { | |
284 | void *p; | |
285 | uint64_t delta; | |
286 | int r; | |
287 | ||
288 | assert(f); | |
289 | assert(ret); | |
290 | ||
291 | if (_likely_(f->window && | |
292 | f->window_offset <= offset && | |
293 | f->window_offset+f->window_size >= offset + size)) { | |
294 | ||
295 | *ret = (uint8_t*) f->window + (offset - f->window_offset); | |
296 | return 0; | |
297 | } | |
298 | ||
299 | if (f->window) { | |
300 | if (munmap(f->window, f->window_size) < 0) | |
301 | return -errno; | |
302 | ||
303 | f->window = NULL; | |
304 | f->window_size = f->window_offset = 0; | |
305 | } | |
306 | ||
307 | if (size < DEFAULT_WINDOW_SIZE) { | |
308 | /* If the default window size is larger then what was | |
309 | * asked for extend the mapping a bit in the hope to | |
310 | * minimize needed remappings later on. We add half | |
311 | * the window space before and half behind the | |
312 | * requested mapping */ | |
313 | ||
314 | delta = PAGE_ALIGN((DEFAULT_WINDOW_SIZE - size) / 2); | |
315 | ||
316 | if (offset < delta) | |
317 | delta = offset; | |
318 | ||
319 | offset -= delta; | |
320 | size += (DEFAULT_WINDOW_SIZE - delta); | |
321 | } else | |
322 | delta = 0; | |
323 | ||
324 | r = journal_file_map(f, | |
325 | offset, size, | |
326 | &f->window, &f->window_offset, &f->window_size, | |
327 | & p); | |
328 | ||
329 | if (r < 0) | |
330 | return r; | |
331 | ||
332 | *ret = (uint8_t*) p + delta; | |
333 | return 0; | |
334 | } | |
335 | ||
336 | static bool verify_hash(Object *o) { | |
337 | uint64_t t; | |
338 | ||
339 | assert(o); | |
340 | ||
341 | t = le64toh(o->object.type); | |
342 | if (t == OBJECT_DATA) { | |
343 | uint64_t s, h1, h2; | |
344 | ||
345 | s = le64toh(o->object.size); | |
346 | ||
347 | h1 = le64toh(o->data.hash); | |
348 | h2 = hash64(o->data.payload, s - offsetof(Object, data.payload)); | |
349 | ||
350 | return h1 == h2; | |
351 | } | |
352 | ||
353 | return true; | |
354 | } | |
355 | ||
356 | int journal_file_move_to_object(JournalFile *f, uint64_t offset, Object **ret) { | |
357 | int r; | |
358 | void *t; | |
359 | Object *o; | |
360 | uint64_t s; | |
361 | ||
362 | assert(f); | |
363 | assert(ret); | |
364 | ||
365 | r = journal_file_move_to(f, offset, sizeof(ObjectHeader), &t); | |
366 | if (r < 0) | |
367 | return r; | |
368 | ||
369 | o = (Object*) t; | |
370 | s = le64toh(o->object.size); | |
371 | ||
372 | if (s < sizeof(ObjectHeader)) | |
373 | return -EBADMSG; | |
374 | ||
375 | if (s > sizeof(ObjectHeader)) { | |
376 | r = journal_file_move_to(f, offset, s, &t); | |
377 | if (r < 0) | |
378 | return r; | |
379 | ||
380 | o = (Object*) t; | |
381 | } | |
382 | ||
383 | if (!verify_hash(o)) | |
384 | return -EBADMSG; | |
385 | ||
386 | *ret = o; | |
387 | return 0; | |
388 | } | |
389 | ||
390 | static uint64_t journal_file_seqnum(JournalFile *f) { | |
391 | uint64_t r; | |
392 | ||
393 | assert(f); | |
394 | ||
395 | r = le64toh(f->header->seqnum) + 1; | |
396 | f->header->seqnum = htole64(r); | |
397 | ||
398 | return r; | |
399 | } | |
400 | ||
401 | static int journal_file_append_object(JournalFile *f, uint64_t size, Object **ret, uint64_t *offset) { | |
402 | int r; | |
403 | uint64_t p; | |
404 | Object *tail, *o; | |
405 | void *t; | |
406 | ||
407 | assert(f); | |
408 | assert(size >= sizeof(ObjectHeader)); | |
409 | assert(offset); | |
410 | assert(ret); | |
411 | ||
412 | p = le64toh(f->header->tail_object_offset); | |
413 | ||
414 | if (p == 0) | |
415 | p = le64toh(f->header->arena_offset); | |
416 | else { | |
417 | r = journal_file_move_to_object(f, p, &tail); | |
418 | if (r < 0) | |
419 | return r; | |
420 | ||
421 | p += ALIGN64(le64toh(tail->object.size)); | |
422 | } | |
423 | ||
424 | r = journal_file_allocate(f, p, size); | |
425 | if (r < 0) | |
426 | return r; | |
427 | ||
428 | r = journal_file_move_to(f, p, size, &t); | |
429 | if (r < 0) | |
430 | return r; | |
431 | ||
432 | o = (Object*) t; | |
433 | ||
434 | zero(o->object); | |
435 | o->object.type = htole64(OBJECT_UNUSED); | |
436 | zero(o->object.reserved); | |
437 | o->object.size = htole64(size); | |
438 | ||
439 | f->header->tail_object_offset = htole64(p); | |
440 | if (f->header->head_object_offset == 0) | |
441 | f->header->head_object_offset = htole64(p); | |
442 | ||
443 | f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1); | |
444 | ||
445 | *ret = o; | |
446 | *offset = p; | |
447 | ||
448 | return 0; | |
449 | } | |
450 | ||
451 | static int journal_file_setup_hash_table(JournalFile *f) { | |
452 | uint64_t s, p; | |
453 | Object *o; | |
454 | int r; | |
455 | ||
456 | assert(f); | |
457 | ||
458 | s = DEFAULT_HASH_TABLE_SIZE; | |
459 | r = journal_file_append_object(f, offsetof(Object, hash_table.table) + s, &o, &p); | |
460 | if (r < 0) | |
461 | return r; | |
462 | ||
463 | o->object.type = htole64(OBJECT_HASH_TABLE); | |
464 | memset(o->hash_table.table, 0, s); | |
465 | ||
466 | f->header->hash_table_offset = htole64(p + offsetof(Object, hash_table.table)); | |
467 | f->header->hash_table_size = htole64(s); | |
468 | ||
469 | return 0; | |
470 | } | |
471 | ||
472 | static int journal_file_setup_bisect_table(JournalFile *f) { | |
473 | uint64_t s, p; | |
474 | Object *o; | |
475 | int r; | |
476 | ||
477 | assert(f); | |
478 | ||
479 | s = DEFAULT_BISECT_TABLE_SIZE; | |
480 | r = journal_file_append_object(f, offsetof(Object, bisect_table.table) + s, &o, &p); | |
481 | if (r < 0) | |
482 | return r; | |
483 | ||
484 | o->object.type = htole64(OBJECT_BISECT_TABLE); | |
485 | memset(o->bisect_table.table, 0, s); | |
486 | ||
487 | f->header->bisect_table_offset = htole64(p + offsetof(Object, bisect_table.table)); | |
488 | f->header->bisect_table_size = htole64(s); | |
489 | ||
490 | return 0; | |
491 | } | |
492 | ||
493 | static int journal_file_map_hash_table(JournalFile *f) { | |
494 | uint64_t s, p; | |
495 | void *t; | |
496 | int r; | |
497 | ||
498 | assert(f); | |
499 | ||
500 | p = le64toh(f->header->hash_table_offset); | |
501 | s = le64toh(f->header->hash_table_size); | |
502 | ||
503 | r = journal_file_map(f, | |
504 | p, s, | |
505 | &f->hash_table_window, NULL, &f->hash_table_window_size, | |
506 | &t); | |
507 | if (r < 0) | |
508 | return r; | |
509 | ||
510 | f->hash_table = t; | |
511 | return 0; | |
512 | } | |
513 | ||
514 | static int journal_file_map_bisect_table(JournalFile *f) { | |
515 | uint64_t s, p; | |
516 | void *t; | |
517 | int r; | |
518 | ||
519 | assert(f); | |
520 | ||
521 | p = le64toh(f->header->bisect_table_offset); | |
522 | s = le64toh(f->header->bisect_table_size); | |
523 | ||
524 | r = journal_file_map(f, | |
525 | p, s, | |
526 | &f->bisect_table_window, NULL, &f->bisect_table_window_size, | |
527 | &t); | |
528 | ||
529 | if (r < 0) | |
530 | return r; | |
531 | ||
532 | f->bisect_table = t; | |
533 | return 0; | |
534 | } | |
535 | ||
536 | static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash_index) { | |
537 | uint64_t p; | |
538 | int r; | |
539 | ||
540 | assert(f); | |
541 | assert(o); | |
542 | assert(offset > 0); | |
543 | assert(o->object.type == htole64(OBJECT_DATA)); | |
544 | ||
545 | o->data.head_entry_offset = o->data.tail_entry_offset = 0; | |
546 | o->data.next_hash_offset = 0; | |
547 | ||
548 | p = le64toh(f->hash_table[hash_index].tail_hash_offset); | |
549 | if (p == 0) { | |
550 | /* Only entry in the hash table is easy */ | |
551 | ||
552 | o->data.prev_hash_offset = 0; | |
553 | f->hash_table[hash_index].head_hash_offset = htole64(offset); | |
554 | } else { | |
555 | o->data.prev_hash_offset = htole64(p); | |
556 | ||
557 | /* Temporarily move back to the previous data object, | |
558 | * to patch in pointer */ | |
559 | ||
560 | r = journal_file_move_to_object(f, p, &o); | |
561 | if (r < 0) | |
562 | return r; | |
563 | ||
564 | o->data.next_hash_offset = offset; | |
565 | ||
566 | r = journal_file_move_to_object(f, offset, &o); | |
567 | if (r < 0) | |
568 | return r; | |
569 | } | |
570 | ||
571 | f->hash_table[hash_index].tail_hash_offset = htole64(offset); | |
572 | ||
573 | return 0; | |
574 | } | |
575 | ||
576 | static int journal_file_append_data(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { | |
577 | uint64_t hash, h, p, np; | |
578 | uint64_t osize; | |
579 | Object *o; | |
580 | int r; | |
581 | ||
582 | assert(f); | |
583 | assert(data || size == 0); | |
584 | ||
585 | osize = offsetof(Object, data.payload) + size; | |
586 | ||
587 | hash = hash64(data, size); | |
588 | h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); | |
589 | p = le64toh(f->hash_table[h].head_hash_offset); | |
590 | ||
591 | while (p != 0) { | |
592 | /* Look for this data object in the hash table */ | |
593 | ||
594 | r = journal_file_move_to_object(f, p, &o); | |
595 | if (r < 0) | |
596 | return r; | |
597 | ||
598 | if (le64toh(o->object.type) != OBJECT_DATA) | |
599 | return -EBADMSG; | |
600 | ||
601 | if (le64toh(o->object.size) == osize && | |
602 | memcmp(o->data.payload, data, size) == 0) { | |
603 | ||
604 | if (le64toh(o->data.hash) != hash) | |
605 | return -EBADMSG; | |
606 | ||
607 | if (ret) | |
608 | *ret = o; | |
609 | ||
610 | if (offset) | |
611 | *offset = p; | |
612 | ||
613 | return 0; | |
614 | } | |
615 | ||
616 | p = le64toh(o->data.next_hash_offset); | |
617 | } | |
618 | ||
619 | r = journal_file_append_object(f, osize, &o, &np); | |
620 | if (r < 0) | |
621 | return r; | |
622 | ||
623 | o->object.type = htole64(OBJECT_DATA); | |
624 | o->data.hash = htole64(hash); | |
625 | memcpy(o->data.payload, data, size); | |
626 | ||
627 | r = journal_file_link_data(f, o, np, h); | |
628 | if (r < 0) | |
629 | return r; | |
630 | ||
631 | if (ret) | |
632 | *ret = o; | |
633 | ||
634 | if (offset) | |
635 | *offset = np; | |
636 | ||
637 | return 0; | |
638 | } | |
639 | ||
640 | uint64_t journal_file_entry_n_items(Object *o) { | |
641 | assert(o); | |
642 | assert(o->object.type == htole64(OBJECT_ENTRY)); | |
643 | ||
644 | return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem); | |
645 | } | |
646 | ||
647 | static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) { | |
648 | uint64_t p, q; | |
649 | int r; | |
650 | assert(f); | |
651 | assert(o); | |
652 | assert(offset > 0); | |
653 | ||
654 | p = le64toh(o->entry.items[i].object_offset); | |
655 | if (p == 0) | |
656 | return -EINVAL; | |
657 | ||
658 | o->entry.items[i].next_entry_offset = 0; | |
659 | ||
660 | /* Move to the data object */ | |
661 | r = journal_file_move_to_object(f, p, &o); | |
662 | if (r < 0) | |
663 | return r; | |
664 | ||
665 | if (o->object.type != htole64(OBJECT_DATA)) | |
666 | return -EBADMSG; | |
667 | ||
668 | q = le64toh(o->data.tail_entry_offset); | |
669 | o->data.tail_entry_offset = htole64(offset); | |
670 | ||
671 | if (q == 0) | |
672 | o->data.head_entry_offset = htole64(offset); | |
673 | else { | |
674 | uint64_t n, j; | |
675 | ||
676 | /* Move to previous entry */ | |
677 | r = journal_file_move_to_object(f, q, &o); | |
678 | if (r < 0) | |
679 | return r; | |
680 | ||
681 | if (o->object.type != htole64(OBJECT_ENTRY)) | |
682 | return -EBADMSG; | |
683 | ||
684 | n = journal_file_entry_n_items(o); | |
685 | for (j = 0; j < n; j++) | |
686 | if (le64toh(o->entry.items[j].object_offset) == p) | |
687 | break; | |
688 | ||
689 | if (j >= n) | |
690 | return -EBADMSG; | |
691 | ||
692 | o->entry.items[j].next_entry_offset = offset; | |
693 | } | |
694 | ||
695 | /* Move back to original entry */ | |
696 | r = journal_file_move_to_object(f, offset, &o); | |
697 | if (r < 0) | |
698 | return r; | |
699 | ||
700 | o->entry.items[i].prev_entry_offset = q; | |
701 | return 0; | |
702 | } | |
703 | ||
704 | static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) { | |
705 | uint64_t p, i, n, k, a, b; | |
706 | int r; | |
707 | ||
708 | assert(f); | |
709 | assert(o); | |
710 | assert(offset > 0); | |
711 | assert(o->object.type == htole64(OBJECT_ENTRY)); | |
712 | ||
713 | /* Link up the entry itself */ | |
714 | p = le64toh(f->header->tail_entry_offset); | |
715 | ||
716 | o->entry.prev_entry_offset = f->header->tail_entry_offset; | |
717 | o->entry.next_entry_offset = 0; | |
718 | ||
719 | if (p == 0) | |
720 | f->header->head_entry_offset = htole64(offset); | |
721 | else { | |
722 | /* Temporarily move back to the previous entry, to | |
723 | * patch in pointer */ | |
724 | ||
725 | r = journal_file_move_to_object(f, p, &o); | |
726 | if (r < 0) | |
727 | return r; | |
728 | ||
729 | o->entry.next_entry_offset = htole64(offset); | |
730 | ||
731 | r = journal_file_move_to_object(f, offset, &o); | |
732 | if (r < 0) | |
733 | return r; | |
734 | } | |
735 | ||
736 | f->header->tail_entry_offset = htole64(offset); | |
737 | ||
738 | /* Link up the items */ | |
739 | n = journal_file_entry_n_items(o); | |
740 | for (i = 0; i < n; i++) { | |
741 | r = journal_file_link_entry_item(f, o, offset, i); | |
742 | if (r < 0) | |
743 | return r; | |
744 | } | |
745 | ||
746 | /* Link up the entry in the bisect table */ | |
747 | n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); | |
748 | k = le64toh(f->header->arena_max_size) / n; | |
749 | ||
750 | a = (le64toh(f->header->last_bisect_offset) + k - 1) / k; | |
751 | b = offset / k; | |
752 | ||
753 | for (; a <= b; a++) | |
754 | f->bisect_table[a] = htole64(offset); | |
755 | ||
756 | f->header->last_bisect_offset = htole64(offset + le64toh(o->object.size)); | |
757 | ||
758 | return 0; | |
759 | } | |
760 | ||
dad50316 LP |
761 | static int journal_file_append_entry_internal( |
762 | JournalFile *f, | |
763 | const dual_timestamp *ts, | |
764 | uint64_t xor_hash, | |
765 | const EntryItem items[], unsigned n_items, | |
766 | Object **ret, uint64_t *offset) { | |
87d2c1ff LP |
767 | uint64_t np; |
768 | uint64_t osize; | |
769 | Object *o; | |
770 | int r; | |
771 | ||
772 | assert(f); | |
773 | assert(items || n_items == 0); | |
774 | ||
775 | osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem)); | |
776 | ||
777 | r = journal_file_append_object(f, osize, &o, &np); | |
778 | if (r < 0) | |
779 | return r; | |
780 | ||
781 | o->object.type = htole64(OBJECT_ENTRY); | |
782 | o->entry.seqnum = htole64(journal_file_seqnum(f)); | |
783 | memcpy(o->entry.items, items, n_items * sizeof(EntryItem)); | |
dad50316 LP |
784 | o->entry.realtime = ts ? htole64(ts->realtime) : 0; |
785 | o->entry.monotonic = ts ? htole64(ts->monotonic) : 0; | |
786 | o->entry.xor_hash = htole64(xor_hash); | |
87d2c1ff LP |
787 | |
788 | r = journal_file_link_entry(f, o, np); | |
789 | if (r < 0) | |
790 | return r; | |
791 | ||
792 | if (ret) | |
793 | *ret = o; | |
794 | ||
795 | if (offset) | |
796 | *offset = np; | |
797 | ||
798 | return 0; | |
799 | } | |
800 | ||
801 | int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, Object **ret, uint64_t *offset) { | |
802 | unsigned i; | |
803 | EntryItem *items; | |
804 | int r; | |
dad50316 | 805 | uint64_t xor_hash = 0; |
87d2c1ff LP |
806 | |
807 | assert(f); | |
dad50316 | 808 | assert(iovec || n_iovec == 0); |
87d2c1ff LP |
809 | |
810 | items = new(EntryItem, n_iovec); | |
811 | if (!items) | |
812 | return -ENOMEM; | |
813 | ||
814 | for (i = 0; i < n_iovec; i++) { | |
815 | uint64_t p; | |
dad50316 | 816 | Object *o; |
87d2c1ff | 817 | |
dad50316 | 818 | r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p); |
87d2c1ff LP |
819 | if (r < 0) |
820 | goto finish; | |
821 | ||
dad50316 | 822 | xor_hash ^= le64toh(o->data.hash); |
87d2c1ff LP |
823 | items[i].object_offset = htole64(p); |
824 | } | |
825 | ||
dad50316 | 826 | r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, ret, offset); |
87d2c1ff LP |
827 | |
828 | finish: | |
829 | free(items); | |
830 | ||
831 | return r; | |
832 | } | |
833 | ||
834 | int journal_file_move_to_entry(JournalFile *f, uint64_t seqnum, Object **ret, uint64_t *offset) { | |
835 | Object *o; | |
836 | uint64_t lower, upper, p, n, k; | |
837 | int r; | |
838 | ||
839 | assert(f); | |
840 | ||
841 | n = le64toh(f->header->bisect_table_size) / sizeof(uint64_t); | |
842 | k = le64toh(f->header->arena_max_size) / n; | |
843 | ||
844 | lower = 0; | |
845 | upper = le64toh(f->header->last_bisect_offset)/k+1; | |
846 | ||
847 | while (lower < upper) { | |
848 | k = (upper + lower) / 2; | |
849 | p = le64toh(f->bisect_table[k]); | |
850 | ||
851 | if (p == 0) { | |
852 | upper = k; | |
853 | continue; | |
854 | } | |
855 | ||
856 | r = journal_file_move_to_object(f, p, &o); | |
857 | if (r < 0) | |
858 | return r; | |
859 | ||
860 | if (o->object.type != htole64(OBJECT_ENTRY)) | |
861 | return -EBADMSG; | |
862 | ||
863 | if (o->entry.seqnum == seqnum) { | |
864 | if (ret) | |
865 | *ret = o; | |
866 | ||
867 | if (offset) | |
868 | *offset = p; | |
869 | ||
870 | return 1; | |
871 | } else if (seqnum < o->entry.seqnum) | |
872 | upper = k; | |
873 | else if (seqnum > o->entry.seqnum) | |
874 | lower = k+1; | |
875 | } | |
876 | ||
877 | assert(lower == upper); | |
878 | ||
879 | if (lower <= 0) | |
880 | return 0; | |
881 | ||
882 | /* The object we are looking for is between | |
883 | * bisect_table[lower-1] and bisect_table[lower] */ | |
884 | ||
885 | p = le64toh(f->bisect_table[lower-1]); | |
886 | ||
887 | for (;;) { | |
888 | r = journal_file_move_to_object(f, p, &o); | |
889 | if (r < 0) | |
890 | return r; | |
891 | ||
892 | if (o->entry.seqnum == seqnum) { | |
893 | if (ret) | |
894 | *ret = o; | |
895 | ||
896 | if (offset) | |
897 | *offset = p; | |
898 | ||
899 | return 1; | |
900 | ||
901 | } if (seqnum < o->entry.seqnum) | |
902 | return 0; | |
903 | ||
904 | if (o->entry.next_entry_offset == 0) | |
905 | return 0; | |
906 | ||
907 | p = le64toh(o->entry.next_entry_offset); | |
908 | } | |
909 | ||
910 | return 0; | |
911 | } | |
912 | ||
913 | int journal_file_next_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { | |
914 | uint64_t np; | |
915 | int r; | |
916 | ||
917 | assert(f); | |
918 | ||
919 | if (!o) | |
920 | np = le64toh(f->header->head_entry_offset); | |
921 | else { | |
922 | if (le64toh(o->object.type) != OBJECT_ENTRY) | |
923 | return -EINVAL; | |
924 | ||
925 | np = le64toh(o->entry.next_entry_offset); | |
926 | } | |
927 | ||
928 | if (np == 0) | |
929 | return 0; | |
930 | ||
931 | r = journal_file_move_to_object(f, np, &o); | |
932 | if (r < 0) | |
933 | return r; | |
934 | ||
935 | if (le64toh(o->object.type) != OBJECT_ENTRY) | |
936 | return -EBADMSG; | |
937 | ||
938 | if (ret) | |
939 | *ret = o; | |
940 | ||
941 | if (offset) | |
942 | *offset = np; | |
943 | ||
944 | return 1; | |
945 | } | |
946 | ||
947 | int journal_file_prev_entry(JournalFile *f, Object *o, Object **ret, uint64_t *offset) { | |
948 | uint64_t np; | |
949 | int r; | |
950 | ||
951 | assert(f); | |
952 | ||
953 | if (!o) | |
954 | np = le64toh(f->header->tail_entry_offset); | |
955 | else { | |
956 | if (le64toh(o->object.type) != OBJECT_ENTRY) | |
957 | return -EINVAL; | |
958 | ||
959 | np = le64toh(o->entry.prev_entry_offset); | |
960 | } | |
961 | ||
962 | if (np == 0) | |
963 | return 0; | |
964 | ||
965 | r = journal_file_move_to_object(f, np, &o); | |
966 | if (r < 0) | |
967 | return r; | |
968 | ||
969 | if (le64toh(o->object.type) != OBJECT_ENTRY) | |
970 | return -EBADMSG; | |
971 | ||
972 | if (ret) | |
973 | *ret = o; | |
974 | ||
975 | if (offset) | |
976 | *offset = np; | |
977 | ||
978 | return 1; | |
979 | } | |
980 | ||
981 | int journal_file_find_first_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { | |
982 | uint64_t p, osize, hash, h; | |
983 | int r; | |
984 | ||
985 | assert(f); | |
986 | assert(data || size == 0); | |
987 | ||
988 | osize = offsetof(Object, data.payload) + size; | |
989 | ||
990 | hash = hash64(data, size); | |
991 | h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); | |
992 | p = le64toh(f->hash_table[h].head_hash_offset); | |
993 | ||
994 | while (p != 0) { | |
995 | Object *o; | |
996 | ||
997 | r = journal_file_move_to_object(f, p, &o); | |
998 | if (r < 0) | |
999 | return r; | |
1000 | ||
1001 | if (le64toh(o->object.type) != OBJECT_DATA) | |
1002 | return -EBADMSG; | |
1003 | ||
1004 | if (le64toh(o->object.size) == osize && | |
1005 | memcmp(o->data.payload, data, size) == 0) { | |
1006 | ||
1007 | if (le64toh(o->data.hash) != hash) | |
1008 | return -EBADMSG; | |
1009 | ||
1010 | if (o->data.head_entry_offset == 0) | |
1011 | return 0; | |
1012 | ||
1013 | p = le64toh(o->data.head_entry_offset); | |
1014 | r = journal_file_move_to_object(f, p, &o); | |
1015 | if (r < 0) | |
1016 | return r; | |
1017 | ||
1018 | if (le64toh(o->object.type) != OBJECT_ENTRY) | |
1019 | return -EBADMSG; | |
1020 | ||
1021 | if (ret) | |
1022 | *ret = o; | |
1023 | ||
1024 | if (offset) | |
1025 | *offset = p; | |
1026 | ||
1027 | return 1; | |
1028 | } | |
1029 | ||
1030 | p = le64toh(o->data.next_hash_offset); | |
1031 | } | |
1032 | ||
1033 | return 0; | |
1034 | } | |
1035 | ||
1036 | int journal_file_find_last_entry(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset) { | |
1037 | uint64_t p, osize, hash, h; | |
1038 | int r; | |
1039 | ||
1040 | assert(f); | |
1041 | assert(data || size == 0); | |
1042 | ||
1043 | osize = offsetof(Object, data.payload) + size; | |
1044 | ||
1045 | hash = hash64(data, size); | |
1046 | h = hash % (le64toh(f->header->hash_table_size) / sizeof(HashItem)); | |
1047 | p = le64toh(f->hash_table[h].tail_hash_offset); | |
1048 | ||
1049 | while (p != 0) { | |
1050 | Object *o; | |
1051 | ||
1052 | r = journal_file_move_to_object(f, p, &o); | |
1053 | if (r < 0) | |
1054 | return r; | |
1055 | ||
1056 | if (le64toh(o->object.type) != OBJECT_DATA) | |
1057 | return -EBADMSG; | |
1058 | ||
1059 | if (le64toh(o->object.size) == osize && | |
1060 | memcmp(o->data.payload, data, size) == 0) { | |
1061 | ||
1062 | if (le64toh(o->data.hash) != hash) | |
1063 | return -EBADMSG; | |
1064 | ||
1065 | if (o->data.tail_entry_offset == 0) | |
1066 | return 0; | |
1067 | ||
1068 | p = le64toh(o->data.tail_entry_offset); | |
1069 | r = journal_file_move_to_object(f, p, &o); | |
1070 | if (r < 0) | |
1071 | return r; | |
1072 | ||
1073 | if (le64toh(o->object.type) != OBJECT_ENTRY) | |
1074 | return -EBADMSG; | |
1075 | ||
1076 | if (ret) | |
1077 | *ret = o; | |
1078 | ||
1079 | if (offset) | |
1080 | *offset = p; | |
1081 | ||
1082 | return 1; | |
1083 | } | |
1084 | ||
1085 | p = le64toh(o->data.prev_hash_offset); | |
1086 | } | |
1087 | ||
1088 | return 0; | |
1089 | } | |
1090 | ||
1091 | void journal_file_dump(JournalFile *f) { | |
1092 | char a[33], b[33], c[33]; | |
1093 | Object *o; | |
1094 | int r; | |
1095 | uint64_t p; | |
1096 | ||
1097 | assert(f); | |
1098 | ||
1099 | printf("File ID: %s\n" | |
1100 | "Machine ID: %s\n" | |
1101 | "Boot ID: %s\n" | |
1102 | "Arena size: %llu\n", | |
1103 | sd_id128_to_string(f->header->file_id, a), | |
1104 | sd_id128_to_string(f->header->machine_id, b), | |
1105 | sd_id128_to_string(f->header->boot_id, c), | |
1106 | (unsigned long long) le64toh(f->header->arena_size)); | |
1107 | ||
1108 | p = le64toh(f->header->head_object_offset); | |
1109 | while (p != 0) { | |
1110 | r = journal_file_move_to_object(f, p, &o); | |
1111 | if (r < 0) | |
1112 | goto fail; | |
1113 | ||
1114 | switch (o->object.type) { | |
1115 | ||
1116 | case OBJECT_UNUSED: | |
1117 | printf("Type: OBJECT_UNUSED\n"); | |
1118 | break; | |
1119 | ||
1120 | case OBJECT_DATA: | |
1121 | printf("Type: OBJECT_DATA\n"); | |
1122 | break; | |
1123 | ||
1124 | case OBJECT_ENTRY: | |
1125 | printf("Type: OBJECT_ENTRY %llu\n", (unsigned long long) le64toh(o->entry.seqnum)); | |
1126 | break; | |
1127 | ||
1128 | case OBJECT_HASH_TABLE: | |
1129 | printf("Type: OBJECT_HASH_TABLE\n"); | |
1130 | break; | |
1131 | ||
1132 | case OBJECT_BISECT_TABLE: | |
1133 | printf("Type: OBJECT_BISECT_TABLE\n"); | |
1134 | break; | |
1135 | } | |
1136 | ||
1137 | if (p == le64toh(f->header->tail_object_offset)) | |
1138 | p = 0; | |
1139 | else | |
1140 | p = p + ALIGN64(le64toh(o->object.size)); | |
1141 | } | |
1142 | ||
1143 | return; | |
1144 | fail: | |
1145 | log_error("File corrupt"); | |
1146 | } | |
1147 | ||
1148 | int journal_file_open( | |
1149 | sd_journal *j, | |
1150 | const char *fname, | |
1151 | int flags, | |
1152 | mode_t mode, | |
1153 | JournalFile **ret) { | |
1154 | ||
1155 | JournalFile *f; | |
1156 | int r; | |
1157 | bool newly_created = false; | |
1158 | ||
1159 | assert(fname); | |
1160 | ||
1161 | if ((flags & O_ACCMODE) != O_RDONLY && | |
1162 | (flags & O_ACCMODE) != O_RDWR) | |
1163 | return -EINVAL; | |
1164 | ||
1165 | f = new0(JournalFile, 1); | |
1166 | if (!f) | |
1167 | return -ENOMEM; | |
1168 | ||
1169 | f->writable = (flags & O_ACCMODE) != O_RDONLY; | |
1170 | f->prot = prot_from_flags(flags); | |
1171 | ||
1172 | f->fd = open(fname, flags|O_CLOEXEC, mode); | |
1173 | if (f->fd < 0) { | |
1174 | r = -errno; | |
1175 | goto fail; | |
1176 | } | |
1177 | ||
1178 | f->path = strdup(fname); | |
1179 | if (!f->path) { | |
1180 | r = -ENOMEM; | |
1181 | goto fail; | |
1182 | } | |
1183 | ||
1184 | if (fstat(f->fd, &f->last_stat) < 0) { | |
1185 | r = -errno; | |
1186 | goto fail; | |
1187 | } | |
1188 | ||
1189 | if (f->last_stat.st_size == 0 && f->writable) { | |
1190 | newly_created = true; | |
1191 | ||
1192 | r = journal_file_init_header(f); | |
1193 | if (r < 0) | |
1194 | goto fail; | |
1195 | ||
1196 | if (fstat(f->fd, &f->last_stat) < 0) { | |
1197 | r = -errno; | |
1198 | goto fail; | |
1199 | } | |
1200 | } | |
1201 | ||
1202 | if (f->last_stat.st_size < (off_t) sizeof(Header)) { | |
1203 | r = -EIO; | |
1204 | goto fail; | |
1205 | } | |
1206 | ||
1207 | f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0); | |
1208 | if (f->header == MAP_FAILED) { | |
1209 | f->header = NULL; | |
1210 | r = -errno; | |
1211 | goto fail; | |
1212 | } | |
1213 | ||
1214 | if (!newly_created) { | |
1215 | r = journal_file_verify_header(f); | |
1216 | if (r < 0) | |
1217 | goto fail; | |
1218 | } | |
1219 | ||
1220 | if (f->writable) { | |
1221 | r = journal_file_refresh_header(f); | |
1222 | if (r < 0) | |
1223 | goto fail; | |
1224 | } | |
1225 | ||
1226 | if (newly_created) { | |
1227 | ||
1228 | r = journal_file_setup_hash_table(f); | |
1229 | if (r < 0) | |
1230 | goto fail; | |
1231 | ||
1232 | r = journal_file_setup_bisect_table(f); | |
1233 | if (r < 0) | |
1234 | goto fail; | |
1235 | } | |
1236 | ||
1237 | r = journal_file_map_hash_table(f); | |
1238 | if (r < 0) | |
1239 | goto fail; | |
1240 | ||
1241 | r = journal_file_map_bisect_table(f); | |
1242 | if (r < 0) | |
1243 | goto fail; | |
1244 | ||
1245 | if (j) { | |
1246 | LIST_PREPEND(JournalFile, files, j->files, f); | |
1247 | f->journal = j; | |
1248 | } | |
1249 | ||
1250 | if (ret) | |
1251 | *ret = f; | |
1252 | ||
1253 | return 0; | |
1254 | ||
1255 | fail: | |
1256 | journal_file_close(f); | |
1257 | ||
1258 | return r; | |
1259 | } | |
1260 | ||
1261 | int sd_journal_open(sd_journal **ret) { | |
1262 | sd_journal *j; | |
1263 | char *fn; | |
1264 | const char *p; | |
1265 | int r = 0; | |
1266 | const char search_paths[] = | |
1267 | "/run/log/journal\0" | |
1268 | "/var/log/journal\0"; | |
1269 | ||
1270 | assert(ret); | |
1271 | ||
1272 | j = new0(sd_journal, 1); | |
1273 | if (!j) | |
1274 | return -ENOMEM; | |
1275 | ||
1276 | NULSTR_FOREACH(p, search_paths) { | |
1277 | DIR *d; | |
1278 | ||
1279 | d = opendir(p); | |
1280 | if (!d) { | |
1281 | if (errno != ENOENT && r == 0) | |
1282 | r = -errno; | |
1283 | ||
1284 | continue; | |
1285 | } | |
1286 | ||
1287 | for (;;) { | |
1288 | struct dirent buf, *de; | |
1289 | int k; | |
1290 | ||
1291 | k = readdir_r(d, &buf, &de); | |
1292 | if (k != 0) { | |
1293 | if (r == 0) | |
1294 | r = -k; | |
1295 | ||
1296 | break; | |
1297 | } | |
1298 | ||
1299 | if (!de) | |
1300 | break; | |
1301 | ||
1302 | if (!dirent_is_file_with_suffix(de, ".journal")) | |
1303 | continue; | |
1304 | ||
1305 | fn = join(p, "/", de->d_name, NULL); | |
1306 | if (!fn) { | |
1307 | r = -ENOMEM; | |
1308 | closedir(d); | |
1309 | goto fail; | |
1310 | } | |
1311 | ||
1312 | k = journal_file_open(j, fn, O_RDONLY, 0, NULL); | |
1313 | if (k < 0 && r == 0) | |
1314 | r = -k; | |
1315 | ||
1316 | free(fn); | |
1317 | } | |
1318 | } | |
1319 | ||
1320 | if (!j->files) { | |
1321 | if (r >= 0) | |
1322 | r = -ENOENT; | |
1323 | ||
1324 | goto fail; | |
1325 | } | |
1326 | ||
1327 | *ret = j; | |
1328 | return 0; | |
1329 | ||
1330 | fail: | |
1331 | sd_journal_close(j); | |
1332 | ||
1333 | return r; | |
1334 | }; | |
1335 | ||
1336 | void sd_journal_close(sd_journal *j) { | |
1337 | assert(j); | |
1338 | ||
1339 | while (j->files) | |
1340 | journal_file_close(j->files); | |
1341 | ||
1342 | free(j); | |
1343 | } |