]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/mmap-cache.c
journal: parse fsprg seed
[thirdparty/systemd.git] / src / journal / mmap-cache.c
CommitLineData
16e9f408
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2012 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <assert.h>
23#include <sys/mman.h>
24#include <errno.h>
25#include <stdlib.h>
26#include <string.h>
27
28#include "util.h"
29
30#include "mmap-cache.h"
31
32#define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
33#define WINDOWS_MAX 32
34
35typedef struct Window {
36 int fd;
37 void *ptr;
38 uint64_t offset;
39 uint64_t size;
40
41 unsigned n_ref;
42 unsigned lru_prev;
43 unsigned lru_next;
44
45 unsigned by_fd_prev;
46 unsigned by_fd_next;
47} Window;
48
49typedef struct FileDescriptor {
50 int fd;
51 unsigned windows;
52} FileDescriptor;
53
54struct MMapCache {
55 unsigned n_ref;
56
57 unsigned contexts_max;
58 unsigned windows_max;
59 unsigned fds_max;
60
61 unsigned n_windows;
62 unsigned n_fds;
63
64 unsigned lru_first, lru_last;
65
66 Window *windows;
67 unsigned *by_context;
68 FileDescriptor *by_fd;
69};
70
f65425cb
LP
71static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index);
72
16e9f408
LP
73static void mmap_cache_window_unmap(MMapCache *m, unsigned w) {
74 Window *v;
75
76 assert(m);
77 assert(w < m->n_windows);
78
79 v = m->windows + w;
80 if (!v->ptr)
81 return;
82
83 munmap(v->ptr, v->size);
84 v->ptr = NULL;
85}
86
87static void mmap_cache_window_add_lru(MMapCache *m, unsigned w) {
88 Window *v;
89
90 assert(m);
91 assert(w < m->n_windows);
92
93 v = m->windows + w;
f65425cb
LP
94 assert(v->n_ref == 0);
95
96 if (m->lru_last != (unsigned) -1) {
97 assert(m->windows[m->lru_last].lru_next == (unsigned) -1);
98 m->windows[m->lru_last].lru_next = w;
99 }
100
16e9f408
LP
101 v->lru_prev = m->lru_last;
102 v->lru_next = (unsigned) -1;
103
104 m->lru_last = w;
105 if (m->lru_first == (unsigned) -1)
106 m->lru_first = w;
107}
108
109static void mmap_cache_window_remove_lru(MMapCache *m, unsigned w) {
110 Window *v;
111
112 assert(m);
113 assert(w < m->n_windows);
114
115 v = m->windows + w;
116
f65425cb
LP
117 if (v->lru_prev == (unsigned) -1) {
118 assert(m->lru_first == w);
16e9f408 119 m->lru_first = v->lru_next;
f65425cb
LP
120 } else {
121 assert(m->windows[v->lru_prev].lru_next == w);
16e9f408 122 m->windows[v->lru_prev].lru_next = v->lru_next;
f65425cb 123 }
16e9f408 124
f65425cb
LP
125 if (v->lru_next == (unsigned) -1) {
126 assert(m->lru_last == w);
16e9f408 127 m->lru_last = v->lru_prev;
f65425cb
LP
128 } else {
129 assert(m->windows[v->lru_next].lru_prev == w);
16e9f408 130 m->windows[v->lru_next].lru_prev = v->lru_prev;
f65425cb 131 }
16e9f408
LP
132}
133
134static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) {
135 Window *v;
136
137 assert(m);
138 assert(fd_index < m->n_fds);
139
140 v = m->windows + w;
f65425cb
LP
141 assert(m->by_fd[fd_index].fd == v->fd);
142
143 if (m->by_fd[fd_index].windows != (unsigned) -1) {
144 assert(m->windows[m->by_fd[fd_index].windows].by_fd_prev == (unsigned) -1);
145 m->windows[m->by_fd[fd_index].windows].by_fd_prev = w;
146 }
147
16e9f408
LP
148 v->by_fd_next = m->by_fd[fd_index].windows;
149 v->by_fd_prev = (unsigned) -1;
150
151 m->by_fd[fd_index].windows = w;
152}
153
154static void mmap_cache_fd_remove(MMapCache *m, unsigned fd_index, unsigned w) {
155 Window *v;
156
157 assert(m);
158 assert(fd_index < m->n_fds);
159
160 v = m->windows + w;
f65425cb
LP
161 assert(m->by_fd[fd_index].fd == v->fd);
162 assert(v->by_fd_next == (unsigned) -1 || m->windows[v->by_fd_next].fd == v->fd);
163 assert(v->by_fd_prev == (unsigned) -1 || m->windows[v->by_fd_prev].fd == v->fd);
164
165 if (v->by_fd_prev == (unsigned) -1) {
166 assert(m->by_fd[fd_index].windows == w);
16e9f408 167 m->by_fd[fd_index].windows = v->by_fd_next;
f65425cb
LP
168 } else {
169 assert(m->windows[v->by_fd_prev].by_fd_next == w);
16e9f408 170 m->windows[v->by_fd_prev].by_fd_next = v->by_fd_next;
f65425cb 171 }
16e9f408 172
f65425cb
LP
173 if (v->by_fd_next != (unsigned) -1) {
174 assert(m->windows[v->by_fd_next].by_fd_prev == w);
16e9f408 175 m->windows[v->by_fd_next].by_fd_prev = v->by_fd_prev;
f65425cb 176 }
16e9f408
LP
177}
178
179static void mmap_cache_context_unset(MMapCache *m, unsigned c) {
180 Window *v;
181 unsigned w;
182
183 assert(m);
184 assert(c < m->contexts_max);
185
186 if (m->by_context[c] == (unsigned) -1)
187 return;
188
189 w = m->by_context[c];
190 m->by_context[c] = (unsigned) -1;
191
192 v = m->windows + w;
193 assert(v->n_ref > 0);
194 v->n_ref --;
195
196 if (v->n_ref == 0)
197 mmap_cache_window_add_lru(m, w);
198}
199
200static void mmap_cache_context_set(MMapCache *m, unsigned c, unsigned w) {
201 Window *v;
202
203 assert(m);
204 assert(c < m->contexts_max);
205 assert(w < m->n_windows);
206
207 if (m->by_context[c] == w)
208 return;
209
210 mmap_cache_context_unset(m, c);
211
212 m->by_context[c] = w;
213
214 v = m->windows + w;
215 v->n_ref ++;
f65425cb 216
16e9f408
LP
217 if (v->n_ref == 1)
218 mmap_cache_window_remove_lru(m, w);
219}
220
221static void mmap_cache_free(MMapCache *m) {
222
223 assert(m);
224
225 if (m->windows) {
226 unsigned w;
227
228 for (w = 0; w < m->n_windows; w++)
229 mmap_cache_window_unmap(m, w);
230
231 free(m->windows);
232 }
233
234 free(m->by_context);
235 free(m->by_fd);
236 free(m);
237}
238
239MMapCache* mmap_cache_new(unsigned contexts_max, unsigned fds_max) {
240 MMapCache *m;
241
242 assert(contexts_max > 0);
243 assert(fds_max > 0);
244
245 m = new0(MMapCache, 1);
246 if (!m)
247 return NULL;
248
249 m->contexts_max = contexts_max;
250 m->fds_max = fds_max;
251 m->windows_max = MAX(m->contexts_max, WINDOWS_MAX);
252 m->n_ref = 1;
253 m->lru_first = (unsigned) -1;
254 m->lru_last = (unsigned) -1;
255
256 m->windows = new(Window, m->windows_max);
257 if (!m->windows) {
258 mmap_cache_free(m);
259 return NULL;
260 }
261
262 m->by_context = new(unsigned, m->contexts_max);
263 if (!m->by_context) {
264 mmap_cache_free(m);
265 return NULL;
266 }
16e9f408
LP
267 memset(m->by_context, -1, m->contexts_max * sizeof(unsigned));
268
269 m->by_fd = new(FileDescriptor, m->fds_max);
270 if (!m->by_fd) {
271 mmap_cache_free(m);
272 return NULL;
273 }
274
275 return m;
276}
277
278MMapCache* mmap_cache_ref(MMapCache *m) {
279 assert(m);
280 assert(m->n_ref > 0);
281
282 m->n_ref++;
283 return m;
284}
285
286MMapCache* mmap_cache_unref(MMapCache *m) {
287 assert(m);
288 assert(m->n_ref > 0);
289
290 if (m->n_ref == 1)
291 mmap_cache_free(m);
292 else
293 m->n_ref--;
294
295 return NULL;
296}
297
298static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) {
f65425cb
LP
299 Window *v;
300 unsigned fd_index;
301
16e9f408
LP
302 assert(m);
303 assert(w);
304
305 if (m->n_windows < m->windows_max) {
306 *w = m->n_windows ++;
307 return 0;
308 }
309
310 if (m->lru_first == (unsigned) -1)
311 return -E2BIG;
312
313 *w = m->lru_first;
f65425cb
LP
314 v = m->windows + *w;
315 assert(v->n_ref == 0);
316
16e9f408 317 mmap_cache_window_unmap(m, *w);
f65425cb
LP
318
319 if (v->fd >= 0) {
320 assert_se(mmap_cache_peek_fd_index(m, v->fd, &fd_index) > 0);
321 mmap_cache_fd_remove(m, fd_index, *w);
322 }
323
16e9f408
LP
324 mmap_cache_window_remove_lru(m, *w);
325
326 return 0;
327}
328
329static int mmap_cache_make_room(MMapCache *m) {
330 unsigned w;
331
332 assert(m);
333
334 w = m->lru_first;
335 while (w != (unsigned) -1) {
336 Window *v;
337
338 v = m->windows + w;
339
340 if (v->ptr) {
341 mmap_cache_window_unmap(m, w);
342 return 1;
343 }
344
345 w = v->lru_next;
346 }
347
348 return 0;
349}
350
351static int mmap_cache_put(
352 MMapCache *m,
353 int fd,
354 unsigned fd_index,
355 int prot,
356 unsigned context,
357 uint64_t offset,
358 uint64_t size,
359 void **ret) {
360
361 unsigned w;
362 Window *v;
363 void *d;
364 uint64_t woffset, wsize;
365 int r;
366
367 assert(m);
368 assert(fd >= 0);
369 assert(context < m->contexts_max);
370 assert(size > 0);
371 assert(ret);
372
373 woffset = offset & ~((uint64_t) page_size() - 1ULL);
374 wsize = size + (offset - woffset);
375 wsize = PAGE_ALIGN(wsize);
376
377 if (wsize < WINDOW_SIZE) {
378 uint64_t delta;
379
beec0085 380 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
16e9f408
LP
381
382 if (delta > offset)
383 woffset = 0;
384 else
385 woffset -= delta;
386
387 wsize = WINDOW_SIZE;
388 }
389
390 for (;;) {
391 d = mmap(NULL, wsize, prot, MAP_SHARED, fd, woffset);
392 if (d != MAP_FAILED)
393 break;
394 if (errno != ENOMEM)
395 return -errno;
396
397 r = mmap_cache_make_room(m);
398 if (r < 0)
399 return r;
400 if (r == 0)
401 return -ENOMEM;
402 }
403
404 r = mmap_cache_allocate_window(m, &w);
405 if (r < 0) {
406 munmap(d, wsize);
407 return r;
408 }
409
410 v = m->windows + w;
411 v->fd = fd;
412 v->ptr = d;
413 v->offset = woffset;
414 v->size = wsize;
415
416 v->n_ref = 0;
f65425cb 417 mmap_cache_window_add_lru(m, w);
16e9f408
LP
418 mmap_cache_fd_add(m, fd_index, w);
419 mmap_cache_context_set(m, context, w);
420
421 *ret = (uint8_t*) d + (offset - woffset);
422 return 1;
423}
424
425static int fd_cmp(const void *_a, const void *_b) {
426 const FileDescriptor *a = _a, *b = _b;
427
428 if (a->fd < b->fd)
429 return -1;
430 if (a->fd > b->fd)
431 return 1;
432
433 return 0;
434}
435
f65425cb
LP
436static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
437 FileDescriptor *j;
438 unsigned r;
439
440 assert(m);
441 assert(fd >= 0);
442 assert(fd_index);
443
444 for (r = 0; r < m->n_fds; r++)
445 assert(m->by_fd[r].windows == (unsigned) -1 ||
446 m->windows[m->by_fd[r].windows].fd == m->by_fd[r].fd);
447
448 j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(FileDescriptor), fd_cmp);
449 if (!j)
450 return 0;
451
452 *fd_index = (unsigned) (j - m->by_fd);
453 return 1;
454}
455
16e9f408
LP
456static int mmap_cache_get_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
457 FileDescriptor *j;
f65425cb 458 int r;
16e9f408
LP
459
460 assert(m);
461 assert(fd >= 0);
462 assert(fd_index);
463
f65425cb
LP
464 r = mmap_cache_peek_fd_index(m, fd, fd_index);
465 if (r != 0)
466 return r;
16e9f408 467
f65425cb
LP
468 if (m->n_fds >= m->fds_max)
469 return -E2BIG;
16e9f408 470
f65425cb
LP
471 j = m->by_fd + m->n_fds ++;
472 j->fd = fd;
473 j->windows = (unsigned) -1;
16e9f408 474
f65425cb
LP
475 qsort(m->by_fd, m->n_fds, sizeof(FileDescriptor), fd_cmp);
476
477 return mmap_cache_peek_fd_index(m, fd, fd_index);
16e9f408
LP
478}
479
480static bool mmap_cache_test_window(
481 MMapCache *m,
482 unsigned w,
483 uint64_t offset,
484 uint64_t size) {
485 Window *v;
486
487 assert(m);
488 assert(w < m->n_windows);
489 assert(size > 0);
490
491 v = m->windows + w;
492
493 return offset >= v->offset &&
494 offset + size <= v->offset + v->size;
495}
496
497static int mmap_cache_current(
498 MMapCache *m,
499 int fd,
500 unsigned context,
501 uint64_t offset,
502 uint64_t size,
503 void **ret) {
504
505 Window *v;
506 unsigned w;
507
508 assert(m);
509 assert(fd >= 0);
510 assert(context < m->contexts_max);
511 assert(size > 0);
512 assert(ret);
513
514 if (m->by_context[context] == (unsigned) -1)
515 return 0;
516
517 w = m->by_context[context];
518 v = m->windows + w;
519
520 if (v->fd != fd)
521 return 0;
522
523 if (!mmap_cache_test_window(m, w, offset, size))
524 return 0;
525
526 *ret = (uint8_t*) v->ptr + (offset - v->offset);
527 return 1;
528}
529
530static int mmap_cache_find(
531 MMapCache *m,
f65425cb 532 int fd,
16e9f408
LP
533 unsigned fd_index,
534 unsigned context,
535 uint64_t offset,
536 uint64_t size,
537 void **ret) {
538
539 Window *v = NULL;
540 unsigned w;
541
542 assert(m);
f65425cb 543 assert(fd >= 0);
16e9f408
LP
544 assert(fd_index < m->n_fds);
545 assert(context < m->contexts_max);
546 assert(size > 0);
547 assert(ret);
548
549 w = m->by_fd[fd_index].windows;
550 while (w != (unsigned) -1) {
f65425cb
LP
551 v = m->windows + w;
552 assert(v->fd == fd);
553
16e9f408
LP
554 if (mmap_cache_test_window(m, w, offset, size))
555 break;
556
f65425cb 557 w = v->by_fd_next;
16e9f408
LP
558 }
559
560 if (w == (unsigned) -1)
561 return 0;
562
563 mmap_cache_context_set(m, context, w);
564
16e9f408
LP
565 *ret = (uint8_t*) v->ptr + (offset - v->offset);
566 return 1;
567}
568
569int mmap_cache_get(
570 MMapCache *m,
571 int fd,
572 int prot,
573 unsigned context,
574 uint64_t offset,
575 uint64_t size,
576 void **ret) {
577
578 unsigned fd_index;
579 int r;
580
581 assert(m);
582 assert(fd >= 0);
583 assert(context < m->contexts_max);
584 assert(size > 0);
585 assert(ret);
586
587 /* Maybe the current pointer for this context is already the
588 * right one? */
589 r = mmap_cache_current(m, fd, context, offset, size, ret);
590 if (r != 0)
591 return r;
592
f65425cb
LP
593 /* Hmm, drop the reference to the current one, since it wasn't
594 * good enough */
595 mmap_cache_context_unset(m, context);
596
16e9f408
LP
597 /* OK, let's find the chain for this FD */
598 r = mmap_cache_get_fd_index(m, fd, &fd_index);
599 if (r < 0)
600 return r;
601
602 /* And let's look through the available mmaps */
f65425cb 603 r = mmap_cache_find(m, fd, fd_index, context, offset, size, ret);
16e9f408
LP
604 if (r != 0)
605 return r;
606
607 /* Not found? Then, let's add it */
608 return mmap_cache_put(m, fd, fd_index, prot, context, offset, size, ret);
609}
610
611void mmap_cache_close_fd(MMapCache *m, int fd) {
16e9f408 612 unsigned fd_index, c, w;
f65425cb 613 int r;
16e9f408
LP
614
615 assert(m);
616 assert(fd > 0);
617
f65425cb
LP
618 r = mmap_cache_peek_fd_index(m, fd, &fd_index);
619 if (r <= 0)
16e9f408 620 return;
16e9f408
LP
621
622 for (c = 0; c < m->contexts_max; c++) {
623 w = m->by_context[c];
624 if (w == (unsigned) -1)
625 continue;
626
627 if (m->windows[w].fd == fd)
628 mmap_cache_context_unset(m, c);
629 }
630
631 w = m->by_fd[fd_index].windows;
632 while (w != (unsigned) -1) {
f65425cb
LP
633 Window *v;
634
635 v = m->windows + w;
636 assert(v->fd == fd);
16e9f408 637
16e9f408 638 mmap_cache_window_unmap(m, w);
f65425cb
LP
639 mmap_cache_fd_remove(m, fd_index, w);
640 v->fd = -1;
16e9f408
LP
641
642 w = m->by_fd[fd_index].windows;
643 }
644
645 memmove(m->by_fd + fd_index, m->by_fd + fd_index + 1, (m->n_fds - (fd_index + 1)) * sizeof(FileDescriptor));
646 m->n_fds --;
647}
648
f65425cb
LP
649void mmap_cache_close_fd_range(MMapCache *m, int fd, uint64_t p) {
650 unsigned fd_index, c, w;
651 int r;
652
653 assert(m);
654 assert(fd > 0);
655
656 /* This drops all windows that include space right of the
657 * specified offset. This is useful to ensure that after the
658 * file size is extended we drop our mappings of the end and
659 * create it anew, since otherwise it is undefined whether
660 * mapping will continue to work as intended. */
661
662 r = mmap_cache_peek_fd_index(m, fd, &fd_index);
663 if (r <= 0)
664 return;
665
666 for (c = 0; c < m->contexts_max; c++) {
667 w = m->by_context[c];
668
669 if (w != (unsigned) -1 && m->windows[w].fd == fd)
670 mmap_cache_context_unset(m, c);
671 }
672
673 w = m->by_fd[fd_index].windows;
674 while (w != (unsigned) -1) {
675 Window *v;
676
677 v = m->windows + w;
678 assert(v->fd == fd);
679 assert(v->by_fd_next == (unsigned) -1 ||
680 m->windows[v->by_fd_next].fd == fd);
681
682 if (v->offset + v->size > p) {
683
684 mmap_cache_window_unmap(m, w);
685 mmap_cache_fd_remove(m, fd_index, w);
686 v->fd = -1;
687
688 w = m->by_fd[fd_index].windows;
689 } else
690 w = v->by_fd_next;
691 }
692}
693
16e9f408
LP
694void mmap_cache_close_context(MMapCache *m, unsigned context) {
695 mmap_cache_context_unset(m, context);
696}