]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/mmap-cache.c
tree-wide: sort includes
[thirdparty/systemd.git] / src / journal / mmap-cache.c
CommitLineData
16e9f408
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2012 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
16e9f408
LP
22#include <errno.h>
23#include <stdlib.h>
f8019684 24#include <sys/mman.h>
16e9f408 25
b5efdb8a 26#include "alloc-util.h"
f8019684
LP
27#include "hashmap.h"
28#include "list.h"
29#include "log.h"
f8019684 30#include "macro.h"
16e9f408 31#include "mmap-cache.h"
cf0fbc49
TA
32#include "sigbus.h"
33#include "util.h"
16e9f408 34
f8019684
LP
/* Forward declarations of the cache's internal bookkeeping types; the
 * structures reference each other, so the names are needed up front. */
typedef struct Window Window;
typedef struct Context Context;
typedef struct FileDescriptor FileDescriptor;
84168d80 38
f8019684
LP
39struct Window {
40 MMapCache *cache;
41
fa6ac760 42 bool invalidated;
1b8951e5 43 bool keep_always;
f8019684 44 bool in_unused;
16e9f408 45
68667801 46 int prot;
16e9f408
LP
47 void *ptr;
48 uint64_t offset;
f8019684
LP
49 size_t size;
50
51 FileDescriptor *fd;
16e9f408 52
f8019684
LP
53 LIST_FIELDS(Window, by_fd);
54 LIST_FIELDS(Window, unused);
55
56 LIST_HEAD(Context, contexts);
57};
16e9f408 58
f8019684
LP
59struct Context {
60 MMapCache *cache;
61 unsigned id;
62 Window *window;
16e9f408 63
f8019684
LP
64 LIST_FIELDS(Context, by_window);
65};
66
67struct FileDescriptor {
68 MMapCache *cache;
16e9f408 69 int fd;
fa6ac760 70 bool sigbus;
f8019684
LP
71 LIST_HEAD(Window, windows);
72};
16e9f408
LP
73
74struct MMapCache {
f8019684 75 int n_ref;
68667801 76 unsigned n_windows;
16e9f408 77
bf807d4d
LP
78 unsigned n_hit, n_missed;
79
80
f8019684 81 Hashmap *fds;
69adae51 82 Context *contexts[MMAP_CACHE_MAX_CONTEXTS];
16e9f408 83
f8019684
LP
84 LIST_HEAD(Window, unused);
85 Window *last_unused;
16e9f408
LP
86};
87
/* Unused windows are only recycled by window_add() once more than this many
 * windows are allocated. */
#define WINDOWS_MIN 64

#ifndef ENABLE_DEBUG_MMAP_CACHE
/* Regular window size: 8 MiB. */
# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#else
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
# define WINDOW_SIZE (page_size())
#endif
16e9f408 96
f8019684
LP
97MMapCache* mmap_cache_new(void) {
98 MMapCache *m;
16e9f408 99
f8019684
LP
100 m = new0(MMapCache, 1);
101 if (!m)
102 return NULL;
16e9f408 103
f8019684
LP
104 m->n_ref = 1;
105 return m;
16e9f408
LP
106}
107
f8019684 108MMapCache* mmap_cache_ref(MMapCache *m) {
16e9f408 109 assert(m);
f8019684 110 assert(m->n_ref > 0);
16e9f408 111
f8019684
LP
112 m->n_ref ++;
113 return m;
114}
f65425cb 115
f8019684
LP
116static void window_unlink(Window *w) {
117 Context *c;
f65425cb 118
f8019684 119 assert(w);
16e9f408 120
f8019684
LP
121 if (w->ptr)
122 munmap(w->ptr, w->size);
16e9f408 123
f8019684 124 if (w->fd)
71fda00f 125 LIST_REMOVE(by_fd, w->fd->windows, w);
16e9f408 126
f8019684
LP
127 if (w->in_unused) {
128 if (w->cache->last_unused == w)
129 w->cache->last_unused = w->unused_prev;
16e9f408 130
71fda00f 131 LIST_REMOVE(unused, w->cache->unused, w);
f65425cb 132 }
16e9f408 133
f8019684
LP
134 LIST_FOREACH(by_window, c, w->contexts) {
135 assert(c->window == w);
136 c->window = NULL;
f65425cb 137 }
16e9f408
LP
138}
139
fa6ac760
LP
140static void window_invalidate(Window *w) {
141 assert(w);
142
143 if (w->invalidated)
144 return;
145
146 /* Replace the window with anonymous pages. This is useful
147 * when we hit a SIGBUS and want to make sure the file cannot
148 * trigger any further SIGBUS, possibly overrunning the sigbus
149 * queue. */
150
151 assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
152 w->invalidated = true;
153}
154
f8019684
LP
155static void window_free(Window *w) {
156 assert(w);
f65425cb 157
f8019684 158 window_unlink(w);
89de6947 159 w->cache->n_windows--;
f8019684
LP
160 free(w);
161}
f65425cb 162
44a6b1b6 163_pure_ static bool window_matches(Window *w, int fd, int prot, uint64_t offset, size_t size) {
f8019684
LP
164 assert(w);
165 assert(fd >= 0);
166 assert(size > 0);
16e9f408 167
f8019684
LP
168 return
169 w->fd &&
170 fd == w->fd->fd &&
171 prot == w->prot &&
172 offset >= w->offset &&
173 offset + size <= w->offset + w->size;
16e9f408
LP
174}
175
f8019684
LP
176static Window *window_add(MMapCache *m) {
177 Window *w;
16e9f408
LP
178
179 assert(m);
16e9f408 180
f8019684 181 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
f65425cb 182
f8019684
LP
183 /* Allocate a new window */
184 w = new0(Window, 1);
185 if (!w)
186 return NULL;
89de6947 187 m->n_windows++;
f65425cb 188 } else {
16e9f408 189
f8019684
LP
190 /* Reuse an existing one */
191 w = m->last_unused;
192 window_unlink(w);
193 zero(*w);
f65425cb 194 }
f8019684
LP
195
196 w->cache = m;
197 return w;
16e9f408
LP
198}
199
f8019684
LP
200static void context_detach_window(Context *c) {
201 Window *w;
16e9f408 202
f8019684 203 assert(c);
16e9f408 204
f8019684 205 if (!c->window)
16e9f408
LP
206 return;
207
f8019684
LP
208 w = c->window;
209 c->window = NULL;
71fda00f 210 LIST_REMOVE(by_window, w->contexts, c);
16e9f408 211
1b8951e5 212 if (!w->contexts && !w->keep_always) {
f8019684 213 /* Not used anymore? */
fad5a6c6
MS
214#ifdef ENABLE_DEBUG_MMAP_CACHE
215 /* Unmap unused windows immediately to expose use-after-unmap
216 * by SIGSEGV. */
217 window_free(w);
218#else
71fda00f 219 LIST_PREPEND(unused, c->cache->unused, w);
f8019684
LP
220 if (!c->cache->last_unused)
221 c->cache->last_unused = w;
16e9f408 222
f8019684 223 w->in_unused = true;
fad5a6c6 224#endif
f8019684 225 }
16e9f408
LP
226}
227
f8019684
LP
228static void context_attach_window(Context *c, Window *w) {
229 assert(c);
230 assert(w);
16e9f408 231
f8019684 232 if (c->window == w)
16e9f408
LP
233 return;
234
f8019684 235 context_detach_window(c);
16e9f408 236
e18021f7 237 if (w->in_unused) {
f8019684 238 /* Used again? */
71fda00f 239 LIST_REMOVE(unused, c->cache->unused, w);
a2ab7ee6
CG
240 if (c->cache->last_unused == w)
241 c->cache->last_unused = w->unused_prev;
16e9f408 242
f8019684
LP
243 w->in_unused = false;
244 }
f65425cb 245
f8019684 246 c->window = w;
71fda00f 247 LIST_PREPEND(by_window, w->contexts, c);
16e9f408
LP
248}
249
f8019684
LP
250static Context *context_add(MMapCache *m, unsigned id) {
251 Context *c;
16e9f408
LP
252
253 assert(m);
254
69adae51 255 c = m->contexts[id];
f8019684
LP
256 if (c)
257 return c;
258
f8019684
LP
259 c = new0(Context, 1);
260 if (!c)
261 return NULL;
16e9f408 262
f8019684
LP
263 c->cache = m;
264 c->id = id;
16e9f408 265
69adae51
MS
266 assert(!m->contexts[id]);
267 m->contexts[id] = c;
16e9f408 268
f8019684 269 return c;
16e9f408
LP
270}
271
f8019684
LP
272static void context_free(Context *c) {
273 assert(c);
16e9f408 274
f8019684 275 context_detach_window(c);
16e9f408 276
69adae51
MS
277 if (c->cache) {
278 assert(c->cache->contexts[c->id] == c);
279 c->cache->contexts[c->id] = NULL;
280 }
16e9f408 281
f8019684
LP
282 free(c);
283}
284
285static void fd_free(FileDescriptor *f) {
286 assert(f);
287
288 while (f->windows)
289 window_free(f->windows);
290
291 if (f->cache)
292 assert_se(hashmap_remove(f->cache->fds, INT_TO_PTR(f->fd + 1)));
293
294 free(f);
295}
296
297static FileDescriptor* fd_add(MMapCache *m, int fd) {
298 FileDescriptor *f;
299 int r;
300
301 assert(m);
302 assert(fd >= 0);
303
304 f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
305 if (f)
306 return f;
307
d5099efc 308 r = hashmap_ensure_allocated(&m->fds, NULL);
f8019684 309 if (r < 0)
16e9f408 310 return NULL;
16e9f408 311
f8019684
LP
312 f = new0(FileDescriptor, 1);
313 if (!f)
16e9f408 314 return NULL;
16e9f408 315
f8019684
LP
316 f->cache = m;
317 f->fd = fd;
318
319 r = hashmap_put(m->fds, UINT_TO_PTR(fd + 1), f);
320 if (r < 0) {
321 free(f);
16e9f408
LP
322 return NULL;
323 }
324
f8019684 325 return f;
16e9f408
LP
326}
327
f8019684 328static void mmap_cache_free(MMapCache *m) {
f8019684 329 FileDescriptor *f;
69adae51 330 int i;
f8019684 331
16e9f408 332 assert(m);
16e9f408 333
69adae51
MS
334 for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
335 if (m->contexts[i])
336 context_free(m->contexts[i]);
8e6d9397 337
f8019684
LP
338 while ((f = hashmap_first(m->fds)))
339 fd_free(f);
340
8e6d9397
GM
341 hashmap_free(m->fds);
342
f8019684
LP
343 while (m->unused)
344 window_free(m->unused);
345
346 free(m);
16e9f408
LP
347}
348
349MMapCache* mmap_cache_unref(MMapCache *m) {
350 assert(m);
351 assert(m->n_ref > 0);
352
f8019684
LP
353 m->n_ref --;
354 if (m->n_ref == 0)
16e9f408 355 mmap_cache_free(m);
16e9f408
LP
356
357 return NULL;
358}
359
f8019684
LP
360static int make_room(MMapCache *m) {
361 assert(m);
362
363 if (!m->last_unused)
364 return 0;
365
366 window_free(m->last_unused);
367 return 1;
368}
369
370static int try_context(
371 MMapCache *m,
372 int fd,
373 int prot,
374 unsigned context,
375 bool keep_always,
376 uint64_t offset,
377 size_t size,
1b8951e5 378 void **ret) {
f8019684
LP
379
380 Context *c;
f65425cb 381
16e9f408 382 assert(m);
f8019684
LP
383 assert(m->n_ref > 0);
384 assert(fd >= 0);
385 assert(size > 0);
1b8951e5 386 assert(ret);
16e9f408 387
69adae51 388 c = m->contexts[context];
f8019684 389 if (!c)
16e9f408 390 return 0;
16e9f408 391
f8019684 392 assert(c->id == context);
16e9f408 393
f8019684
LP
394 if (!c->window)
395 return 0;
f65425cb 396
f8019684 397 if (!window_matches(c->window, fd, prot, offset, size)) {
f65425cb 398
f8019684
LP
399 /* Drop the reference to the window, since it's unnecessary now */
400 context_detach_window(c);
401 return 0;
f65425cb
LP
402 }
403
fa6ac760
LP
404 if (c->window->fd->sigbus)
405 return -EIO;
406
1b8951e5 407 c->window->keep_always |= keep_always;
16e9f408 408
1b8951e5 409 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
f8019684 410 return 1;
16e9f408
LP
411}
412
f8019684
LP
413static int find_mmap(
414 MMapCache *m,
415 int fd,
416 int prot,
417 unsigned context,
418 bool keep_always,
419 uint64_t offset,
420 size_t size,
1b8951e5 421 void **ret) {
f8019684
LP
422
423 FileDescriptor *f;
424 Window *w;
425 Context *c;
16e9f408
LP
426
427 assert(m);
f8019684
LP
428 assert(m->n_ref > 0);
429 assert(fd >= 0);
430 assert(size > 0);
16e9f408 431
f8019684
LP
432 f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
433 if (!f)
434 return 0;
16e9f408 435
f8019684 436 assert(f->fd == fd);
16e9f408 437
fa6ac760
LP
438 if (f->sigbus)
439 return -EIO;
440
f8019684
LP
441 LIST_FOREACH(by_fd, w, f->windows)
442 if (window_matches(w, fd, prot, offset, size))
443 break;
16e9f408 444
f8019684
LP
445 if (!w)
446 return 0;
447
448 c = context_add(m, context);
449 if (!c)
450 return -ENOMEM;
451
452 context_attach_window(c, w);
ae97089d 453 w->keep_always += keep_always;
16e9f408 454
1b8951e5 455 *ret = (uint8_t*) w->ptr + (offset - w->offset);
f8019684 456 return 1;
16e9f408
LP
457}
458
f8019684 459static int add_mmap(
16e9f408
LP
460 MMapCache *m,
461 int fd,
16e9f408
LP
462 int prot,
463 unsigned context,
fcde2389 464 bool keep_always,
16e9f408 465 uint64_t offset,
f8019684 466 size_t size,
fcde2389 467 struct stat *st,
1b8951e5 468 void **ret) {
16e9f408 469
16e9f408 470 uint64_t woffset, wsize;
f8019684
LP
471 Context *c;
472 FileDescriptor *f;
473 Window *w;
474 void *d;
16e9f408
LP
475 int r;
476
477 assert(m);
f8019684 478 assert(m->n_ref > 0);
16e9f408 479 assert(fd >= 0);
16e9f408 480 assert(size > 0);
1b8951e5 481 assert(ret);
16e9f408
LP
482
483 woffset = offset & ~((uint64_t) page_size() - 1ULL);
484 wsize = size + (offset - woffset);
485 wsize = PAGE_ALIGN(wsize);
486
487 if (wsize < WINDOW_SIZE) {
488 uint64_t delta;
489
beec0085 490 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
16e9f408
LP
491
492 if (delta > offset)
493 woffset = 0;
494 else
495 woffset -= delta;
496
497 wsize = WINDOW_SIZE;
498 }
499
fcde2389
LP
500 if (st) {
501 /* Memory maps that are larger then the files
c5315881 502 underneath have undefined behavior. Hence, clamp
fcde2389
LP
503 things to the file size if we know it */
504
505 if (woffset >= (uint64_t) st->st_size)
506 return -EADDRNOTAVAIL;
507
508 if (woffset + wsize > (uint64_t) st->st_size)
509 wsize = PAGE_ALIGN(st->st_size - woffset);
510 }
511
16e9f408
LP
512 for (;;) {
513 d = mmap(NULL, wsize, prot, MAP_SHARED, fd, woffset);
514 if (d != MAP_FAILED)
515 break;
516 if (errno != ENOMEM)
517 return -errno;
518
f8019684 519 r = make_room(m);
16e9f408
LP
520 if (r < 0)
521 return r;
522 if (r == 0)
523 return -ENOMEM;
524 }
525
f8019684
LP
526 c = context_add(m, context);
527 if (!c)
b67ddc7b 528 goto outofmem;
16e9f408 529
f8019684
LP
530 f = fd_add(m, fd);
531 if (!f)
b67ddc7b 532 goto outofmem;
16e9f408 533
f8019684
LP
534 w = window_add(m);
535 if (!w)
b67ddc7b 536 goto outofmem;
16e9f408 537
f8019684
LP
538 w->keep_always = keep_always;
539 w->ptr = d;
540 w->offset = woffset;
541 w->prot = prot;
542 w->size = wsize;
543 w->fd = f;
16e9f408 544
71fda00f 545 LIST_PREPEND(by_fd, f->windows, w);
16e9f408 546
f8019684
LP
547 context_detach_window(c);
548 c->window = w;
71fda00f 549 LIST_PREPEND(by_window, w->contexts, c);
16e9f408 550
1b8951e5 551 *ret = (uint8_t*) w->ptr + (offset - w->offset);
16e9f408 552 return 1;
b67ddc7b
PDS
553
554outofmem:
555 munmap(d, wsize);
556 return -ENOMEM;
16e9f408
LP
557}
558
559int mmap_cache_get(
560 MMapCache *m,
561 int fd,
562 int prot,
563 unsigned context,
fcde2389 564 bool keep_always,
16e9f408 565 uint64_t offset,
f8019684 566 size_t size,
fcde2389 567 struct stat *st,
1b8951e5 568 void **ret) {
16e9f408 569
16e9f408
LP
570 int r;
571
572 assert(m);
f8019684 573 assert(m->n_ref > 0);
16e9f408 574 assert(fd >= 0);
16e9f408 575 assert(size > 0);
1b8951e5 576 assert(ret);
69adae51 577 assert(context < MMAP_CACHE_MAX_CONTEXTS);
16e9f408 578
f8019684 579 /* Check whether the current context is the right one already */
1b8951e5 580 r = try_context(m, fd, prot, context, keep_always, offset, size, ret);
bf807d4d
LP
581 if (r != 0) {
582 m->n_hit ++;
16e9f408 583 return r;
bf807d4d 584 }
16e9f408 585
f8019684 586 /* Search for a matching mmap */
1b8951e5 587 r = find_mmap(m, fd, prot, context, keep_always, offset, size, ret);
bf807d4d
LP
588 if (r != 0) {
589 m->n_hit ++;
16e9f408 590 return r;
bf807d4d
LP
591 }
592
593 m->n_missed++;
16e9f408 594
f8019684 595 /* Create a new mmap */
1b8951e5 596 return add_mmap(m, fd, prot, context, keep_always, offset, size, st, ret);
ae97089d
ZJS
597}
598
fa6ac760
LP
599unsigned mmap_cache_get_hit(MMapCache *m) {
600 assert(m);
601
602 return m->n_hit;
603}
604
605unsigned mmap_cache_get_missed(MMapCache *m) {
606 assert(m);
607
608 return m->n_missed;
609}
610
611static void mmap_cache_process_sigbus(MMapCache *m) {
612 bool found = false;
f8019684 613 FileDescriptor *f;
fa6ac760
LP
614 Iterator i;
615 int r;
16e9f408
LP
616
617 assert(m);
16e9f408 618
fa6ac760
LP
619 /* Iterate through all triggered pages and mark their files as
620 * invalidated */
621 for (;;) {
622 bool ours;
623 void *addr;
624
625 r = sigbus_pop(&addr);
626 if (_likely_(r == 0))
627 break;
628 if (r < 0) {
629 log_error_errno(r, "SIGBUS handling failed: %m");
630 abort();
631 }
632
633 ours = false;
634 HASHMAP_FOREACH(f, m->fds, i) {
635 Window *w;
636
637 LIST_FOREACH(by_fd, w, f->windows) {
638 if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
639 (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
640 found = ours = f->sigbus = true;
641 break;
642 }
643 }
644
645 if (ours)
646 break;
647 }
648
649 /* Didn't find a matching window, give up */
650 if (!ours) {
651 log_error("Unknown SIGBUS page, aborting.");
652 abort();
653 }
654 }
655
656 /* The list of triggered pages is now empty. Now, let's remap
657 * all windows of the triggered file to anonymous maps, so
658 * that no page of the file in question is triggered again, so
659 * that we can be sure not to hit the queue size limit. */
660 if (_likely_(!found))
16e9f408 661 return;
16e9f408 662
fa6ac760
LP
663 HASHMAP_FOREACH(f, m->fds, i) {
664 Window *w;
665
666 if (!f->sigbus)
667 continue;
668
669 LIST_FOREACH(by_fd, w, f->windows)
670 window_invalidate(w);
671 }
f8019684 672}
16e9f408 673
fa6ac760
LP
674bool mmap_cache_got_sigbus(MMapCache *m, int fd) {
675 FileDescriptor *f;
676
bf807d4d 677 assert(m);
fa6ac760 678 assert(fd >= 0);
bf807d4d 679
fa6ac760
LP
680 mmap_cache_process_sigbus(m);
681
682 f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
683 if (!f)
684 return false;
685
686 return f->sigbus;
bf807d4d
LP
687}
688
fa6ac760
LP
689void mmap_cache_close_fd(MMapCache *m, int fd) {
690 FileDescriptor *f;
691
bf807d4d 692 assert(m);
fa6ac760 693 assert(fd >= 0);
bf807d4d 694
fa6ac760
LP
695 /* Make sure that any queued SIGBUS are first dispatched, so
696 * that we don't end up with a SIGBUS entry we cannot relate
697 * to any existing memory map */
698
699 mmap_cache_process_sigbus(m);
700
701 f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
702 if (!f)
703 return;
704
705 fd_free(f);
bf807d4d 706}