git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/mmap-cache.c
Merge pull request #2533 from keszybz/read-only-seed
[thirdparty/systemd.git] / src / journal / mmap-cache.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2012 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <stdlib.h>
24 #include <sys/mman.h>
25
26 #include "alloc-util.h"
27 #include "fd-util.h"
28 #include "hashmap.h"
29 #include "list.h"
30 #include "log.h"
31 #include "macro.h"
32 #include "mmap-cache.h"
33 #include "sigbus.h"
34 #include "util.h"
35
/* Forward declarations, so that the structures defined below can hold
 * pointers to one another regardless of definition order. */
typedef struct Window Window;
typedef struct Context Context;
typedef struct FileDescriptor FileDescriptor;
39
/* One memory-mapped region of a file that is tracked by the cache. */
struct Window {
        /* The cache this window is accounted to */
        MMapCache *cache;

        /* Set once window_invalidate() has replaced the mapping with anonymous pages */
        bool invalidated:1;
        /* When set, the window is not moved to the unused list after its last context detaches */
        bool keep_always:1;
        /* True while the window is linked into cache->unused */
        bool in_unused:1;

        /* Protection flags, start address, file offset and length of the mapping */
        int prot;
        void *ptr;
        uint64_t offset;
        size_t size;

        /* The file this window maps */
        FileDescriptor *fd;

        /* Linkage in fd->windows and in cache->unused, respectively */
        LIST_FIELDS(Window, by_fd);
        LIST_FIELDS(Window, unused);

        /* All contexts whose ->window currently points here */
        LIST_HEAD(Context, contexts);
};
59
/* A numbered slot of the cache that remembers the window it is currently
 * attached to, so that repeated lookups for the same context can be answered
 * without searching (see try_context()). */
struct Context {
        MMapCache *cache;
        /* Index of this context in cache->contexts[] */
        unsigned id;
        /* Currently attached window, or NULL */
        Window *window;

        /* Linkage in window->contexts */
        LIST_FIELDS(Context, by_window);
};
67
/* Per-file bookkeeping: groups all windows that map the same file descriptor. */
struct FileDescriptor {
        MMapCache *cache;
        /* The file descriptor; also used as the key in cache->fds */
        int fd;
        /* Set when a queued SIGBUS address fell into one of this file's
         * windows; lookups for the file then fail with -EIO */
        bool sigbus;
        /* All windows mapped from this file */
        LIST_HEAD(Window, windows);
};
74
/* The cache object itself. Reference counted via mmap_cache_ref() and
 * mmap_cache_unref(). */
struct MMapCache {
        int n_ref;
        /* Number of Window objects currently allocated */
        unsigned n_windows;

        /* Lookup statistics, see mmap_cache_get() */
        unsigned n_hit, n_missed;

        /* Maps file descriptors to FileDescriptor objects; allocated lazily in fd_add() */
        Hashmap *fds;
        /* Context slots, indexed by context id; NULL until first used */
        Context *contexts[MMAP_CACHE_MAX_CONTEXTS];

        /* Windows that currently have no context attached. New entries are
         * prepended; last_unused tracks the other end of the list and is
         * the window that gets recycled or freed first. */
        LIST_HEAD(Window, unused);
        Window *last_unused;
};
87
/* Unused windows are only recycled by window_add() once more than this many
 * windows exist; below that, new Window objects are allocated. */
#define WINDOWS_MIN 64

/* Minimum size of a mapping created by add_mmap(); smaller requests are
 * enlarged to this size. */
#ifdef ENABLE_DEBUG_MMAP_CACHE
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
# define WINDOW_SIZE (page_size())
#else
# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#endif
96
97 MMapCache* mmap_cache_new(void) {
98 MMapCache *m;
99
100 m = new0(MMapCache, 1);
101 if (!m)
102 return NULL;
103
104 m->n_ref = 1;
105 return m;
106 }
107
108 MMapCache* mmap_cache_ref(MMapCache *m) {
109 assert(m);
110 assert(m->n_ref > 0);
111
112 m->n_ref ++;
113 return m;
114 }
115
116 static void window_unlink(Window *w) {
117 Context *c;
118
119 assert(w);
120
121 if (w->ptr)
122 munmap(w->ptr, w->size);
123
124 if (w->fd)
125 LIST_REMOVE(by_fd, w->fd->windows, w);
126
127 if (w->in_unused) {
128 if (w->cache->last_unused == w)
129 w->cache->last_unused = w->unused_prev;
130
131 LIST_REMOVE(unused, w->cache->unused, w);
132 }
133
134 LIST_FOREACH(by_window, c, w->contexts) {
135 assert(c->window == w);
136 c->window = NULL;
137 }
138 }
139
140 static void window_invalidate(Window *w) {
141 assert(w);
142
143 if (w->invalidated)
144 return;
145
146 /* Replace the window with anonymous pages. This is useful
147 * when we hit a SIGBUS and want to make sure the file cannot
148 * trigger any further SIGBUS, possibly overrunning the sigbus
149 * queue. */
150
151 assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
152 w->invalidated = true;
153 }
154
155 static void window_free(Window *w) {
156 assert(w);
157
158 window_unlink(w);
159 w->cache->n_windows--;
160 free(w);
161 }
162
163 _pure_ static bool window_matches(Window *w, int fd, int prot, uint64_t offset, size_t size) {
164 assert(w);
165 assert(fd >= 0);
166 assert(size > 0);
167
168 return
169 w->fd &&
170 fd == w->fd->fd &&
171 prot == w->prot &&
172 offset >= w->offset &&
173 offset + size <= w->offset + w->size;
174 }
175
176 static Window *window_add(MMapCache *m, FileDescriptor *fd, int prot, bool keep_always, uint64_t offset, size_t size, void *ptr) {
177 Window *w;
178
179 assert(m);
180 assert(fd);
181
182 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
183
184 /* Allocate a new window */
185 w = new0(Window, 1);
186 if (!w)
187 return NULL;
188 m->n_windows++;
189 } else {
190
191 /* Reuse an existing one */
192 w = m->last_unused;
193 window_unlink(w);
194 zero(*w);
195 }
196
197 w->cache = m;
198 w->fd = fd;
199 w->prot = prot;
200 w->keep_always = keep_always;
201 w->offset = offset;
202 w->size = size;
203 w->ptr = ptr;
204
205 LIST_PREPEND(by_fd, fd->windows, w);
206
207 return w;
208 }
209
210 static void context_detach_window(Context *c) {
211 Window *w;
212
213 assert(c);
214
215 if (!c->window)
216 return;
217
218 w = c->window;
219 c->window = NULL;
220 LIST_REMOVE(by_window, w->contexts, c);
221
222 if (!w->contexts && !w->keep_always) {
223 /* Not used anymore? */
224 #ifdef ENABLE_DEBUG_MMAP_CACHE
225 /* Unmap unused windows immediately to expose use-after-unmap
226 * by SIGSEGV. */
227 window_free(w);
228 #else
229 LIST_PREPEND(unused, c->cache->unused, w);
230 if (!c->cache->last_unused)
231 c->cache->last_unused = w;
232
233 w->in_unused = true;
234 #endif
235 }
236 }
237
238 static void context_attach_window(Context *c, Window *w) {
239 assert(c);
240 assert(w);
241
242 if (c->window == w)
243 return;
244
245 context_detach_window(c);
246
247 if (w->in_unused) {
248 /* Used again? */
249 LIST_REMOVE(unused, c->cache->unused, w);
250 if (c->cache->last_unused == w)
251 c->cache->last_unused = w->unused_prev;
252
253 w->in_unused = false;
254 }
255
256 c->window = w;
257 LIST_PREPEND(by_window, w->contexts, c);
258 }
259
260 static Context *context_add(MMapCache *m, unsigned id) {
261 Context *c;
262
263 assert(m);
264
265 c = m->contexts[id];
266 if (c)
267 return c;
268
269 c = new0(Context, 1);
270 if (!c)
271 return NULL;
272
273 c->cache = m;
274 c->id = id;
275
276 assert(!m->contexts[id]);
277 m->contexts[id] = c;
278
279 return c;
280 }
281
282 static void context_free(Context *c) {
283 assert(c);
284
285 context_detach_window(c);
286
287 if (c->cache) {
288 assert(c->cache->contexts[c->id] == c);
289 c->cache->contexts[c->id] = NULL;
290 }
291
292 free(c);
293 }
294
295 static void fd_free(FileDescriptor *f) {
296 assert(f);
297
298 while (f->windows)
299 window_free(f->windows);
300
301 if (f->cache)
302 assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
303
304 free(f);
305 }
306
307 static FileDescriptor* fd_add(MMapCache *m, int fd) {
308 FileDescriptor *f;
309 int r;
310
311 assert(m);
312 assert(fd >= 0);
313
314 f = hashmap_get(m->fds, FD_TO_PTR(fd));
315 if (f)
316 return f;
317
318 r = hashmap_ensure_allocated(&m->fds, NULL);
319 if (r < 0)
320 return NULL;
321
322 f = new0(FileDescriptor, 1);
323 if (!f)
324 return NULL;
325
326 f->cache = m;
327 f->fd = fd;
328
329 r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
330 if (r < 0) {
331 free(f);
332 return NULL;
333 }
334
335 return f;
336 }
337
338 static void mmap_cache_free(MMapCache *m) {
339 FileDescriptor *f;
340 int i;
341
342 assert(m);
343
344 for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
345 if (m->contexts[i])
346 context_free(m->contexts[i]);
347
348 while ((f = hashmap_first(m->fds)))
349 fd_free(f);
350
351 hashmap_free(m->fds);
352
353 while (m->unused)
354 window_free(m->unused);
355
356 free(m);
357 }
358
359 MMapCache* mmap_cache_unref(MMapCache *m) {
360
361 if (!m)
362 return NULL;
363
364 assert(m->n_ref > 0);
365
366 m->n_ref --;
367 if (m->n_ref == 0)
368 mmap_cache_free(m);
369
370 return NULL;
371 }
372
373 static int make_room(MMapCache *m) {
374 assert(m);
375
376 if (!m->last_unused)
377 return 0;
378
379 window_free(m->last_unused);
380 return 1;
381 }
382
383 static int try_context(
384 MMapCache *m,
385 int fd,
386 int prot,
387 unsigned context,
388 bool keep_always,
389 uint64_t offset,
390 size_t size,
391 void **ret) {
392
393 Context *c;
394
395 assert(m);
396 assert(m->n_ref > 0);
397 assert(fd >= 0);
398 assert(size > 0);
399 assert(ret);
400
401 c = m->contexts[context];
402 if (!c)
403 return 0;
404
405 assert(c->id == context);
406
407 if (!c->window)
408 return 0;
409
410 if (!window_matches(c->window, fd, prot, offset, size)) {
411
412 /* Drop the reference to the window, since it's unnecessary now */
413 context_detach_window(c);
414 return 0;
415 }
416
417 if (c->window->fd->sigbus)
418 return -EIO;
419
420 c->window->keep_always = c->window->keep_always || keep_always;
421
422 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
423 return 1;
424 }
425
426 static int find_mmap(
427 MMapCache *m,
428 int fd,
429 int prot,
430 unsigned context,
431 bool keep_always,
432 uint64_t offset,
433 size_t size,
434 void **ret) {
435
436 FileDescriptor *f;
437 Window *w;
438 Context *c;
439
440 assert(m);
441 assert(m->n_ref > 0);
442 assert(fd >= 0);
443 assert(size > 0);
444
445 f = hashmap_get(m->fds, FD_TO_PTR(fd));
446 if (!f)
447 return 0;
448
449 assert(f->fd == fd);
450
451 if (f->sigbus)
452 return -EIO;
453
454 LIST_FOREACH(by_fd, w, f->windows)
455 if (window_matches(w, fd, prot, offset, size))
456 break;
457
458 if (!w)
459 return 0;
460
461 c = context_add(m, context);
462 if (!c)
463 return -ENOMEM;
464
465 context_attach_window(c, w);
466 w->keep_always = w->keep_always || keep_always;
467
468 *ret = (uint8_t*) w->ptr + (offset - w->offset);
469 return 1;
470 }
471
472 static int mmap_try_harder(MMapCache *m, void *addr, int fd, int prot, int flags, uint64_t offset, size_t size, void **res) {
473 void *ptr;
474
475 assert(m);
476 assert(fd >= 0);
477 assert(res);
478
479 for (;;) {
480 int r;
481
482 ptr = mmap(addr, size, prot, flags, fd, offset);
483 if (ptr != MAP_FAILED)
484 break;
485 if (errno != ENOMEM)
486 return -errno;
487
488 r = make_room(m);
489 if (r < 0)
490 return r;
491 if (r == 0)
492 return -ENOMEM;
493 }
494
495 *res = ptr;
496 return 0;
497 }
498
499 static int add_mmap(
500 MMapCache *m,
501 int fd,
502 int prot,
503 unsigned context,
504 bool keep_always,
505 uint64_t offset,
506 size_t size,
507 struct stat *st,
508 void **ret) {
509
510 uint64_t woffset, wsize;
511 Context *c;
512 FileDescriptor *f;
513 Window *w;
514 void *d;
515 int r;
516
517 assert(m);
518 assert(m->n_ref > 0);
519 assert(fd >= 0);
520 assert(size > 0);
521 assert(ret);
522
523 woffset = offset & ~((uint64_t) page_size() - 1ULL);
524 wsize = size + (offset - woffset);
525 wsize = PAGE_ALIGN(wsize);
526
527 if (wsize < WINDOW_SIZE) {
528 uint64_t delta;
529
530 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
531
532 if (delta > offset)
533 woffset = 0;
534 else
535 woffset -= delta;
536
537 wsize = WINDOW_SIZE;
538 }
539
540 if (st) {
541 /* Memory maps that are larger then the files
542 underneath have undefined behavior. Hence, clamp
543 things to the file size if we know it */
544
545 if (woffset >= (uint64_t) st->st_size)
546 return -EADDRNOTAVAIL;
547
548 if (woffset + wsize > (uint64_t) st->st_size)
549 wsize = PAGE_ALIGN(st->st_size - woffset);
550 }
551
552 r = mmap_try_harder(m, NULL, fd, prot, MAP_SHARED, woffset, wsize, &d);
553 if (r < 0)
554 return r;
555
556 c = context_add(m, context);
557 if (!c)
558 goto outofmem;
559
560 f = fd_add(m, fd);
561 if (!f)
562 goto outofmem;
563
564 w = window_add(m, f, prot, keep_always, woffset, wsize, d);
565 if (!w)
566 goto outofmem;
567
568 context_detach_window(c);
569 c->window = w;
570 LIST_PREPEND(by_window, w->contexts, c);
571
572 *ret = (uint8_t*) w->ptr + (offset - w->offset);
573 return 1;
574
575 outofmem:
576 munmap(d, wsize);
577 return -ENOMEM;
578 }
579
580 int mmap_cache_get(
581 MMapCache *m,
582 int fd,
583 int prot,
584 unsigned context,
585 bool keep_always,
586 uint64_t offset,
587 size_t size,
588 struct stat *st,
589 void **ret) {
590
591 int r;
592
593 assert(m);
594 assert(m->n_ref > 0);
595 assert(fd >= 0);
596 assert(size > 0);
597 assert(ret);
598 assert(context < MMAP_CACHE_MAX_CONTEXTS);
599
600 /* Check whether the current context is the right one already */
601 r = try_context(m, fd, prot, context, keep_always, offset, size, ret);
602 if (r != 0) {
603 m->n_hit ++;
604 return r;
605 }
606
607 /* Search for a matching mmap */
608 r = find_mmap(m, fd, prot, context, keep_always, offset, size, ret);
609 if (r != 0) {
610 m->n_hit ++;
611 return r;
612 }
613
614 m->n_missed++;
615
616 /* Create a new mmap */
617 return add_mmap(m, fd, prot, context, keep_always, offset, size, st, ret);
618 }
619
620 unsigned mmap_cache_get_hit(MMapCache *m) {
621 assert(m);
622
623 return m->n_hit;
624 }
625
626 unsigned mmap_cache_get_missed(MMapCache *m) {
627 assert(m);
628
629 return m->n_missed;
630 }
631
632 static void mmap_cache_process_sigbus(MMapCache *m) {
633 bool found = false;
634 FileDescriptor *f;
635 Iterator i;
636 int r;
637
638 assert(m);
639
640 /* Iterate through all triggered pages and mark their files as
641 * invalidated */
642 for (;;) {
643 bool ours;
644 void *addr;
645
646 r = sigbus_pop(&addr);
647 if (_likely_(r == 0))
648 break;
649 if (r < 0) {
650 log_error_errno(r, "SIGBUS handling failed: %m");
651 abort();
652 }
653
654 ours = false;
655 HASHMAP_FOREACH(f, m->fds, i) {
656 Window *w;
657
658 LIST_FOREACH(by_fd, w, f->windows) {
659 if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
660 (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
661 found = ours = f->sigbus = true;
662 break;
663 }
664 }
665
666 if (ours)
667 break;
668 }
669
670 /* Didn't find a matching window, give up */
671 if (!ours) {
672 log_error("Unknown SIGBUS page, aborting.");
673 abort();
674 }
675 }
676
677 /* The list of triggered pages is now empty. Now, let's remap
678 * all windows of the triggered file to anonymous maps, so
679 * that no page of the file in question is triggered again, so
680 * that we can be sure not to hit the queue size limit. */
681 if (_likely_(!found))
682 return;
683
684 HASHMAP_FOREACH(f, m->fds, i) {
685 Window *w;
686
687 if (!f->sigbus)
688 continue;
689
690 LIST_FOREACH(by_fd, w, f->windows)
691 window_invalidate(w);
692 }
693 }
694
695 bool mmap_cache_got_sigbus(MMapCache *m, int fd) {
696 FileDescriptor *f;
697
698 assert(m);
699 assert(fd >= 0);
700
701 mmap_cache_process_sigbus(m);
702
703 f = hashmap_get(m->fds, FD_TO_PTR(fd));
704 if (!f)
705 return false;
706
707 return f->sigbus;
708 }
709
710 void mmap_cache_close_fd(MMapCache *m, int fd) {
711 FileDescriptor *f;
712
713 assert(m);
714 assert(fd >= 0);
715
716 /* Make sure that any queued SIGBUS are first dispatched, so
717 * that we don't end up with a SIGBUS entry we cannot relate
718 * to any existing memory map */
719
720 mmap_cache_process_sigbus(m);
721
722 f = hashmap_get(m->fds, FD_TO_PTR(fd));
723 if (!f)
724 return;
725
726 fd_free(f);
727 }