src/journal/mmap-cache.c
/***
  This file is part of systemd.

  Copyright 2012 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <errno.h>
#include <stdlib.h>
#include <sys/mman.h>

#include "alloc-util.h"
#include "fd-util.h"
#include "hashmap.h"
#include "list.h"
#include "log.h"
#include "macro.h"
#include "mmap-cache.h"
#include "sigbus.h"
#include "util.h"

typedef struct Window Window;
typedef struct Context Context;
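/* A Window is a single mmap()ed region of a file. Each Context points at the window that currently backs it, and
 * each MMapFileDescriptor keeps the list of all windows mapped from its file. */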
struct Window {
        MMapCache *cache;

        bool invalidated:1;
        bool keep_always:1;
        bool in_unused:1;

        int prot;
        void *ptr;
        uint64_t offset;
        size_t size;

        MMapFileDescriptor *fd;

        LIST_FIELDS(Window, by_fd);
        LIST_FIELDS(Window, unused);

        LIST_HEAD(Context, contexts);
};

struct Context {
        MMapCache *cache;
        unsigned id;
        Window *window;

        LIST_FIELDS(Context, by_window);
};

struct MMapFileDescriptor {
        MMapCache *cache;
        int fd;
        bool sigbus;
        LIST_HEAD(Window, windows);
};

struct MMapCache {
        int n_ref;
        unsigned n_windows;

        unsigned n_hit, n_missed;

        Hashmap *fds;
        Context *contexts[MMAP_CACHE_MAX_CONTEXTS];

        LIST_HEAD(Window, unused);
        Window *last_unused;
};
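/* Keep at least this many windows allocated before recycling the least recently used unused one. */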
#define WINDOWS_MIN 64

#ifdef ENABLE_DEBUG_MMAP_CACHE
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
# define WINDOW_SIZE (page_size())
#else
# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#endif

MMapCache* mmap_cache_new(void) {
        MMapCache *m;

        m = new0(MMapCache, 1);
        if (!m)
                return NULL;

        m->n_ref = 1;
        return m;
}

MMapCache* mmap_cache_ref(MMapCache *m) {
        assert(m);
        assert(m->n_ref > 0);

        m->n_ref++;
        return m;
}
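/* Unmap a window and detach it from its file descriptor, the unused list and every context pointing at it. */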
static void window_unlink(Window *w) {
        Context *c;

        assert(w);

        if (w->ptr)
                munmap(w->ptr, w->size);

        if (w->fd)
                LIST_REMOVE(by_fd, w->fd->windows, w);

        if (w->in_unused) {
                if (w->cache->last_unused == w)
                        w->cache->last_unused = w->unused_prev;

                LIST_REMOVE(unused, w->cache->unused, w);
        }

        LIST_FOREACH(by_window, c, w->contexts) {
                assert(c->window == w);
                c->window = NULL;
        }
}

static void window_invalidate(Window *w) {
        assert(w);

        if (w->invalidated)
                return;

        /* Replace the window with anonymous pages. This is useful
         * when we hit a SIGBUS and want to make sure the file cannot
         * trigger any further SIGBUS, possibly overrunning the sigbus
         * queue. */

        assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
        w->invalidated = true;
}

static void window_free(Window *w) {
        assert(w);

        window_unlink(w);
        w->cache->n_windows--;
        free(w);
}
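/* Check whether an existing window maps the same file with the same protection and fully covers the requested
 * range. */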
_pure_ static bool window_matches(Window *w, MMapFileDescriptor *f, int prot, uint64_t offset, size_t size) {
        assert(w);
        assert(f);
        assert(size > 0);

        return
                w->fd &&
                f->fd == w->fd->fd &&
                prot == w->prot &&
                offset >= w->offset &&
                offset + size <= w->offset + w->size;
}
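/* Set up a window object for a fresh mapping, either by allocating a new one or, once we are past WINDOWS_MIN, by
 * recycling the least recently used unused window. */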
static Window *window_add(MMapCache *m, MMapFileDescriptor *f, int prot, bool keep_always, uint64_t offset, size_t size, void *ptr) {
        Window *w;

        assert(m);
        assert(f);

        if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {

                /* Allocate a new window */
                w = new0(Window, 1);
                if (!w)
                        return NULL;
                m->n_windows++;
        } else {

                /* Reuse an existing one */
                w = m->last_unused;
                window_unlink(w);
                zero(*w);
        }

        w->cache = m;
        w->fd = f;
        w->prot = prot;
        w->keep_always = keep_always;
        w->offset = offset;
        w->size = size;
        w->ptr = ptr;

        LIST_PREPEND(by_fd, f->windows, w);

        return w;
}
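/* Drop the context's reference to its current window. If nothing else uses the window, either free it immediately
 * (debug builds) or put it on the unused list for later reuse. */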
static void context_detach_window(Context *c) {
        Window *w;

        assert(c);

        if (!c->window)
                return;

        w = c->window;
        c->window = NULL;
        LIST_REMOVE(by_window, w->contexts, c);

        if (!w->contexts && !w->keep_always) {
                /* Not used anymore? */
#ifdef ENABLE_DEBUG_MMAP_CACHE
                /* Unmap unused windows immediately to expose use-after-unmap
                 * by SIGSEGV. */
                window_free(w);
#else
                LIST_PREPEND(unused, c->cache->unused, w);
                if (!c->cache->last_unused)
                        c->cache->last_unused = w;

                w->in_unused = true;
#endif
        }
}
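/* Point the context at a window, detaching it from its previous one and pulling the new window off the unused list
 * if it was parked there. */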
static void context_attach_window(Context *c, Window *w) {
        assert(c);
        assert(w);

        if (c->window == w)
                return;

        context_detach_window(c);

        if (w->in_unused) {
                /* Used again? */
                LIST_REMOVE(unused, c->cache->unused, w);
                if (c->cache->last_unused == w)
                        c->cache->last_unused = w->unused_prev;

                w->in_unused = false;
        }

        c->window = w;
        LIST_PREPEND(by_window, w->contexts, c);
}

static Context *context_add(MMapCache *m, unsigned id) {
        Context *c;

        assert(m);

        c = m->contexts[id];
        if (c)
                return c;

        c = new0(Context, 1);
        if (!c)
                return NULL;

        c->cache = m;
        c->id = id;

        assert(!m->contexts[id]);
        m->contexts[id] = c;

        return c;
}

static void context_free(Context *c) {
        assert(c);

        context_detach_window(c);

        if (c->cache) {
                assert(c->cache->contexts[c->id] == c);
                c->cache->contexts[c->id] = NULL;
        }

        free(c);
}

static void mmap_cache_free(MMapCache *m) {
        int i;

        assert(m);

        for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
                if (m->contexts[i])
                        context_free(m->contexts[i]);

        hashmap_free(m->fds);

        while (m->unused)
                window_free(m->unused);

        free(m);
}

MMapCache* mmap_cache_unref(MMapCache *m) {

        if (!m)
                return NULL;

        assert(m->n_ref > 0);

        m->n_ref--;
        if (m->n_ref == 0)
                mmap_cache_free(m);

        return NULL;
}
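/* Free the least recently used unused window to release address space. Returns 1 if a window was freed, 0 if there
 * was nothing to free. */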
static int make_room(MMapCache *m) {
        assert(m);

        if (!m->last_unused)
                return 0;

        window_free(m->last_unused);
        return 1;
}
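/* Fast path: check whether the window currently attached to this context already satisfies the request. */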
static int try_context(
                MMapCache *m,
                MMapFileDescriptor *f,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                void **ret) {

        Context *c;

        assert(m);
        assert(m->n_ref > 0);
        assert(f);
        assert(size > 0);
        assert(ret);

        c = m->contexts[context];
        if (!c)
                return 0;

        assert(c->id == context);

        if (!c->window)
                return 0;

        if (!window_matches(c->window, f, prot, offset, size)) {

                /* Drop the reference to the window, since it's unnecessary now */
                context_detach_window(c);
                return 0;
        }

        if (c->window->fd->sigbus)
                return -EIO;

        c->window->keep_always = c->window->keep_always || keep_always;

        *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
        return 1;
}
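/* Slower path: search all windows of the file descriptor for one that covers the request and attach it to the
 * context. */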
static int find_mmap(
                MMapCache *m,
                MMapFileDescriptor *f,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                void **ret) {

        Window *w;
        Context *c;

        assert(m);
        assert(m->n_ref > 0);
        assert(f);
        assert(size > 0);

        if (f->sigbus)
                return -EIO;

        LIST_FOREACH(by_fd, w, f->windows)
                if (window_matches(w, f, prot, offset, size))
                        break;

        if (!w)
                return 0;

        c = context_add(m, context);
        if (!c)
                return -ENOMEM;

        context_attach_window(c, w);
        w->keep_always = w->keep_always || keep_always;

        *ret = (uint8_t*) w->ptr + (offset - w->offset);
        return 1;
}
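/* mmap() the requested range, evicting unused windows and retrying for as long as the kernel reports ENOMEM. */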
static int mmap_try_harder(MMapCache *m, void *addr, MMapFileDescriptor *f, int prot, int flags, uint64_t offset, size_t size, void **res) {
        void *ptr;

        assert(m);
        assert(f);
        assert(res);

        for (;;) {
                int r;

                ptr = mmap(addr, size, prot, flags, f->fd, offset);
                if (ptr != MAP_FAILED)
                        break;
                if (errno != ENOMEM)
                        return negative_errno();

                r = make_room(m);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -ENOMEM;
        }

        *res = ptr;
        return 0;
}
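/* Establish a new window for the request: page-align the range, grow it to at least WINDOW_SIZE, clamp it to the
 * file size if known, map it and attach it to the context. */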
static int add_mmap(
                MMapCache *m,
                MMapFileDescriptor *f,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                struct stat *st,
                void **ret) {

        uint64_t woffset, wsize;
        Context *c;
        Window *w;
        void *d;
        int r;

        assert(m);
        assert(m->n_ref > 0);
        assert(f);
        assert(size > 0);
        assert(ret);

        woffset = offset & ~((uint64_t) page_size() - 1ULL);
        wsize = size + (offset - woffset);
        wsize = PAGE_ALIGN(wsize);

        if (wsize < WINDOW_SIZE) {
                uint64_t delta;

                delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);

                if (delta > offset)
                        woffset = 0;
                else
                        woffset -= delta;

                wsize = WINDOW_SIZE;
        }
        if (st) {
                /* Memory maps that are larger than the files
                 * underneath have undefined behavior. Hence, clamp
                 * things to the file size if we know it. */

                if (woffset >= (uint64_t) st->st_size)
                        return -EADDRNOTAVAIL;

                if (woffset + wsize > (uint64_t) st->st_size)
                        wsize = PAGE_ALIGN(st->st_size - woffset);
        }

        r = mmap_try_harder(m, NULL, f, prot, MAP_SHARED, woffset, wsize, &d);
        if (r < 0)
                return r;

        c = context_add(m, context);
        if (!c)
                goto outofmem;

        w = window_add(m, f, prot, keep_always, woffset, wsize, d);
        if (!w)
                goto outofmem;

        context_detach_window(c);
        c->window = w;
        LIST_PREPEND(by_window, w->contexts, c);

        *ret = (uint8_t*) w->ptr + (offset - w->offset);
        return 1;

outofmem:
        (void) munmap(d, wsize);
        return -ENOMEM;
}
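/* Main entry point: return a pointer to the requested file range, reusing the context's current window or another
 * cached window where possible, and mapping a new window otherwise. */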
int mmap_cache_get(
                MMapCache *m,
                MMapFileDescriptor *f,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                struct stat *st,
                void **ret) {

        int r;

        assert(m);
        assert(m->n_ref > 0);
        assert(f);
        assert(size > 0);
        assert(ret);
        assert(context < MMAP_CACHE_MAX_CONTEXTS);

        /* Check whether the current context is the right one already */
        r = try_context(m, f, prot, context, keep_always, offset, size, ret);
        if (r != 0) {
                m->n_hit++;
                return r;
        }

        /* Search for a matching mmap */
        r = find_mmap(m, f, prot, context, keep_always, offset, size, ret);
        if (r != 0) {
                m->n_hit++;
                return r;
        }

        m->n_missed++;

        /* Create a new mmap */
        return add_mmap(m, f, prot, context, keep_always, offset, size, st, ret);
}

unsigned mmap_cache_get_hit(MMapCache *m) {
        assert(m);

        return m->n_hit;
}

unsigned mmap_cache_get_missed(MMapCache *m) {
        assert(m);

        return m->n_missed;
}
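/* Drain the queue of SIGBUS-faulted addresses, mark the files whose windows contain them, and replace all windows of
 * those files with anonymous memory so they cannot fault again. */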
static void mmap_cache_process_sigbus(MMapCache *m) {
        bool found = false;
        MMapFileDescriptor *f;
        Iterator i;
        int r;

        assert(m);

        /* Iterate through all triggered pages and mark their files as
         * invalidated */
        for (;;) {
                bool ours;
                void *addr;

                r = sigbus_pop(&addr);
                if (_likely_(r == 0))
                        break;
                if (r < 0) {
                        log_error_errno(r, "SIGBUS handling failed: %m");
                        abort();
                }

                ours = false;
                HASHMAP_FOREACH(f, m->fds, i) {
                        Window *w;

                        LIST_FOREACH(by_fd, w, f->windows) {
                                if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
                                    (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
                                        found = ours = f->sigbus = true;
                                        break;
                                }
                        }

                        if (ours)
                                break;
                }

                /* Didn't find a matching window, give up */
                if (!ours) {
                        log_error("Unknown SIGBUS page, aborting.");
                        abort();
                }
        }

        /* The list of triggered pages is now empty. Now, let's remap
         * all windows of the triggered files to anonymous maps, so
         * that no page of the files in question triggers again and we
         * can be sure not to hit the queue size limit. */
        if (_likely_(!found))
                return;

        HASHMAP_FOREACH(f, m->fds, i) {
                Window *w;

                if (!f->sigbus)
                        continue;

                LIST_FOREACH(by_fd, w, f->windows)
                        window_invalidate(w);
        }
}

bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f) {
        assert(m);
        assert(f);

        mmap_cache_process_sigbus(m);

        return f->sigbus;
}
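/* Register a file descriptor with the cache (or return its existing registration), so that windows can be created
 * for it. */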
MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd) {
        MMapFileDescriptor *f;
        int r;

        assert(m);
        assert(fd >= 0);

        f = hashmap_get(m->fds, FD_TO_PTR(fd));
        if (f)
                return f;

        r = hashmap_ensure_allocated(&m->fds, NULL);
        if (r < 0)
                return NULL;

        f = new0(MMapFileDescriptor, 1);
        if (!f)
                return NULL;

        f->cache = m;
        f->fd = fd;

        r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
        if (r < 0)
                return mfree(f);

        return f;
}

void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f) {
        assert(m);
        assert(f);

        /* Make sure that any queued SIGBUS events are dispatched first, so
         * that we don't end up with a SIGBUS entry we cannot relate
         * to any existing memory map. */

        mmap_cache_process_sigbus(m);

        while (f->windows)
                window_free(f->windows);

        if (f->cache)
                assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));

        free(f);
}