]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/mmap-cache.c
91ed3cd519a6c773a65fe5e10a92cf6f414c96a4
[thirdparty/systemd.git] / src / journal / mmap-cache.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2012 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <stdlib.h>
23 #include <sys/mman.h>
24
25 #include "alloc-util.h"
26 #include "fd-util.h"
27 #include "hashmap.h"
28 #include "list.h"
29 #include "log.h"
30 #include "macro.h"
31 #include "mmap-cache.h"
32 #include "sigbus.h"
33 #include "util.h"
34
35 typedef struct Window Window;
36 typedef struct Context Context;
37
38 struct Window {
39 MMapCache *cache;
40
41 bool invalidated:1;
42 bool keep_always:1;
43 bool in_unused:1;
44
45 int prot;
46 void *ptr;
47 uint64_t offset;
48 size_t size;
49
50 MMapFileDescriptor *fd;
51
52 LIST_FIELDS(Window, by_fd);
53 LIST_FIELDS(Window, unused);
54
55 LIST_HEAD(Context, contexts);
56 };
57
58 struct Context {
59 MMapCache *cache;
60 unsigned id;
61 Window *window;
62
63 LIST_FIELDS(Context, by_window);
64 };
65
66 struct MMapFileDescriptor {
67 MMapCache *cache;
68 int fd;
69 bool sigbus;
70 LIST_HEAD(Window, windows);
71 };
72
73 struct MMapCache {
74 int n_ref;
75 unsigned n_windows;
76
77 unsigned n_hit, n_missed;
78
79 Hashmap *fds;
80 Context *contexts[MMAP_CACHE_MAX_CONTEXTS];
81
82 LIST_HEAD(Window, unused);
83 Window *last_unused;
84 };
85
/* Unused windows are only recycled once more than this many windows exist. */
#define WINDOWS_MIN 64

/* Minimum size of a newly created window. */
#if ENABLE_DEBUG_MMAP_CACHE
/* Page-sized windows cause far more mmap churn, which makes unsafe use more
 * likely to show up. */
#  define WINDOW_SIZE (page_size())
#else
#  define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#endif
94
95 MMapCache* mmap_cache_new(void) {
96 MMapCache *m;
97
98 m = new0(MMapCache, 1);
99 if (!m)
100 return NULL;
101
102 m->n_ref = 1;
103 return m;
104 }
105
106 MMapCache* mmap_cache_ref(MMapCache *m) {
107 assert(m);
108 assert(m->n_ref > 0);
109
110 m->n_ref++;
111 return m;
112 }
113
114 static void window_unlink(Window *w) {
115 Context *c;
116
117 assert(w);
118
119 if (w->ptr)
120 munmap(w->ptr, w->size);
121
122 if (w->fd)
123 LIST_REMOVE(by_fd, w->fd->windows, w);
124
125 if (w->in_unused) {
126 if (w->cache->last_unused == w)
127 w->cache->last_unused = w->unused_prev;
128
129 LIST_REMOVE(unused, w->cache->unused, w);
130 }
131
132 LIST_FOREACH(by_window, c, w->contexts) {
133 assert(c->window == w);
134 c->window = NULL;
135 }
136 }
137
138 static void window_invalidate(Window *w) {
139 assert(w);
140
141 if (w->invalidated)
142 return;
143
144 /* Replace the window with anonymous pages. This is useful
145 * when we hit a SIGBUS and want to make sure the file cannot
146 * trigger any further SIGBUS, possibly overrunning the sigbus
147 * queue. */
148
149 assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
150 w->invalidated = true;
151 }
152
153 static void window_free(Window *w) {
154 assert(w);
155
156 window_unlink(w);
157 w->cache->n_windows--;
158 free(w);
159 }
160
161 _pure_ static inline bool window_matches(Window *w, int prot, uint64_t offset, size_t size) {
162 assert(w);
163 assert(size > 0);
164
165 return
166 prot == w->prot &&
167 offset >= w->offset &&
168 offset + size <= w->offset + w->size;
169 }
170
171 _pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, int prot, uint64_t offset, size_t size) {
172 assert(w);
173 assert(f);
174
175 return
176 w->fd &&
177 f->fd == w->fd->fd &&
178 window_matches(w, prot, offset, size);
179 }
180
181 static Window *window_add(MMapCache *m, MMapFileDescriptor *f, int prot, bool keep_always, uint64_t offset, size_t size, void *ptr) {
182 Window *w;
183
184 assert(m);
185 assert(f);
186
187 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
188
189 /* Allocate a new window */
190 w = new0(Window, 1);
191 if (!w)
192 return NULL;
193 m->n_windows++;
194 } else {
195
196 /* Reuse an existing one */
197 w = m->last_unused;
198 window_unlink(w);
199 zero(*w);
200 }
201
202 w->cache = m;
203 w->fd = f;
204 w->prot = prot;
205 w->keep_always = keep_always;
206 w->offset = offset;
207 w->size = size;
208 w->ptr = ptr;
209
210 LIST_PREPEND(by_fd, f->windows, w);
211
212 return w;
213 }
214
215 static void context_detach_window(Context *c) {
216 Window *w;
217
218 assert(c);
219
220 if (!c->window)
221 return;
222
223 w = TAKE_PTR(c->window);
224 LIST_REMOVE(by_window, w->contexts, c);
225
226 if (!w->contexts && !w->keep_always) {
227 /* Not used anymore? */
228 #if ENABLE_DEBUG_MMAP_CACHE
229 /* Unmap unused windows immediately to expose use-after-unmap
230 * by SIGSEGV. */
231 window_free(w);
232 #else
233 LIST_PREPEND(unused, c->cache->unused, w);
234 if (!c->cache->last_unused)
235 c->cache->last_unused = w;
236
237 w->in_unused = true;
238 #endif
239 }
240 }
241
242 static void context_attach_window(Context *c, Window *w) {
243 assert(c);
244 assert(w);
245
246 if (c->window == w)
247 return;
248
249 context_detach_window(c);
250
251 if (w->in_unused) {
252 /* Used again? */
253 LIST_REMOVE(unused, c->cache->unused, w);
254 if (c->cache->last_unused == w)
255 c->cache->last_unused = w->unused_prev;
256
257 w->in_unused = false;
258 }
259
260 c->window = w;
261 LIST_PREPEND(by_window, w->contexts, c);
262 }
263
264 static Context *context_add(MMapCache *m, unsigned id) {
265 Context *c;
266
267 assert(m);
268
269 c = m->contexts[id];
270 if (c)
271 return c;
272
273 c = new0(Context, 1);
274 if (!c)
275 return NULL;
276
277 c->cache = m;
278 c->id = id;
279
280 assert(!m->contexts[id]);
281 m->contexts[id] = c;
282
283 return c;
284 }
285
286 static void context_free(Context *c) {
287 assert(c);
288
289 context_detach_window(c);
290
291 if (c->cache) {
292 assert(c->cache->contexts[c->id] == c);
293 c->cache->contexts[c->id] = NULL;
294 }
295
296 free(c);
297 }
298
299 static void mmap_cache_free(MMapCache *m) {
300 int i;
301
302 assert(m);
303
304 for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
305 if (m->contexts[i])
306 context_free(m->contexts[i]);
307
308 hashmap_free(m->fds);
309
310 while (m->unused)
311 window_free(m->unused);
312
313 free(m);
314 }
315
316 MMapCache* mmap_cache_unref(MMapCache *m) {
317
318 if (!m)
319 return NULL;
320
321 assert(m->n_ref > 0);
322
323 m->n_ref--;
324 if (m->n_ref == 0)
325 mmap_cache_free(m);
326
327 return NULL;
328 }
329
330 static int make_room(MMapCache *m) {
331 assert(m);
332
333 if (!m->last_unused)
334 return 0;
335
336 window_free(m->last_unused);
337 return 1;
338 }
339
340 static int try_context(
341 MMapCache *m,
342 MMapFileDescriptor *f,
343 int prot,
344 unsigned context,
345 bool keep_always,
346 uint64_t offset,
347 size_t size,
348 void **ret,
349 size_t *ret_size) {
350
351 Context *c;
352
353 assert(m);
354 assert(m->n_ref > 0);
355 assert(f);
356 assert(size > 0);
357 assert(ret);
358
359 c = m->contexts[context];
360 if (!c)
361 return 0;
362
363 assert(c->id == context);
364
365 if (!c->window)
366 return 0;
367
368 if (!window_matches_fd(c->window, f, prot, offset, size)) {
369
370 /* Drop the reference to the window, since it's unnecessary now */
371 context_detach_window(c);
372 return 0;
373 }
374
375 if (c->window->fd->sigbus)
376 return -EIO;
377
378 c->window->keep_always = c->window->keep_always || keep_always;
379
380 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
381 if (ret_size)
382 *ret_size = c->window->size - (offset - c->window->offset);
383
384 return 1;
385 }
386
387 static int find_mmap(
388 MMapCache *m,
389 MMapFileDescriptor *f,
390 int prot,
391 unsigned context,
392 bool keep_always,
393 uint64_t offset,
394 size_t size,
395 void **ret,
396 size_t *ret_size) {
397
398 Window *w;
399 Context *c;
400
401 assert(m);
402 assert(m->n_ref > 0);
403 assert(f);
404 assert(size > 0);
405
406 if (f->sigbus)
407 return -EIO;
408
409 LIST_FOREACH(by_fd, w, f->windows)
410 if (window_matches(w, prot, offset, size))
411 break;
412
413 if (!w)
414 return 0;
415
416 c = context_add(m, context);
417 if (!c)
418 return -ENOMEM;
419
420 context_attach_window(c, w);
421 w->keep_always = w->keep_always || keep_always;
422
423 *ret = (uint8_t*) w->ptr + (offset - w->offset);
424 if (ret_size)
425 *ret_size = w->size - (offset - w->offset);
426
427 return 1;
428 }
429
430 static int mmap_try_harder(MMapCache *m, void *addr, MMapFileDescriptor *f, int prot, int flags, uint64_t offset, size_t size, void **res) {
431 void *ptr;
432
433 assert(m);
434 assert(f);
435 assert(res);
436
437 for (;;) {
438 int r;
439
440 ptr = mmap(addr, size, prot, flags, f->fd, offset);
441 if (ptr != MAP_FAILED)
442 break;
443 if (errno != ENOMEM)
444 return negative_errno();
445
446 r = make_room(m);
447 if (r < 0)
448 return r;
449 if (r == 0)
450 return -ENOMEM;
451 }
452
453 *res = ptr;
454 return 0;
455 }
456
457 static int add_mmap(
458 MMapCache *m,
459 MMapFileDescriptor *f,
460 int prot,
461 unsigned context,
462 bool keep_always,
463 uint64_t offset,
464 size_t size,
465 struct stat *st,
466 void **ret,
467 size_t *ret_size) {
468
469 uint64_t woffset, wsize;
470 Context *c;
471 Window *w;
472 void *d;
473 int r;
474
475 assert(m);
476 assert(m->n_ref > 0);
477 assert(f);
478 assert(size > 0);
479 assert(ret);
480
481 woffset = offset & ~((uint64_t) page_size() - 1ULL);
482 wsize = size + (offset - woffset);
483 wsize = PAGE_ALIGN(wsize);
484
485 if (wsize < WINDOW_SIZE) {
486 uint64_t delta;
487
488 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
489
490 if (delta > offset)
491 woffset = 0;
492 else
493 woffset -= delta;
494
495 wsize = WINDOW_SIZE;
496 }
497
498 if (st) {
499 /* Memory maps that are larger then the files
500 underneath have undefined behavior. Hence, clamp
501 things to the file size if we know it */
502
503 if (woffset >= (uint64_t) st->st_size)
504 return -EADDRNOTAVAIL;
505
506 if (woffset + wsize > (uint64_t) st->st_size)
507 wsize = PAGE_ALIGN(st->st_size - woffset);
508 }
509
510 r = mmap_try_harder(m, NULL, f, prot, MAP_SHARED, woffset, wsize, &d);
511 if (r < 0)
512 return r;
513
514 c = context_add(m, context);
515 if (!c)
516 goto outofmem;
517
518 w = window_add(m, f, prot, keep_always, woffset, wsize, d);
519 if (!w)
520 goto outofmem;
521
522 context_attach_window(c, w);
523
524 *ret = (uint8_t*) w->ptr + (offset - w->offset);
525 if (ret_size)
526 *ret_size = w->size - (offset - w->offset);
527
528 return 1;
529
530 outofmem:
531 (void) munmap(d, wsize);
532 return -ENOMEM;
533 }
534
535 int mmap_cache_get(
536 MMapCache *m,
537 MMapFileDescriptor *f,
538 int prot,
539 unsigned context,
540 bool keep_always,
541 uint64_t offset,
542 size_t size,
543 struct stat *st,
544 void **ret,
545 size_t *ret_size) {
546
547 int r;
548
549 assert(m);
550 assert(m->n_ref > 0);
551 assert(f);
552 assert(size > 0);
553 assert(ret);
554 assert(context < MMAP_CACHE_MAX_CONTEXTS);
555
556 /* Check whether the current context is the right one already */
557 r = try_context(m, f, prot, context, keep_always, offset, size, ret, ret_size);
558 if (r != 0) {
559 m->n_hit++;
560 return r;
561 }
562
563 /* Search for a matching mmap */
564 r = find_mmap(m, f, prot, context, keep_always, offset, size, ret, ret_size);
565 if (r != 0) {
566 m->n_hit++;
567 return r;
568 }
569
570 m->n_missed++;
571
572 /* Create a new mmap */
573 return add_mmap(m, f, prot, context, keep_always, offset, size, st, ret, ret_size);
574 }
575
576 unsigned mmap_cache_get_hit(MMapCache *m) {
577 assert(m);
578
579 return m->n_hit;
580 }
581
582 unsigned mmap_cache_get_missed(MMapCache *m) {
583 assert(m);
584
585 return m->n_missed;
586 }
587
588 static void mmap_cache_process_sigbus(MMapCache *m) {
589 bool found = false;
590 MMapFileDescriptor *f;
591 Iterator i;
592 int r;
593
594 assert(m);
595
596 /* Iterate through all triggered pages and mark their files as
597 * invalidated */
598 for (;;) {
599 bool ours;
600 void *addr;
601
602 r = sigbus_pop(&addr);
603 if (_likely_(r == 0))
604 break;
605 if (r < 0) {
606 log_error_errno(r, "SIGBUS handling failed: %m");
607 abort();
608 }
609
610 ours = false;
611 HASHMAP_FOREACH(f, m->fds, i) {
612 Window *w;
613
614 LIST_FOREACH(by_fd, w, f->windows) {
615 if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
616 (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
617 found = ours = f->sigbus = true;
618 break;
619 }
620 }
621
622 if (ours)
623 break;
624 }
625
626 /* Didn't find a matching window, give up */
627 if (!ours) {
628 log_error("Unknown SIGBUS page, aborting.");
629 abort();
630 }
631 }
632
633 /* The list of triggered pages is now empty. Now, let's remap
634 * all windows of the triggered file to anonymous maps, so
635 * that no page of the file in question is triggered again, so
636 * that we can be sure not to hit the queue size limit. */
637 if (_likely_(!found))
638 return;
639
640 HASHMAP_FOREACH(f, m->fds, i) {
641 Window *w;
642
643 if (!f->sigbus)
644 continue;
645
646 LIST_FOREACH(by_fd, w, f->windows)
647 window_invalidate(w);
648 }
649 }
650
651 bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f) {
652 assert(m);
653 assert(f);
654
655 mmap_cache_process_sigbus(m);
656
657 return f->sigbus;
658 }
659
660 MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd) {
661 MMapFileDescriptor *f;
662 int r;
663
664 assert(m);
665 assert(fd >= 0);
666
667 f = hashmap_get(m->fds, FD_TO_PTR(fd));
668 if (f)
669 return f;
670
671 r = hashmap_ensure_allocated(&m->fds, NULL);
672 if (r < 0)
673 return NULL;
674
675 f = new0(MMapFileDescriptor, 1);
676 if (!f)
677 return NULL;
678
679 f->cache = m;
680 f->fd = fd;
681
682 r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
683 if (r < 0)
684 return mfree(f);
685
686 return f;
687 }
688
689 void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f) {
690 assert(m);
691 assert(f);
692
693 /* Make sure that any queued SIGBUS are first dispatched, so
694 * that we don't end up with a SIGBUS entry we cannot relate
695 * to any existing memory map */
696
697 mmap_cache_process_sigbus(m);
698
699 while (f->windows)
700 window_free(f->windows);
701
702 if (f->cache)
703 assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
704
705 free(f);
706 }