]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/mmap-cache.c
Merge pull request #11988 from keszybz/test-binaries-installation
[thirdparty/systemd.git] / src / journal / mmap-cache.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <stdlib.h>
5 #include <sys/mman.h>
6
7 #include "alloc-util.h"
8 #include "errno-util.h"
9 #include "fd-util.h"
10 #include "hashmap.h"
11 #include "list.h"
12 #include "log.h"
13 #include "macro.h"
14 #include "memory-util.h"
15 #include "mmap-cache.h"
16 #include "sigbus.h"
17
18 typedef struct Window Window;
19 typedef struct Context Context;
20
21 struct Window {
22 MMapCache *cache;
23
24 bool invalidated:1;
25 bool keep_always:1;
26 bool in_unused:1;
27
28 int prot;
29 void *ptr;
30 uint64_t offset;
31 size_t size;
32
33 MMapFileDescriptor *fd;
34
35 LIST_FIELDS(Window, by_fd);
36 LIST_FIELDS(Window, unused);
37
38 LIST_HEAD(Context, contexts);
39 };
40
41 struct Context {
42 MMapCache *cache;
43 unsigned id;
44 Window *window;
45
46 LIST_FIELDS(Context, by_window);
47 };
48
49 struct MMapFileDescriptor {
50 MMapCache *cache;
51 int fd;
52 bool sigbus;
53 LIST_HEAD(Window, windows);
54 };
55
56 struct MMapCache {
57 unsigned n_ref;
58 unsigned n_windows;
59
60 unsigned n_hit, n_missed;
61
62 Hashmap *fds;
63 Context *contexts[MMAP_CACHE_MAX_CONTEXTS];
64
65 LIST_HEAD(Window, unused);
66 Window *last_unused;
67 };
68
/* While the cache holds no more than this many windows, window_add()
 * allocates new window objects instead of recycling unused ones. */
#define WINDOWS_MIN 64

/* Minimum size of a newly created mapping window. */
#if !ENABLE_DEBUG_MMAP_CACHE
#  define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#else
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
#  define WINDOW_SIZE (page_size())
#endif
77
78 MMapCache* mmap_cache_new(void) {
79 MMapCache *m;
80
81 m = new0(MMapCache, 1);
82 if (!m)
83 return NULL;
84
85 m->n_ref = 1;
86 return m;
87 }
88
89 static void window_unlink(Window *w) {
90 Context *c;
91
92 assert(w);
93
94 if (w->ptr)
95 munmap(w->ptr, w->size);
96
97 if (w->fd)
98 LIST_REMOVE(by_fd, w->fd->windows, w);
99
100 if (w->in_unused) {
101 if (w->cache->last_unused == w)
102 w->cache->last_unused = w->unused_prev;
103
104 LIST_REMOVE(unused, w->cache->unused, w);
105 }
106
107 LIST_FOREACH(by_window, c, w->contexts) {
108 assert(c->window == w);
109 c->window = NULL;
110 }
111 }
112
113 static void window_invalidate(Window *w) {
114 assert(w);
115
116 if (w->invalidated)
117 return;
118
119 /* Replace the window with anonymous pages. This is useful
120 * when we hit a SIGBUS and want to make sure the file cannot
121 * trigger any further SIGBUS, possibly overrunning the sigbus
122 * queue. */
123
124 assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
125 w->invalidated = true;
126 }
127
128 static void window_free(Window *w) {
129 assert(w);
130
131 window_unlink(w);
132 w->cache->n_windows--;
133 free(w);
134 }
135
136 _pure_ static bool window_matches(Window *w, int prot, uint64_t offset, size_t size) {
137 assert(w);
138 assert(size > 0);
139
140 return
141 prot == w->prot &&
142 offset >= w->offset &&
143 offset + size <= w->offset + w->size;
144 }
145
146 _pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, int prot, uint64_t offset, size_t size) {
147 assert(w);
148 assert(f);
149
150 return
151 w->fd &&
152 f->fd == w->fd->fd &&
153 window_matches(w, prot, offset, size);
154 }
155
156 static Window *window_add(MMapCache *m, MMapFileDescriptor *f, int prot, bool keep_always, uint64_t offset, size_t size, void *ptr) {
157 Window *w;
158
159 assert(m);
160 assert(f);
161
162 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
163
164 /* Allocate a new window */
165 w = new0(Window, 1);
166 if (!w)
167 return NULL;
168 m->n_windows++;
169 } else {
170
171 /* Reuse an existing one */
172 w = m->last_unused;
173 window_unlink(w);
174 zero(*w);
175 }
176
177 w->cache = m;
178 w->fd = f;
179 w->prot = prot;
180 w->keep_always = keep_always;
181 w->offset = offset;
182 w->size = size;
183 w->ptr = ptr;
184
185 LIST_PREPEND(by_fd, f->windows, w);
186
187 return w;
188 }
189
190 static void context_detach_window(Context *c) {
191 Window *w;
192
193 assert(c);
194
195 if (!c->window)
196 return;
197
198 w = TAKE_PTR(c->window);
199 LIST_REMOVE(by_window, w->contexts, c);
200
201 if (!w->contexts && !w->keep_always) {
202 /* Not used anymore? */
203 #if ENABLE_DEBUG_MMAP_CACHE
204 /* Unmap unused windows immediately to expose use-after-unmap
205 * by SIGSEGV. */
206 window_free(w);
207 #else
208 LIST_PREPEND(unused, c->cache->unused, w);
209 if (!c->cache->last_unused)
210 c->cache->last_unused = w;
211
212 w->in_unused = true;
213 #endif
214 }
215 }
216
217 static void context_attach_window(Context *c, Window *w) {
218 assert(c);
219 assert(w);
220
221 if (c->window == w)
222 return;
223
224 context_detach_window(c);
225
226 if (w->in_unused) {
227 /* Used again? */
228 LIST_REMOVE(unused, c->cache->unused, w);
229 if (c->cache->last_unused == w)
230 c->cache->last_unused = w->unused_prev;
231
232 w->in_unused = false;
233 }
234
235 c->window = w;
236 LIST_PREPEND(by_window, w->contexts, c);
237 }
238
239 static Context *context_add(MMapCache *m, unsigned id) {
240 Context *c;
241
242 assert(m);
243
244 c = m->contexts[id];
245 if (c)
246 return c;
247
248 c = new0(Context, 1);
249 if (!c)
250 return NULL;
251
252 c->cache = m;
253 c->id = id;
254
255 assert(!m->contexts[id]);
256 m->contexts[id] = c;
257
258 return c;
259 }
260
261 static void context_free(Context *c) {
262 assert(c);
263
264 context_detach_window(c);
265
266 if (c->cache) {
267 assert(c->cache->contexts[c->id] == c);
268 c->cache->contexts[c->id] = NULL;
269 }
270
271 free(c);
272 }
273
274 static MMapCache *mmap_cache_free(MMapCache *m) {
275 int i;
276
277 assert(m);
278
279 for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
280 if (m->contexts[i])
281 context_free(m->contexts[i]);
282
283 hashmap_free(m->fds);
284
285 while (m->unused)
286 window_free(m->unused);
287
288 return mfree(m);
289 }
290
291 DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache, mmap_cache, mmap_cache_free);
292
293 static int make_room(MMapCache *m) {
294 assert(m);
295
296 if (!m->last_unused)
297 return 0;
298
299 window_free(m->last_unused);
300 return 1;
301 }
302
303 static int try_context(
304 MMapCache *m,
305 MMapFileDescriptor *f,
306 int prot,
307 unsigned context,
308 bool keep_always,
309 uint64_t offset,
310 size_t size,
311 void **ret,
312 size_t *ret_size) {
313
314 Context *c;
315
316 assert(m);
317 assert(m->n_ref > 0);
318 assert(f);
319 assert(size > 0);
320 assert(ret);
321
322 c = m->contexts[context];
323 if (!c)
324 return 0;
325
326 assert(c->id == context);
327
328 if (!c->window)
329 return 0;
330
331 if (!window_matches_fd(c->window, f, prot, offset, size)) {
332
333 /* Drop the reference to the window, since it's unnecessary now */
334 context_detach_window(c);
335 return 0;
336 }
337
338 if (c->window->fd->sigbus)
339 return -EIO;
340
341 c->window->keep_always = c->window->keep_always || keep_always;
342
343 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
344 if (ret_size)
345 *ret_size = c->window->size - (offset - c->window->offset);
346
347 return 1;
348 }
349
350 static int find_mmap(
351 MMapCache *m,
352 MMapFileDescriptor *f,
353 int prot,
354 unsigned context,
355 bool keep_always,
356 uint64_t offset,
357 size_t size,
358 void **ret,
359 size_t *ret_size) {
360
361 Window *w;
362 Context *c;
363
364 assert(m);
365 assert(m->n_ref > 0);
366 assert(f);
367 assert(size > 0);
368
369 if (f->sigbus)
370 return -EIO;
371
372 LIST_FOREACH(by_fd, w, f->windows)
373 if (window_matches(w, prot, offset, size))
374 break;
375
376 if (!w)
377 return 0;
378
379 c = context_add(m, context);
380 if (!c)
381 return -ENOMEM;
382
383 context_attach_window(c, w);
384 w->keep_always = w->keep_always || keep_always;
385
386 *ret = (uint8_t*) w->ptr + (offset - w->offset);
387 if (ret_size)
388 *ret_size = w->size - (offset - w->offset);
389
390 return 1;
391 }
392
393 static int mmap_try_harder(MMapCache *m, void *addr, MMapFileDescriptor *f, int prot, int flags, uint64_t offset, size_t size, void **res) {
394 void *ptr;
395
396 assert(m);
397 assert(f);
398 assert(res);
399
400 for (;;) {
401 int r;
402
403 ptr = mmap(addr, size, prot, flags, f->fd, offset);
404 if (ptr != MAP_FAILED)
405 break;
406 if (errno != ENOMEM)
407 return negative_errno();
408
409 r = make_room(m);
410 if (r < 0)
411 return r;
412 if (r == 0)
413 return -ENOMEM;
414 }
415
416 *res = ptr;
417 return 0;
418 }
419
420 static int add_mmap(
421 MMapCache *m,
422 MMapFileDescriptor *f,
423 int prot,
424 unsigned context,
425 bool keep_always,
426 uint64_t offset,
427 size_t size,
428 struct stat *st,
429 void **ret,
430 size_t *ret_size) {
431
432 uint64_t woffset, wsize;
433 Context *c;
434 Window *w;
435 void *d;
436 int r;
437
438 assert(m);
439 assert(m->n_ref > 0);
440 assert(f);
441 assert(size > 0);
442 assert(ret);
443
444 woffset = offset & ~((uint64_t) page_size() - 1ULL);
445 wsize = size + (offset - woffset);
446 wsize = PAGE_ALIGN(wsize);
447
448 if (wsize < WINDOW_SIZE) {
449 uint64_t delta;
450
451 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
452
453 if (delta > offset)
454 woffset = 0;
455 else
456 woffset -= delta;
457
458 wsize = WINDOW_SIZE;
459 }
460
461 if (st) {
462 /* Memory maps that are larger then the files
463 underneath have undefined behavior. Hence, clamp
464 things to the file size if we know it */
465
466 if (woffset >= (uint64_t) st->st_size)
467 return -EADDRNOTAVAIL;
468
469 if (woffset + wsize > (uint64_t) st->st_size)
470 wsize = PAGE_ALIGN(st->st_size - woffset);
471 }
472
473 r = mmap_try_harder(m, NULL, f, prot, MAP_SHARED, woffset, wsize, &d);
474 if (r < 0)
475 return r;
476
477 c = context_add(m, context);
478 if (!c)
479 goto outofmem;
480
481 w = window_add(m, f, prot, keep_always, woffset, wsize, d);
482 if (!w)
483 goto outofmem;
484
485 context_attach_window(c, w);
486
487 *ret = (uint8_t*) w->ptr + (offset - w->offset);
488 if (ret_size)
489 *ret_size = w->size - (offset - w->offset);
490
491 return 1;
492
493 outofmem:
494 (void) munmap(d, wsize);
495 return -ENOMEM;
496 }
497
498 int mmap_cache_get(
499 MMapCache *m,
500 MMapFileDescriptor *f,
501 int prot,
502 unsigned context,
503 bool keep_always,
504 uint64_t offset,
505 size_t size,
506 struct stat *st,
507 void **ret,
508 size_t *ret_size) {
509
510 int r;
511
512 assert(m);
513 assert(m->n_ref > 0);
514 assert(f);
515 assert(size > 0);
516 assert(ret);
517 assert(context < MMAP_CACHE_MAX_CONTEXTS);
518
519 /* Check whether the current context is the right one already */
520 r = try_context(m, f, prot, context, keep_always, offset, size, ret, ret_size);
521 if (r != 0) {
522 m->n_hit++;
523 return r;
524 }
525
526 /* Search for a matching mmap */
527 r = find_mmap(m, f, prot, context, keep_always, offset, size, ret, ret_size);
528 if (r != 0) {
529 m->n_hit++;
530 return r;
531 }
532
533 m->n_missed++;
534
535 /* Create a new mmap */
536 return add_mmap(m, f, prot, context, keep_always, offset, size, st, ret, ret_size);
537 }
538
539 unsigned mmap_cache_get_hit(MMapCache *m) {
540 assert(m);
541
542 return m->n_hit;
543 }
544
545 unsigned mmap_cache_get_missed(MMapCache *m) {
546 assert(m);
547
548 return m->n_missed;
549 }
550
551 static void mmap_cache_process_sigbus(MMapCache *m) {
552 bool found = false;
553 MMapFileDescriptor *f;
554 Iterator i;
555 int r;
556
557 assert(m);
558
559 /* Iterate through all triggered pages and mark their files as
560 * invalidated */
561 for (;;) {
562 bool ours;
563 void *addr;
564
565 r = sigbus_pop(&addr);
566 if (_likely_(r == 0))
567 break;
568 if (r < 0) {
569 log_error_errno(r, "SIGBUS handling failed: %m");
570 abort();
571 }
572
573 ours = false;
574 HASHMAP_FOREACH(f, m->fds, i) {
575 Window *w;
576
577 LIST_FOREACH(by_fd, w, f->windows) {
578 if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
579 (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
580 found = ours = f->sigbus = true;
581 break;
582 }
583 }
584
585 if (ours)
586 break;
587 }
588
589 /* Didn't find a matching window, give up */
590 if (!ours) {
591 log_error("Unknown SIGBUS page, aborting.");
592 abort();
593 }
594 }
595
596 /* The list of triggered pages is now empty. Now, let's remap
597 * all windows of the triggered file to anonymous maps, so
598 * that no page of the file in question is triggered again, so
599 * that we can be sure not to hit the queue size limit. */
600 if (_likely_(!found))
601 return;
602
603 HASHMAP_FOREACH(f, m->fds, i) {
604 Window *w;
605
606 if (!f->sigbus)
607 continue;
608
609 LIST_FOREACH(by_fd, w, f->windows)
610 window_invalidate(w);
611 }
612 }
613
614 bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f) {
615 assert(m);
616 assert(f);
617
618 mmap_cache_process_sigbus(m);
619
620 return f->sigbus;
621 }
622
623 MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd) {
624 MMapFileDescriptor *f;
625 int r;
626
627 assert(m);
628 assert(fd >= 0);
629
630 f = hashmap_get(m->fds, FD_TO_PTR(fd));
631 if (f)
632 return f;
633
634 r = hashmap_ensure_allocated(&m->fds, NULL);
635 if (r < 0)
636 return NULL;
637
638 f = new0(MMapFileDescriptor, 1);
639 if (!f)
640 return NULL;
641
642 f->cache = m;
643 f->fd = fd;
644
645 r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
646 if (r < 0)
647 return mfree(f);
648
649 return f;
650 }
651
652 void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f) {
653 assert(m);
654 assert(f);
655
656 /* Make sure that any queued SIGBUS are first dispatched, so
657 * that we don't end up with a SIGBUS entry we cannot relate
658 * to any existing memory map */
659
660 mmap_cache_process_sigbus(m);
661
662 while (f->windows)
663 window_free(f->windows);
664
665 if (f->cache)
666 assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
667
668 free(f);
669 }