/* src/journal/mmap-cache.c (systemd) */
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <stdlib.h>
5 #include <sys/mman.h>
6
7 #include "alloc-util.h"
8 #include "fd-util.h"
9 #include "hashmap.h"
10 #include "list.h"
11 #include "log.h"
12 #include "macro.h"
13 #include "memory-util.h"
14 #include "mmap-cache.h"
15 #include "sigbus.h"
16
typedef struct Window Window;
typedef struct Context Context;

/* A single mmap()ed region of a file. Windows are shared by all contexts
 * that currently reference them, and are parked on an "unused" LRU list
 * once no context points at them anymore. */
struct Window {
        MMapCache *cache;

        bool invalidated:1;   /* pages were replaced by anonymous memory after a SIGBUS */
        bool keep_always:1;   /* never recycle this window */
        bool in_unused:1;     /* currently linked into cache->unused */

        int prot;             /* mmap() protection flags of this mapping */
        void *ptr;            /* start address of the mapping */
        uint64_t offset;      /* file offset the mapping starts at */
        size_t size;          /* size of the mapping in bytes */

        MMapFileDescriptor *fd;

        LIST_FIELDS(Window, by_fd);   /* all windows of the same file descriptor */
        LIST_FIELDS(Window, unused);  /* LRU list of windows nobody references */

        LIST_HEAD(Context, contexts); /* contexts currently attached to this window */
};
39
/* A "context" is a cursor-like slot: each context references at most one
 * window at a time — the one that satisfied its most recent lookup. */
struct Context {
        MMapCache *cache;
        unsigned id;          /* index into cache->contexts[] */
        Window *window;       /* currently attached window, or NULL */

        LIST_FIELDS(Context, by_window); /* contexts sharing the same window */
};
47
/* Per-file state: the file descriptor plus the list of windows mapped from it. */
struct MMapFileDescriptor {
        MMapCache *cache;
        int fd;
        bool sigbus;          /* set once an access to this file raised SIGBUS */
        LIST_HEAD(Window, windows);
};
54
/* The cache object: tracks registered file descriptors, their windows and
 * the fixed set of lookup contexts. */
struct MMapCache {
        unsigned n_ref;       /* reference counter */
        unsigned n_windows;   /* number of Window objects currently in existence */

        unsigned n_hit, n_missed; /* lookup statistics, see mmap_cache_get() */

        Hashmap *fds;         /* fd number → MMapFileDescriptor */
        Context *contexts[MMAP_CACHE_MAX_CONTEXTS];

        LIST_HEAD(Window, unused); /* unreferenced windows, most recently used first */
        Window *last_unused;       /* tail of the unused list, i.e. the LRU eviction victim */
};
67
/* Keep at least this many windows allocated before starting to recycle
 * entries from the unused list. */
#define WINDOWS_MIN 64

#if ENABLE_DEBUG_MMAP_CACHE
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
# define WINDOW_SIZE (page_size())
#else
/* Default granularity of a window mapping: 8 MiB. */
# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#endif
76
77 MMapCache* mmap_cache_new(void) {
78 MMapCache *m;
79
80 m = new0(MMapCache, 1);
81 if (!m)
82 return NULL;
83
84 m->n_ref = 1;
85 return m;
86 }
87
/* Detach a window from everything that references it: unmap its pages,
 * remove it from its fd's window list and from the unused LRU list, and
 * clear the back-pointer of every context attached to it. The Window
 * object itself is NOT freed. */
static void window_unlink(Window *w) {
        Context *c;

        assert(w);

        if (w->ptr)
                munmap(w->ptr, w->size);

        if (w->fd)
                LIST_REMOVE(by_fd, w->fd->windows, w);

        if (w->in_unused) {
                /* Keep last_unused pointing at the list tail */
                if (w->cache->last_unused == w)
                        w->cache->last_unused = w->unused_prev;

                LIST_REMOVE(unused, w->cache->unused, w);
        }

        LIST_FOREACH(by_window, c, w->contexts) {
                assert(c->window == w);
                c->window = NULL;
        }
}
111
/* Overlay the window's pages with anonymous memory so accessing them can
 * no longer raise SIGBUS. Address, size and protection stay unchanged. */
static void window_invalidate(Window *w) {
        assert(w);

        if (w->invalidated)
                return;

        /* Replace the window with anonymous pages. This is useful
         * when we hit a SIGBUS and want to make sure the file cannot
         * trigger any further SIGBUS, possibly overrunning the sigbus
         * queue. */

        assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
        w->invalidated = true;
}
126
127 static void window_free(Window *w) {
128 assert(w);
129
130 window_unlink(w);
131 w->cache->n_windows--;
132 free(w);
133 }
134
135 _pure_ static bool window_matches(Window *w, int prot, uint64_t offset, size_t size) {
136 assert(w);
137 assert(size > 0);
138
139 return
140 prot == w->prot &&
141 offset >= w->offset &&
142 offset + size <= w->offset + w->size;
143 }
144
145 _pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, int prot, uint64_t offset, size_t size) {
146 assert(w);
147 assert(f);
148
149 return
150 w->fd &&
151 f->fd == w->fd->fd &&
152 window_matches(w, prot, offset, size);
153 }
154
/* Create (or recycle) a Window object describing an already established
 * mapping at ptr, and register it with the cache and file descriptor.
 * Returns NULL on OOM (the mapping itself is left untouched). */
static Window *window_add(MMapCache *m, MMapFileDescriptor *f, int prot, bool keep_always, uint64_t offset, size_t size, void *ptr) {
        Window *w;

        assert(m);
        assert(f);

        if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {

                /* Allocate a new window */
                w = new0(Window, 1);
                if (!w)
                        return NULL;
                m->n_windows++;
        } else {

                /* Reuse an existing one: recycle the LRU unused window */
                w = m->last_unused;
                window_unlink(w);
                zero(*w);
        }

        w->cache = m;
        w->fd = f;
        w->prot = prot;
        w->keep_always = keep_always;
        w->offset = offset;
        w->size = size;
        w->ptr = ptr;

        LIST_PREPEND(by_fd, f->windows, w);

        return w;
}
188
/* Drop the context's reference to its window, if any. If that was the
 * last reference and the window isn't pinned via keep_always, the window
 * is parked on the unused list (or destroyed immediately in debug builds). */
static void context_detach_window(Context *c) {
        Window *w;

        assert(c);

        if (!c->window)
                return;

        w = TAKE_PTR(c->window);
        LIST_REMOVE(by_window, w->contexts, c);

        if (!w->contexts && !w->keep_always) {
                /* Not used anymore? */
#if ENABLE_DEBUG_MMAP_CACHE
                /* Unmap unused windows immediately to expose use-after-unmap
                 * by SIGSEGV. */
                window_free(w);
#else
                LIST_PREPEND(unused, c->cache->unused, w);
                if (!c->cache->last_unused)
                        c->cache->last_unused = w;

                w->in_unused = true;
#endif
        }
}
215
/* Make w the window referenced by context c, detaching any previously
 * attached window and pulling w off the unused list if it was parked there. */
static void context_attach_window(Context *c, Window *w) {
        assert(c);
        assert(w);

        if (c->window == w)
                return;

        context_detach_window(c);

        if (w->in_unused) {
                /* Used again? */
                LIST_REMOVE(unused, c->cache->unused, w);
                if (c->cache->last_unused == w)
                        c->cache->last_unused = w->unused_prev;

                w->in_unused = false;
        }

        c->window = w;
        LIST_PREPEND(by_window, w->contexts, c);
}
237
238 static Context *context_add(MMapCache *m, unsigned id) {
239 Context *c;
240
241 assert(m);
242
243 c = m->contexts[id];
244 if (c)
245 return c;
246
247 c = new0(Context, 1);
248 if (!c)
249 return NULL;
250
251 c->cache = m;
252 c->id = id;
253
254 assert(!m->contexts[id]);
255 m->contexts[id] = c;
256
257 return c;
258 }
259
/* Destroy a context: detach its window and clear its slot in the cache's
 * context table before releasing the object. */
static void context_free(Context *c) {
        assert(c);

        context_detach_window(c);

        if (c->cache) {
                assert(c->cache->contexts[c->id] == c);
                c->cache->contexts[c->id] = NULL;
        }

        free(c);
}
272
273 static MMapCache *mmap_cache_free(MMapCache *m) {
274 int i;
275
276 assert(m);
277
278 for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
279 if (m->contexts[i])
280 context_free(m->contexts[i]);
281
282 hashmap_free(m->fds);
283
284 while (m->unused)
285 window_free(m->unused);
286
287 return mfree(m);
288 }
289
290 DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache, mmap_cache, mmap_cache_free);
291
292 static int make_room(MMapCache *m) {
293 assert(m);
294
295 if (!m->last_unused)
296 return 0;
297
298 window_free(m->last_unused);
299 return 1;
300 }
301
/* Try to satisfy the request from the window currently attached to the
 * given context. Returns 1 on success (with *ret/*ret_size filled in),
 * 0 if the attached window doesn't match (it is detached then), and
 * -EIO if the file already triggered a SIGBUS. */
static int try_context(
                MMapCache *m,
                MMapFileDescriptor *f,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                void **ret,
                size_t *ret_size) {

        Context *c;

        assert(m);
        assert(m->n_ref > 0);
        assert(f);
        assert(size > 0);
        assert(ret);

        c = m->contexts[context];
        if (!c)
                return 0;

        assert(c->id == context);

        if (!c->window)
                return 0;

        if (!window_matches_fd(c->window, f, prot, offset, size)) {

                /* Drop the reference to the window, since it's unnecessary now */
                context_detach_window(c);
                return 0;
        }

        if (c->window->fd->sigbus)
                return -EIO;

        /* A keep_always request pins the window for good */
        c->window->keep_always = c->window->keep_always || keep_always;

        *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
        if (ret_size)
                *ret_size = c->window->size - (offset - c->window->offset);

        return 1;
}
348
/* Search all existing windows of the file for one covering the requested
 * range, and attach it to the context on success. Returns 1 on success,
 * 0 if no window matches, -EIO if the file got a SIGBUS, -ENOMEM on OOM. */
static int find_mmap(
                MMapCache *m,
                MMapFileDescriptor *f,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                void **ret,
                size_t *ret_size) {

        Window *w;
        Context *c;

        assert(m);
        assert(m->n_ref > 0);
        assert(f);
        assert(size > 0);

        if (f->sigbus)
                return -EIO;

        LIST_FOREACH(by_fd, w, f->windows)
                if (window_matches(w, prot, offset, size))
                        break;

        if (!w)
                return 0;

        c = context_add(m, context);
        if (!c)
                return -ENOMEM;

        context_attach_window(c, w);
        /* A keep_always request pins the window for good */
        w->keep_always = w->keep_always || keep_always;

        *ret = (uint8_t*) w->ptr + (offset - w->offset);
        if (ret_size)
                *ret_size = w->size - (offset - w->offset);

        return 1;
}
391
/* mmap() the requested range, evicting unused windows and retrying while
 * the kernel reports ENOMEM (e.g. the per-process mapping limit was hit).
 * Returns 0 on success with *res set, a negative errno on failure. */
static int mmap_try_harder(MMapCache *m, void *addr, MMapFileDescriptor *f, int prot, int flags, uint64_t offset, size_t size, void **res) {
        void *ptr;

        assert(m);
        assert(f);
        assert(res);

        for (;;) {
                int r;

                ptr = mmap(addr, size, prot, flags, f->fd, offset);
                if (ptr != MAP_FAILED)
                        break;
                if (errno != ENOMEM)
                        return negative_errno();

                /* Free an unused window and try again */
                r = make_room(m);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -ENOMEM;
        }

        *res = ptr;
        return 0;
}
418
/* Establish a brand new window for the requested range: the window is
 * grown to at least WINDOW_SIZE (roughly centered on the request when
 * possible), clamped to the file size if st is given, mapped, registered
 * with the cache and attached to the context. Returns 1 on success,
 * -EADDRNOTAVAIL if the request starts beyond the file end, or another
 * negative errno on failure. */
static int add_mmap(
                MMapCache *m,
                MMapFileDescriptor *f,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                struct stat *st,
                void **ret,
                size_t *ret_size) {

        uint64_t woffset, wsize;
        Context *c;
        Window *w;
        void *d;
        int r;

        assert(m);
        assert(m->n_ref > 0);
        assert(f);
        assert(size > 0);
        assert(ret);

        /* Round the window start down and its size up to page boundaries */
        woffset = offset & ~((uint64_t) page_size() - 1ULL);
        wsize = size + (offset - woffset);
        wsize = PAGE_ALIGN(wsize);

        if (wsize < WINDOW_SIZE) {
                uint64_t delta;

                /* Grow to WINDOW_SIZE, centering the requested range */
                delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);

                if (delta > offset)
                        woffset = 0;
                else
                        woffset -= delta;

                wsize = WINDOW_SIZE;
        }

        if (st) {
                /* Memory maps that are larger then the files
                   underneath have undefined behavior. Hence, clamp
                   things to the file size if we know it */

                if (woffset >= (uint64_t) st->st_size)
                        return -EADDRNOTAVAIL;

                if (woffset + wsize > (uint64_t) st->st_size)
                        wsize = PAGE_ALIGN(st->st_size - woffset);
        }

        r = mmap_try_harder(m, NULL, f, prot, MAP_SHARED, woffset, wsize, &d);
        if (r < 0)
                return r;

        c = context_add(m, context);
        if (!c)
                goto outofmem;

        w = window_add(m, f, prot, keep_always, woffset, wsize, d);
        if (!w)
                goto outofmem;

        context_attach_window(c, w);

        *ret = (uint8_t*) w->ptr + (offset - w->offset);
        if (ret_size)
                *ret_size = w->size - (offset - w->offset);

        return 1;

outofmem:
        /* No Window object took ownership of the mapping, so unmap it here */
        (void) munmap(d, wsize);
        return -ENOMEM;
}
496
497 int mmap_cache_get(
498 MMapCache *m,
499 MMapFileDescriptor *f,
500 int prot,
501 unsigned context,
502 bool keep_always,
503 uint64_t offset,
504 size_t size,
505 struct stat *st,
506 void **ret,
507 size_t *ret_size) {
508
509 int r;
510
511 assert(m);
512 assert(m->n_ref > 0);
513 assert(f);
514 assert(size > 0);
515 assert(ret);
516 assert(context < MMAP_CACHE_MAX_CONTEXTS);
517
518 /* Check whether the current context is the right one already */
519 r = try_context(m, f, prot, context, keep_always, offset, size, ret, ret_size);
520 if (r != 0) {
521 m->n_hit++;
522 return r;
523 }
524
525 /* Search for a matching mmap */
526 r = find_mmap(m, f, prot, context, keep_always, offset, size, ret, ret_size);
527 if (r != 0) {
528 m->n_hit++;
529 return r;
530 }
531
532 m->n_missed++;
533
534 /* Create a new mmap */
535 return add_mmap(m, f, prot, context, keep_always, offset, size, st, ret, ret_size);
536 }
537
/* Number of requests satisfied from an already existing window. */
unsigned mmap_cache_get_hit(MMapCache *m) {
        assert(m);

        return m->n_hit;
}
543
/* Number of requests that required establishing a new mapping. */
unsigned mmap_cache_get_missed(MMapCache *m) {
        assert(m);

        return m->n_missed;
}
549
/* Drain the queue of faulting addresses collected by the SIGBUS handler,
 * mark the owning files, and replace all of their windows with anonymous
 * memory so no further faults can occur. Aborts if a queued fault cannot
 * be attributed to any known window, or if popping the queue fails. */
static void mmap_cache_process_sigbus(MMapCache *m) {
        bool found = false;
        MMapFileDescriptor *f;
        Iterator i;
        int r;

        assert(m);

        /* Iterate through all triggered pages and mark their files as
         * invalidated */
        for (;;) {
                bool ours;
                void *addr;

                r = sigbus_pop(&addr);
                if (_likely_(r == 0))
                        break;
                if (r < 0) {
                        log_error_errno(r, "SIGBUS handling failed: %m");
                        abort();
                }

                /* Attribute the faulting address to one of our windows */
                ours = false;
                HASHMAP_FOREACH(f, m->fds, i) {
                        Window *w;

                        LIST_FOREACH(by_fd, w, f->windows) {
                                if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
                                    (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
                                        found = ours = f->sigbus = true;
                                        break;
                                }
                        }

                        if (ours)
                                break;
                }

                /* Didn't find a matching window, give up */
                if (!ours) {
                        log_error("Unknown SIGBUS page, aborting.");
                        abort();
                }
        }

        /* The list of triggered pages is now empty. Now, let's remap
         * all windows of the triggered file to anonymous maps, so
         * that no page of the file in question is triggered again, so
         * that we can be sure not to hit the queue size limit. */
        if (_likely_(!found))
                return;

        HASHMAP_FOREACH(f, m->fds, i) {
                Window *w;

                if (!f->sigbus)
                        continue;

                LIST_FOREACH(by_fd, w, f->windows)
                        window_invalidate(w);
        }
}
612
/* Returns true if the given file raised a SIGBUS while mapped (e.g. it
 * was truncated underneath us). Pending SIGBUS events are dispatched
 * first so the flag is up to date. */
bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f) {
        assert(m);
        assert(f);

        mmap_cache_process_sigbus(m);

        return f->sigbus;
}
621
/* Look up or create the cache entry for the given file descriptor. The fd
 * is not duplicated and must stay valid until mmap_cache_free_fd() is
 * called. Returns NULL on OOM. */
MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd) {
        MMapFileDescriptor *f;
        int r;

        assert(m);
        assert(fd >= 0);

        /* hashmap_get() tolerates a still-unallocated m->fds */
        f = hashmap_get(m->fds, FD_TO_PTR(fd));
        if (f)
                return f;

        r = hashmap_ensure_allocated(&m->fds, NULL);
        if (r < 0)
                return NULL;

        f = new0(MMapFileDescriptor, 1);
        if (!f)
                return NULL;

        f->cache = m;
        f->fd = fd;

        r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
        if (r < 0)
                return mfree(f);

        return f;
}
650
/* Unregister a file descriptor from the cache, destroying all of its
 * windows. The fd itself is not closed — that remains the caller's job. */
void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f) {
        assert(m);
        assert(f);

        /* Make sure that any queued SIGBUS are first dispatched, so
         * that we don't end up with a SIGBUS entry we cannot relate
         * to any existing memory map */

        mmap_cache_process_sigbus(m);

        while (f->windows)
                window_free(f->windows);

        if (f->cache)
                assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));

        free(f);
}