]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/mmap-cache.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / journal / mmap-cache.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2012 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <stdlib.h>
23 #include <sys/mman.h>
24
25 #include "alloc-util.h"
26 #include "fd-util.h"
27 #include "hashmap.h"
28 #include "list.h"
29 #include "log.h"
30 #include "macro.h"
31 #include "mmap-cache.h"
32 #include "sigbus.h"
33 #include "util.h"
34
35 typedef struct Window Window;
36 typedef struct Context Context;
37
38 struct Window {
39 MMapCache *cache;
40
41 bool invalidated:1;
42 bool keep_always:1;
43 bool in_unused:1;
44
45 int prot;
46 void *ptr;
47 uint64_t offset;
48 size_t size;
49
50 MMapFileDescriptor *fd;
51
52 LIST_FIELDS(Window, by_fd);
53 LIST_FIELDS(Window, unused);
54
55 LIST_HEAD(Context, contexts);
56 };
57
58 struct Context {
59 MMapCache *cache;
60 unsigned id;
61 Window *window;
62
63 LIST_FIELDS(Context, by_window);
64 };
65
66 struct MMapFileDescriptor {
67 MMapCache *cache;
68 int fd;
69 bool sigbus;
70 LIST_HEAD(Window, windows);
71 };
72
73 struct MMapCache {
74 int n_ref;
75 unsigned n_windows;
76
77 unsigned n_hit, n_missed;
78
79 Hashmap *fds;
80 Context *contexts[MMAP_CACHE_MAX_CONTEXTS];
81
82 LIST_HEAD(Window, unused);
83 Window *last_unused;
84 };
85
/* window_add() only recycles an unused window once more than this many
 * windows are allocated; below that it always allocates a fresh one. */
#define WINDOWS_MIN 64

/* Minimum size of a newly created window, in bytes. */
#if ENABLE_DEBUG_MMAP_CACHE
/* Single-page windows cause far more mmap traffic, which makes unsafe use
 * of stale pointers much more likely to be noticed. */
#  define WINDOW_SIZE (page_size())
#else
#  define WINDOW_SIZE (8ULL << 20)
#endif
94
95 MMapCache* mmap_cache_new(void) {
96 MMapCache *m;
97
98 m = new0(MMapCache, 1);
99 if (!m)
100 return NULL;
101
102 m->n_ref = 1;
103 return m;
104 }
105
106 MMapCache* mmap_cache_ref(MMapCache *m) {
107 assert(m);
108 assert(m->n_ref > 0);
109
110 m->n_ref++;
111 return m;
112 }
113
114 static void window_unlink(Window *w) {
115 Context *c;
116
117 assert(w);
118
119 if (w->ptr)
120 munmap(w->ptr, w->size);
121
122 if (w->fd)
123 LIST_REMOVE(by_fd, w->fd->windows, w);
124
125 if (w->in_unused) {
126 if (w->cache->last_unused == w)
127 w->cache->last_unused = w->unused_prev;
128
129 LIST_REMOVE(unused, w->cache->unused, w);
130 }
131
132 LIST_FOREACH(by_window, c, w->contexts) {
133 assert(c->window == w);
134 c->window = NULL;
135 }
136 }
137
138 static void window_invalidate(Window *w) {
139 assert(w);
140
141 if (w->invalidated)
142 return;
143
144 /* Replace the window with anonymous pages. This is useful
145 * when we hit a SIGBUS and want to make sure the file cannot
146 * trigger any further SIGBUS, possibly overrunning the sigbus
147 * queue. */
148
149 assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
150 w->invalidated = true;
151 }
152
153 static void window_free(Window *w) {
154 assert(w);
155
156 window_unlink(w);
157 w->cache->n_windows--;
158 free(w);
159 }
160
161 _pure_ static inline bool window_matches(Window *w, int prot, uint64_t offset, size_t size) {
162 assert(w);
163 assert(size > 0);
164
165 return
166 prot == w->prot &&
167 offset >= w->offset &&
168 offset + size <= w->offset + w->size;
169 }
170
171 _pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, int prot, uint64_t offset, size_t size) {
172 assert(w);
173 assert(f);
174
175 return
176 w->fd &&
177 f->fd == w->fd->fd &&
178 window_matches(w, prot, offset, size);
179 }
180
181 static Window *window_add(MMapCache *m, MMapFileDescriptor *f, int prot, bool keep_always, uint64_t offset, size_t size, void *ptr) {
182 Window *w;
183
184 assert(m);
185 assert(f);
186
187 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
188
189 /* Allocate a new window */
190 w = new0(Window, 1);
191 if (!w)
192 return NULL;
193 m->n_windows++;
194 } else {
195
196 /* Reuse an existing one */
197 w = m->last_unused;
198 window_unlink(w);
199 zero(*w);
200 }
201
202 w->cache = m;
203 w->fd = f;
204 w->prot = prot;
205 w->keep_always = keep_always;
206 w->offset = offset;
207 w->size = size;
208 w->ptr = ptr;
209
210 LIST_PREPEND(by_fd, f->windows, w);
211
212 return w;
213 }
214
215 static void context_detach_window(Context *c) {
216 Window *w;
217
218 assert(c);
219
220 if (!c->window)
221 return;
222
223 w = c->window;
224 c->window = NULL;
225 LIST_REMOVE(by_window, w->contexts, c);
226
227 if (!w->contexts && !w->keep_always) {
228 /* Not used anymore? */
229 #if ENABLE_DEBUG_MMAP_CACHE
230 /* Unmap unused windows immediately to expose use-after-unmap
231 * by SIGSEGV. */
232 window_free(w);
233 #else
234 LIST_PREPEND(unused, c->cache->unused, w);
235 if (!c->cache->last_unused)
236 c->cache->last_unused = w;
237
238 w->in_unused = true;
239 #endif
240 }
241 }
242
243 static void context_attach_window(Context *c, Window *w) {
244 assert(c);
245 assert(w);
246
247 if (c->window == w)
248 return;
249
250 context_detach_window(c);
251
252 if (w->in_unused) {
253 /* Used again? */
254 LIST_REMOVE(unused, c->cache->unused, w);
255 if (c->cache->last_unused == w)
256 c->cache->last_unused = w->unused_prev;
257
258 w->in_unused = false;
259 }
260
261 c->window = w;
262 LIST_PREPEND(by_window, w->contexts, c);
263 }
264
265 static Context *context_add(MMapCache *m, unsigned id) {
266 Context *c;
267
268 assert(m);
269
270 c = m->contexts[id];
271 if (c)
272 return c;
273
274 c = new0(Context, 1);
275 if (!c)
276 return NULL;
277
278 c->cache = m;
279 c->id = id;
280
281 assert(!m->contexts[id]);
282 m->contexts[id] = c;
283
284 return c;
285 }
286
287 static void context_free(Context *c) {
288 assert(c);
289
290 context_detach_window(c);
291
292 if (c->cache) {
293 assert(c->cache->contexts[c->id] == c);
294 c->cache->contexts[c->id] = NULL;
295 }
296
297 free(c);
298 }
299
300 static void mmap_cache_free(MMapCache *m) {
301 int i;
302
303 assert(m);
304
305 for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
306 if (m->contexts[i])
307 context_free(m->contexts[i]);
308
309 hashmap_free(m->fds);
310
311 while (m->unused)
312 window_free(m->unused);
313
314 free(m);
315 }
316
317 MMapCache* mmap_cache_unref(MMapCache *m) {
318
319 if (!m)
320 return NULL;
321
322 assert(m->n_ref > 0);
323
324 m->n_ref--;
325 if (m->n_ref == 0)
326 mmap_cache_free(m);
327
328 return NULL;
329 }
330
331 static int make_room(MMapCache *m) {
332 assert(m);
333
334 if (!m->last_unused)
335 return 0;
336
337 window_free(m->last_unused);
338 return 1;
339 }
340
341 static int try_context(
342 MMapCache *m,
343 MMapFileDescriptor *f,
344 int prot,
345 unsigned context,
346 bool keep_always,
347 uint64_t offset,
348 size_t size,
349 void **ret,
350 size_t *ret_size) {
351
352 Context *c;
353
354 assert(m);
355 assert(m->n_ref > 0);
356 assert(f);
357 assert(size > 0);
358 assert(ret);
359
360 c = m->contexts[context];
361 if (!c)
362 return 0;
363
364 assert(c->id == context);
365
366 if (!c->window)
367 return 0;
368
369 if (!window_matches_fd(c->window, f, prot, offset, size)) {
370
371 /* Drop the reference to the window, since it's unnecessary now */
372 context_detach_window(c);
373 return 0;
374 }
375
376 if (c->window->fd->sigbus)
377 return -EIO;
378
379 c->window->keep_always = c->window->keep_always || keep_always;
380
381 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
382 if (ret_size)
383 *ret_size = c->window->size - (offset - c->window->offset);
384
385 return 1;
386 }
387
388 static int find_mmap(
389 MMapCache *m,
390 MMapFileDescriptor *f,
391 int prot,
392 unsigned context,
393 bool keep_always,
394 uint64_t offset,
395 size_t size,
396 void **ret,
397 size_t *ret_size) {
398
399 Window *w;
400 Context *c;
401
402 assert(m);
403 assert(m->n_ref > 0);
404 assert(f);
405 assert(size > 0);
406
407 if (f->sigbus)
408 return -EIO;
409
410 LIST_FOREACH(by_fd, w, f->windows)
411 if (window_matches(w, prot, offset, size))
412 break;
413
414 if (!w)
415 return 0;
416
417 c = context_add(m, context);
418 if (!c)
419 return -ENOMEM;
420
421 context_attach_window(c, w);
422 w->keep_always = w->keep_always || keep_always;
423
424 *ret = (uint8_t*) w->ptr + (offset - w->offset);
425 if (ret_size)
426 *ret_size = w->size - (offset - w->offset);
427
428 return 1;
429 }
430
431 static int mmap_try_harder(MMapCache *m, void *addr, MMapFileDescriptor *f, int prot, int flags, uint64_t offset, size_t size, void **res) {
432 void *ptr;
433
434 assert(m);
435 assert(f);
436 assert(res);
437
438 for (;;) {
439 int r;
440
441 ptr = mmap(addr, size, prot, flags, f->fd, offset);
442 if (ptr != MAP_FAILED)
443 break;
444 if (errno != ENOMEM)
445 return negative_errno();
446
447 r = make_room(m);
448 if (r < 0)
449 return r;
450 if (r == 0)
451 return -ENOMEM;
452 }
453
454 *res = ptr;
455 return 0;
456 }
457
458 static int add_mmap(
459 MMapCache *m,
460 MMapFileDescriptor *f,
461 int prot,
462 unsigned context,
463 bool keep_always,
464 uint64_t offset,
465 size_t size,
466 struct stat *st,
467 void **ret,
468 size_t *ret_size) {
469
470 uint64_t woffset, wsize;
471 Context *c;
472 Window *w;
473 void *d;
474 int r;
475
476 assert(m);
477 assert(m->n_ref > 0);
478 assert(f);
479 assert(size > 0);
480 assert(ret);
481
482 woffset = offset & ~((uint64_t) page_size() - 1ULL);
483 wsize = size + (offset - woffset);
484 wsize = PAGE_ALIGN(wsize);
485
486 if (wsize < WINDOW_SIZE) {
487 uint64_t delta;
488
489 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
490
491 if (delta > offset)
492 woffset = 0;
493 else
494 woffset -= delta;
495
496 wsize = WINDOW_SIZE;
497 }
498
499 if (st) {
500 /* Memory maps that are larger then the files
501 underneath have undefined behavior. Hence, clamp
502 things to the file size if we know it */
503
504 if (woffset >= (uint64_t) st->st_size)
505 return -EADDRNOTAVAIL;
506
507 if (woffset + wsize > (uint64_t) st->st_size)
508 wsize = PAGE_ALIGN(st->st_size - woffset);
509 }
510
511 r = mmap_try_harder(m, NULL, f, prot, MAP_SHARED, woffset, wsize, &d);
512 if (r < 0)
513 return r;
514
515 c = context_add(m, context);
516 if (!c)
517 goto outofmem;
518
519 w = window_add(m, f, prot, keep_always, woffset, wsize, d);
520 if (!w)
521 goto outofmem;
522
523 context_attach_window(c, w);
524
525 *ret = (uint8_t*) w->ptr + (offset - w->offset);
526 if (ret_size)
527 *ret_size = w->size - (offset - w->offset);
528
529 return 1;
530
531 outofmem:
532 (void) munmap(d, wsize);
533 return -ENOMEM;
534 }
535
536 int mmap_cache_get(
537 MMapCache *m,
538 MMapFileDescriptor *f,
539 int prot,
540 unsigned context,
541 bool keep_always,
542 uint64_t offset,
543 size_t size,
544 struct stat *st,
545 void **ret,
546 size_t *ret_size) {
547
548 int r;
549
550 assert(m);
551 assert(m->n_ref > 0);
552 assert(f);
553 assert(size > 0);
554 assert(ret);
555 assert(context < MMAP_CACHE_MAX_CONTEXTS);
556
557 /* Check whether the current context is the right one already */
558 r = try_context(m, f, prot, context, keep_always, offset, size, ret, ret_size);
559 if (r != 0) {
560 m->n_hit++;
561 return r;
562 }
563
564 /* Search for a matching mmap */
565 r = find_mmap(m, f, prot, context, keep_always, offset, size, ret, ret_size);
566 if (r != 0) {
567 m->n_hit++;
568 return r;
569 }
570
571 m->n_missed++;
572
573 /* Create a new mmap */
574 return add_mmap(m, f, prot, context, keep_always, offset, size, st, ret, ret_size);
575 }
576
577 unsigned mmap_cache_get_hit(MMapCache *m) {
578 assert(m);
579
580 return m->n_hit;
581 }
582
583 unsigned mmap_cache_get_missed(MMapCache *m) {
584 assert(m);
585
586 return m->n_missed;
587 }
588
589 static void mmap_cache_process_sigbus(MMapCache *m) {
590 bool found = false;
591 MMapFileDescriptor *f;
592 Iterator i;
593 int r;
594
595 assert(m);
596
597 /* Iterate through all triggered pages and mark their files as
598 * invalidated */
599 for (;;) {
600 bool ours;
601 void *addr;
602
603 r = sigbus_pop(&addr);
604 if (_likely_(r == 0))
605 break;
606 if (r < 0) {
607 log_error_errno(r, "SIGBUS handling failed: %m");
608 abort();
609 }
610
611 ours = false;
612 HASHMAP_FOREACH(f, m->fds, i) {
613 Window *w;
614
615 LIST_FOREACH(by_fd, w, f->windows) {
616 if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
617 (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
618 found = ours = f->sigbus = true;
619 break;
620 }
621 }
622
623 if (ours)
624 break;
625 }
626
627 /* Didn't find a matching window, give up */
628 if (!ours) {
629 log_error("Unknown SIGBUS page, aborting.");
630 abort();
631 }
632 }
633
634 /* The list of triggered pages is now empty. Now, let's remap
635 * all windows of the triggered file to anonymous maps, so
636 * that no page of the file in question is triggered again, so
637 * that we can be sure not to hit the queue size limit. */
638 if (_likely_(!found))
639 return;
640
641 HASHMAP_FOREACH(f, m->fds, i) {
642 Window *w;
643
644 if (!f->sigbus)
645 continue;
646
647 LIST_FOREACH(by_fd, w, f->windows)
648 window_invalidate(w);
649 }
650 }
651
652 bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f) {
653 assert(m);
654 assert(f);
655
656 mmap_cache_process_sigbus(m);
657
658 return f->sigbus;
659 }
660
661 MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd) {
662 MMapFileDescriptor *f;
663 int r;
664
665 assert(m);
666 assert(fd >= 0);
667
668 f = hashmap_get(m->fds, FD_TO_PTR(fd));
669 if (f)
670 return f;
671
672 r = hashmap_ensure_allocated(&m->fds, NULL);
673 if (r < 0)
674 return NULL;
675
676 f = new0(MMapFileDescriptor, 1);
677 if (!f)
678 return NULL;
679
680 f->cache = m;
681 f->fd = fd;
682
683 r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
684 if (r < 0)
685 return mfree(f);
686
687 return f;
688 }
689
690 void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f) {
691 assert(m);
692 assert(f);
693
694 /* Make sure that any queued SIGBUS are first dispatched, so
695 * that we don't end up with a SIGBUS entry we cannot relate
696 * to any existing memory map */
697
698 mmap_cache_process_sigbus(m);
699
700 while (f->windows)
701 window_free(f->windows);
702
703 if (f->cache)
704 assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
705
706 free(f);
707 }