]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/mmap-cache.c
man: mention BoundsBy=, ConsistsOf=, RequisiteOf=
[thirdparty/systemd.git] / src / journal / mmap-cache.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
16e9f408
LP
2/***
3 This file is part of systemd.
4
5 Copyright 2012 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
16e9f408
LP
21#include <errno.h>
22#include <stdlib.h>
f8019684 23#include <sys/mman.h>
16e9f408 24
b5efdb8a 25#include "alloc-util.h"
23e096cc 26#include "fd-util.h"
f8019684
LP
27#include "hashmap.h"
28#include "list.h"
29#include "log.h"
f8019684 30#include "macro.h"
16e9f408 31#include "mmap-cache.h"
cf0fbc49
TA
32#include "sigbus.h"
33#include "util.h"
16e9f408 34
f8019684
LP
35typedef struct Window Window;
36typedef struct Context Context;
84168d80 37
f8019684
LP
38struct Window {
39 MMapCache *cache;
40
739731cd
LP
41 bool invalidated:1;
42 bool keep_always:1;
43 bool in_unused:1;
16e9f408 44
68667801 45 int prot;
16e9f408
LP
46 void *ptr;
47 uint64_t offset;
f8019684
LP
48 size_t size;
49
be7cdd8e 50 MMapFileDescriptor *fd;
16e9f408 51
f8019684
LP
52 LIST_FIELDS(Window, by_fd);
53 LIST_FIELDS(Window, unused);
54
55 LIST_HEAD(Context, contexts);
56};
16e9f408 57
f8019684
LP
58struct Context {
59 MMapCache *cache;
60 unsigned id;
61 Window *window;
16e9f408 62
f8019684
LP
63 LIST_FIELDS(Context, by_window);
64};
65
be7cdd8e 66struct MMapFileDescriptor {
f8019684 67 MMapCache *cache;
16e9f408 68 int fd;
fa6ac760 69 bool sigbus;
f8019684
LP
70 LIST_HEAD(Window, windows);
71};
16e9f408
LP
72
73struct MMapCache {
f8019684 74 int n_ref;
68667801 75 unsigned n_windows;
16e9f408 76
bf807d4d
LP
77 unsigned n_hit, n_missed;
78
f8019684 79 Hashmap *fds;
69adae51 80 Context *contexts[MMAP_CACHE_MAX_CONTEXTS];
16e9f408 81
f8019684
LP
82 LIST_HEAD(Window, unused);
83 Window *last_unused;
16e9f408
LP
84};
85
f8019684 86#define WINDOWS_MIN 64
fad5a6c6 87
349cc4a5 88#if ENABLE_DEBUG_MMAP_CACHE
fad5a6c6
MS
89/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
90# define WINDOW_SIZE (page_size())
91#else
92# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
93#endif
16e9f408 94
f8019684
LP
95MMapCache* mmap_cache_new(void) {
96 MMapCache *m;
16e9f408 97
f8019684
LP
98 m = new0(MMapCache, 1);
99 if (!m)
100 return NULL;
16e9f408 101
f8019684
LP
102 m->n_ref = 1;
103 return m;
16e9f408
LP
104}
105
f8019684 106MMapCache* mmap_cache_ref(MMapCache *m) {
16e9f408 107 assert(m);
f8019684 108 assert(m->n_ref > 0);
16e9f408 109
313cefa1 110 m->n_ref++;
f8019684
LP
111 return m;
112}
f65425cb 113
f8019684
LP
114static void window_unlink(Window *w) {
115 Context *c;
f65425cb 116
f8019684 117 assert(w);
16e9f408 118
f8019684
LP
119 if (w->ptr)
120 munmap(w->ptr, w->size);
16e9f408 121
f8019684 122 if (w->fd)
71fda00f 123 LIST_REMOVE(by_fd, w->fd->windows, w);
16e9f408 124
f8019684
LP
125 if (w->in_unused) {
126 if (w->cache->last_unused == w)
127 w->cache->last_unused = w->unused_prev;
16e9f408 128
71fda00f 129 LIST_REMOVE(unused, w->cache->unused, w);
f65425cb 130 }
16e9f408 131
f8019684
LP
132 LIST_FOREACH(by_window, c, w->contexts) {
133 assert(c->window == w);
134 c->window = NULL;
f65425cb 135 }
16e9f408
LP
136}
137
fa6ac760
LP
138static void window_invalidate(Window *w) {
139 assert(w);
140
141 if (w->invalidated)
142 return;
143
144 /* Replace the window with anonymous pages. This is useful
145 * when we hit a SIGBUS and want to make sure the file cannot
146 * trigger any further SIGBUS, possibly overrunning the sigbus
147 * queue. */
148
149 assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
150 w->invalidated = true;
151}
152
f8019684
LP
153static void window_free(Window *w) {
154 assert(w);
f65425cb 155
f8019684 156 window_unlink(w);
89de6947 157 w->cache->n_windows--;
f8019684
LP
158 free(w);
159}
f65425cb 160
8c3d9662 161_pure_ static inline bool window_matches(Window *w, int prot, uint64_t offset, size_t size) {
f8019684 162 assert(w);
f8019684 163 assert(size > 0);
16e9f408 164
f8019684 165 return
f8019684
LP
166 prot == w->prot &&
167 offset >= w->offset &&
168 offset + size <= w->offset + w->size;
16e9f408
LP
169}
170
8c3d9662
VC
171_pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, int prot, uint64_t offset, size_t size) {
172 assert(w);
173 assert(f);
174
175 return
176 w->fd &&
177 f->fd == w->fd->fd &&
178 window_matches(w, prot, offset, size);
179}
180
be7cdd8e 181static Window *window_add(MMapCache *m, MMapFileDescriptor *f, int prot, bool keep_always, uint64_t offset, size_t size, void *ptr) {
f8019684 182 Window *w;
16e9f408
LP
183
184 assert(m);
be7cdd8e 185 assert(f);
16e9f408 186
f8019684 187 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
f65425cb 188
f8019684
LP
189 /* Allocate a new window */
190 w = new0(Window, 1);
191 if (!w)
192 return NULL;
89de6947 193 m->n_windows++;
f65425cb 194 } else {
16e9f408 195
f8019684
LP
196 /* Reuse an existing one */
197 w = m->last_unused;
198 window_unlink(w);
199 zero(*w);
f65425cb 200 }
f8019684
LP
201
202 w->cache = m;
be7cdd8e 203 w->fd = f;
6a491490
VC
204 w->prot = prot;
205 w->keep_always = keep_always;
206 w->offset = offset;
207 w->size = size;
208 w->ptr = ptr;
209
be7cdd8e 210 LIST_PREPEND(by_fd, f->windows, w);
6a491490 211
f8019684 212 return w;
16e9f408
LP
213}
214
f8019684
LP
215static void context_detach_window(Context *c) {
216 Window *w;
16e9f408 217
f8019684 218 assert(c);
16e9f408 219
f8019684 220 if (!c->window)
16e9f408
LP
221 return;
222
f8019684
LP
223 w = c->window;
224 c->window = NULL;
71fda00f 225 LIST_REMOVE(by_window, w->contexts, c);
16e9f408 226
1b8951e5 227 if (!w->contexts && !w->keep_always) {
f8019684 228 /* Not used anymore? */
349cc4a5 229#if ENABLE_DEBUG_MMAP_CACHE
fad5a6c6
MS
230 /* Unmap unused windows immediately to expose use-after-unmap
231 * by SIGSEGV. */
232 window_free(w);
233#else
71fda00f 234 LIST_PREPEND(unused, c->cache->unused, w);
f8019684
LP
235 if (!c->cache->last_unused)
236 c->cache->last_unused = w;
16e9f408 237
f8019684 238 w->in_unused = true;
fad5a6c6 239#endif
f8019684 240 }
16e9f408
LP
241}
242
f8019684
LP
243static void context_attach_window(Context *c, Window *w) {
244 assert(c);
245 assert(w);
16e9f408 246
f8019684 247 if (c->window == w)
16e9f408
LP
248 return;
249
f8019684 250 context_detach_window(c);
16e9f408 251
e18021f7 252 if (w->in_unused) {
f8019684 253 /* Used again? */
71fda00f 254 LIST_REMOVE(unused, c->cache->unused, w);
a2ab7ee6
CG
255 if (c->cache->last_unused == w)
256 c->cache->last_unused = w->unused_prev;
16e9f408 257
f8019684
LP
258 w->in_unused = false;
259 }
f65425cb 260
f8019684 261 c->window = w;
71fda00f 262 LIST_PREPEND(by_window, w->contexts, c);
16e9f408
LP
263}
264
f8019684
LP
265static Context *context_add(MMapCache *m, unsigned id) {
266 Context *c;
16e9f408
LP
267
268 assert(m);
269
69adae51 270 c = m->contexts[id];
f8019684
LP
271 if (c)
272 return c;
273
f8019684
LP
274 c = new0(Context, 1);
275 if (!c)
276 return NULL;
16e9f408 277
f8019684
LP
278 c->cache = m;
279 c->id = id;
16e9f408 280
69adae51
MS
281 assert(!m->contexts[id]);
282 m->contexts[id] = c;
16e9f408 283
f8019684 284 return c;
16e9f408
LP
285}
286
f8019684
LP
287static void context_free(Context *c) {
288 assert(c);
16e9f408 289
f8019684 290 context_detach_window(c);
16e9f408 291
69adae51
MS
292 if (c->cache) {
293 assert(c->cache->contexts[c->id] == c);
294 c->cache->contexts[c->id] = NULL;
295 }
16e9f408 296
f8019684
LP
297 free(c);
298}
299
f8019684 300static void mmap_cache_free(MMapCache *m) {
69adae51 301 int i;
f8019684 302
16e9f408 303 assert(m);
16e9f408 304
69adae51
MS
305 for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
306 if (m->contexts[i])
307 context_free(m->contexts[i]);
8e6d9397 308
8e6d9397
GM
309 hashmap_free(m->fds);
310
f8019684
LP
311 while (m->unused)
312 window_free(m->unused);
313
314 free(m);
16e9f408
LP
315}
316
317MMapCache* mmap_cache_unref(MMapCache *m) {
f649045c
LP
318
319 if (!m)
320 return NULL;
321
16e9f408
LP
322 assert(m->n_ref > 0);
323
313cefa1 324 m->n_ref--;
f8019684 325 if (m->n_ref == 0)
16e9f408 326 mmap_cache_free(m);
16e9f408
LP
327
328 return NULL;
329}
330
f8019684
LP
331static int make_room(MMapCache *m) {
332 assert(m);
333
334 if (!m->last_unused)
335 return 0;
336
337 window_free(m->last_unused);
338 return 1;
339}
340
341static int try_context(
342 MMapCache *m,
be7cdd8e 343 MMapFileDescriptor *f,
f8019684
LP
344 int prot,
345 unsigned context,
346 bool keep_always,
347 uint64_t offset,
348 size_t size,
b42549ad
VC
349 void **ret,
350 size_t *ret_size) {
f8019684
LP
351
352 Context *c;
f65425cb 353
16e9f408 354 assert(m);
f8019684 355 assert(m->n_ref > 0);
be7cdd8e 356 assert(f);
f8019684 357 assert(size > 0);
1b8951e5 358 assert(ret);
16e9f408 359
69adae51 360 c = m->contexts[context];
f8019684 361 if (!c)
16e9f408 362 return 0;
16e9f408 363
f8019684 364 assert(c->id == context);
16e9f408 365
f8019684
LP
366 if (!c->window)
367 return 0;
f65425cb 368
8c3d9662 369 if (!window_matches_fd(c->window, f, prot, offset, size)) {
f65425cb 370
f8019684
LP
371 /* Drop the reference to the window, since it's unnecessary now */
372 context_detach_window(c);
373 return 0;
f65425cb
LP
374 }
375
fa6ac760
LP
376 if (c->window->fd->sigbus)
377 return -EIO;
378
739731cd 379 c->window->keep_always = c->window->keep_always || keep_always;
16e9f408 380
1b8951e5 381 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
b42549ad
VC
382 if (ret_size)
383 *ret_size = c->window->size - (offset - c->window->offset);
384
f8019684 385 return 1;
16e9f408
LP
386}
387
f8019684
LP
388static int find_mmap(
389 MMapCache *m,
be7cdd8e 390 MMapFileDescriptor *f,
f8019684
LP
391 int prot,
392 unsigned context,
393 bool keep_always,
394 uint64_t offset,
395 size_t size,
b42549ad
VC
396 void **ret,
397 size_t *ret_size) {
f8019684 398
f8019684
LP
399 Window *w;
400 Context *c;
16e9f408
LP
401
402 assert(m);
f8019684 403 assert(m->n_ref > 0);
be7cdd8e 404 assert(f);
f8019684 405 assert(size > 0);
16e9f408 406
fa6ac760
LP
407 if (f->sigbus)
408 return -EIO;
409
f8019684 410 LIST_FOREACH(by_fd, w, f->windows)
8c3d9662 411 if (window_matches(w, prot, offset, size))
f8019684 412 break;
16e9f408 413
f8019684
LP
414 if (!w)
415 return 0;
416
417 c = context_add(m, context);
418 if (!c)
419 return -ENOMEM;
420
421 context_attach_window(c, w);
739731cd 422 w->keep_always = w->keep_always || keep_always;
16e9f408 423
1b8951e5 424 *ret = (uint8_t*) w->ptr + (offset - w->offset);
b42549ad
VC
425 if (ret_size)
426 *ret_size = w->size - (offset - w->offset);
427
f8019684 428 return 1;
16e9f408
LP
429}
430
be7cdd8e 431static int mmap_try_harder(MMapCache *m, void *addr, MMapFileDescriptor *f, int prot, int flags, uint64_t offset, size_t size, void **res) {
db87967e
VC
432 void *ptr;
433
434 assert(m);
be7cdd8e 435 assert(f);
db87967e
VC
436 assert(res);
437
438 for (;;) {
439 int r;
440
be7cdd8e 441 ptr = mmap(addr, size, prot, flags, f->fd, offset);
db87967e
VC
442 if (ptr != MAP_FAILED)
443 break;
444 if (errno != ENOMEM)
3f0083a2 445 return negative_errno();
db87967e
VC
446
447 r = make_room(m);
448 if (r < 0)
449 return r;
450 if (r == 0)
451 return -ENOMEM;
452 }
453
454 *res = ptr;
455 return 0;
456}
457
f8019684 458static int add_mmap(
16e9f408 459 MMapCache *m,
be7cdd8e 460 MMapFileDescriptor *f,
16e9f408
LP
461 int prot,
462 unsigned context,
fcde2389 463 bool keep_always,
16e9f408 464 uint64_t offset,
f8019684 465 size_t size,
fcde2389 466 struct stat *st,
b42549ad
VC
467 void **ret,
468 size_t *ret_size) {
16e9f408 469
16e9f408 470 uint64_t woffset, wsize;
f8019684 471 Context *c;
f8019684
LP
472 Window *w;
473 void *d;
16e9f408
LP
474 int r;
475
476 assert(m);
f8019684 477 assert(m->n_ref > 0);
be7cdd8e 478 assert(f);
16e9f408 479 assert(size > 0);
1b8951e5 480 assert(ret);
16e9f408
LP
481
482 woffset = offset & ~((uint64_t) page_size() - 1ULL);
483 wsize = size + (offset - woffset);
484 wsize = PAGE_ALIGN(wsize);
485
486 if (wsize < WINDOW_SIZE) {
487 uint64_t delta;
488
beec0085 489 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
16e9f408
LP
490
491 if (delta > offset)
492 woffset = 0;
493 else
494 woffset -= delta;
495
496 wsize = WINDOW_SIZE;
497 }
498
fcde2389
LP
499 if (st) {
500 /* Memory maps that are larger then the files
c5315881 501 underneath have undefined behavior. Hence, clamp
fcde2389
LP
502 things to the file size if we know it */
503
504 if (woffset >= (uint64_t) st->st_size)
505 return -EADDRNOTAVAIL;
506
507 if (woffset + wsize > (uint64_t) st->st_size)
508 wsize = PAGE_ALIGN(st->st_size - woffset);
509 }
510
be7cdd8e 511 r = mmap_try_harder(m, NULL, f, prot, MAP_SHARED, woffset, wsize, &d);
db87967e
VC
512 if (r < 0)
513 return r;
16e9f408 514
f8019684
LP
515 c = context_add(m, context);
516 if (!c)
b67ddc7b 517 goto outofmem;
16e9f408 518
6a491490 519 w = window_add(m, f, prot, keep_always, woffset, wsize, d);
f8019684 520 if (!w)
b67ddc7b 521 goto outofmem;
16e9f408 522
c7884da9 523 context_attach_window(c, w);
16e9f408 524
1b8951e5 525 *ret = (uint8_t*) w->ptr + (offset - w->offset);
b42549ad
VC
526 if (ret_size)
527 *ret_size = w->size - (offset - w->offset);
528
16e9f408 529 return 1;
b67ddc7b
PDS
530
531outofmem:
3f0083a2 532 (void) munmap(d, wsize);
b67ddc7b 533 return -ENOMEM;
16e9f408
LP
534}
535
536int mmap_cache_get(
537 MMapCache *m,
be7cdd8e 538 MMapFileDescriptor *f,
16e9f408
LP
539 int prot,
540 unsigned context,
fcde2389 541 bool keep_always,
16e9f408 542 uint64_t offset,
f8019684 543 size_t size,
fcde2389 544 struct stat *st,
b42549ad
VC
545 void **ret,
546 size_t *ret_size) {
16e9f408 547
16e9f408
LP
548 int r;
549
550 assert(m);
f8019684 551 assert(m->n_ref > 0);
be7cdd8e 552 assert(f);
16e9f408 553 assert(size > 0);
1b8951e5 554 assert(ret);
69adae51 555 assert(context < MMAP_CACHE_MAX_CONTEXTS);
16e9f408 556
f8019684 557 /* Check whether the current context is the right one already */
b42549ad 558 r = try_context(m, f, prot, context, keep_always, offset, size, ret, ret_size);
bf807d4d 559 if (r != 0) {
313cefa1 560 m->n_hit++;
16e9f408 561 return r;
bf807d4d 562 }
16e9f408 563
f8019684 564 /* Search for a matching mmap */
b42549ad 565 r = find_mmap(m, f, prot, context, keep_always, offset, size, ret, ret_size);
bf807d4d 566 if (r != 0) {
313cefa1 567 m->n_hit++;
16e9f408 568 return r;
bf807d4d
LP
569 }
570
571 m->n_missed++;
16e9f408 572
f8019684 573 /* Create a new mmap */
b42549ad 574 return add_mmap(m, f, prot, context, keep_always, offset, size, st, ret, ret_size);
ae97089d
ZJS
575}
576
fa6ac760
LP
577unsigned mmap_cache_get_hit(MMapCache *m) {
578 assert(m);
579
580 return m->n_hit;
581}
582
583unsigned mmap_cache_get_missed(MMapCache *m) {
584 assert(m);
585
586 return m->n_missed;
587}
588
589static void mmap_cache_process_sigbus(MMapCache *m) {
590 bool found = false;
be7cdd8e 591 MMapFileDescriptor *f;
fa6ac760
LP
592 Iterator i;
593 int r;
16e9f408
LP
594
595 assert(m);
16e9f408 596
fa6ac760
LP
597 /* Iterate through all triggered pages and mark their files as
598 * invalidated */
599 for (;;) {
600 bool ours;
601 void *addr;
602
603 r = sigbus_pop(&addr);
604 if (_likely_(r == 0))
605 break;
606 if (r < 0) {
607 log_error_errno(r, "SIGBUS handling failed: %m");
608 abort();
609 }
610
611 ours = false;
612 HASHMAP_FOREACH(f, m->fds, i) {
613 Window *w;
614
615 LIST_FOREACH(by_fd, w, f->windows) {
616 if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
617 (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
618 found = ours = f->sigbus = true;
619 break;
620 }
621 }
622
623 if (ours)
624 break;
625 }
626
627 /* Didn't find a matching window, give up */
628 if (!ours) {
629 log_error("Unknown SIGBUS page, aborting.");
630 abort();
631 }
632 }
633
634 /* The list of triggered pages is now empty. Now, let's remap
635 * all windows of the triggered file to anonymous maps, so
636 * that no page of the file in question is triggered again, so
637 * that we can be sure not to hit the queue size limit. */
638 if (_likely_(!found))
16e9f408 639 return;
16e9f408 640
fa6ac760
LP
641 HASHMAP_FOREACH(f, m->fds, i) {
642 Window *w;
643
644 if (!f->sigbus)
645 continue;
646
647 LIST_FOREACH(by_fd, w, f->windows)
648 window_invalidate(w);
649 }
f8019684 650}
16e9f408 651
be7cdd8e 652bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f) {
bf807d4d 653 assert(m);
be7cdd8e 654 assert(f);
bf807d4d 655
fa6ac760
LP
656 mmap_cache_process_sigbus(m);
657
fa6ac760 658 return f->sigbus;
bf807d4d
LP
659}
660
be7cdd8e
VC
661MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd) {
662 MMapFileDescriptor *f;
663 int r;
fa6ac760 664
bf807d4d 665 assert(m);
fa6ac760 666 assert(fd >= 0);
bf807d4d 667
be7cdd8e
VC
668 f = hashmap_get(m->fds, FD_TO_PTR(fd));
669 if (f)
670 return f;
671
672 r = hashmap_ensure_allocated(&m->fds, NULL);
673 if (r < 0)
674 return NULL;
675
676 f = new0(MMapFileDescriptor, 1);
677 if (!f)
678 return NULL;
679
680 f->cache = m;
681 f->fd = fd;
682
683 r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
684 if (r < 0)
685 return mfree(f);
686
687 return f;
688}
689
690void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f) {
691 assert(m);
692 assert(f);
693
fa6ac760
LP
694 /* Make sure that any queued SIGBUS are first dispatched, so
695 * that we don't end up with a SIGBUS entry we cannot relate
696 * to any existing memory map */
697
698 mmap_cache_process_sigbus(m);
699
be7cdd8e
VC
700 while (f->windows)
701 window_free(f->windows);
702
703 if (f->cache)
704 assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
fa6ac760 705
be7cdd8e 706 free(f);
bf807d4d 707}