]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/mmap-cache.c
Merge pull request #11827 from keszybz/pkgconfig-variables
[thirdparty/systemd.git] / src / journal / mmap-cache.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
16e9f408 2
16e9f408
LP
3#include <errno.h>
4#include <stdlib.h>
f8019684 5#include <sys/mman.h>
16e9f408 6
b5efdb8a 7#include "alloc-util.h"
23e096cc 8#include "fd-util.h"
f8019684
LP
9#include "hashmap.h"
10#include "list.h"
11#include "log.h"
f8019684 12#include "macro.h"
16e9f408 13#include "mmap-cache.h"
cf0fbc49
TA
14#include "sigbus.h"
15#include "util.h"
16e9f408 16
f8019684
LP
17typedef struct Window Window;
18typedef struct Context Context;
84168d80 19
f8019684
LP
20struct Window {
21 MMapCache *cache;
22
739731cd
LP
23 bool invalidated:1;
24 bool keep_always:1;
25 bool in_unused:1;
16e9f408 26
68667801 27 int prot;
16e9f408
LP
28 void *ptr;
29 uint64_t offset;
f8019684
LP
30 size_t size;
31
be7cdd8e 32 MMapFileDescriptor *fd;
16e9f408 33
f8019684
LP
34 LIST_FIELDS(Window, by_fd);
35 LIST_FIELDS(Window, unused);
36
37 LIST_HEAD(Context, contexts);
38};
16e9f408 39
f8019684
LP
40struct Context {
41 MMapCache *cache;
42 unsigned id;
43 Window *window;
16e9f408 44
f8019684
LP
45 LIST_FIELDS(Context, by_window);
46};
47
be7cdd8e 48struct MMapFileDescriptor {
f8019684 49 MMapCache *cache;
16e9f408 50 int fd;
fa6ac760 51 bool sigbus;
f8019684
LP
52 LIST_HEAD(Window, windows);
53};
16e9f408
LP
54
55struct MMapCache {
cf4b2f99 56 unsigned n_ref;
68667801 57 unsigned n_windows;
16e9f408 58
bf807d4d
LP
59 unsigned n_hit, n_missed;
60
f8019684 61 Hashmap *fds;
69adae51 62 Context *contexts[MMAP_CACHE_MAX_CONTEXTS];
16e9f408 63
f8019684
LP
64 LIST_HEAD(Window, unused);
65 Window *last_unused;
16e9f408
LP
66};
67
/* As long as no more than this many windows are allocated, window_add() allocates
 * fresh Window objects instead of recycling unused ones. */
#define WINDOWS_MIN 64

/* Minimum size of a newly created window. */
#if ENABLE_DEBUG_MMAP_CACHE
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
# define WINDOW_SIZE (page_size())
#else
# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#endif
16e9f408 76
f8019684
LP
77MMapCache* mmap_cache_new(void) {
78 MMapCache *m;
16e9f408 79
f8019684
LP
80 m = new0(MMapCache, 1);
81 if (!m)
82 return NULL;
16e9f408 83
f8019684
LP
84 m->n_ref = 1;
85 return m;
16e9f408
LP
86}
87
f8019684
LP
88static void window_unlink(Window *w) {
89 Context *c;
f65425cb 90
f8019684 91 assert(w);
16e9f408 92
f8019684
LP
93 if (w->ptr)
94 munmap(w->ptr, w->size);
16e9f408 95
f8019684 96 if (w->fd)
71fda00f 97 LIST_REMOVE(by_fd, w->fd->windows, w);
16e9f408 98
f8019684
LP
99 if (w->in_unused) {
100 if (w->cache->last_unused == w)
101 w->cache->last_unused = w->unused_prev;
16e9f408 102
71fda00f 103 LIST_REMOVE(unused, w->cache->unused, w);
f65425cb 104 }
16e9f408 105
f8019684
LP
106 LIST_FOREACH(by_window, c, w->contexts) {
107 assert(c->window == w);
108 c->window = NULL;
f65425cb 109 }
16e9f408
LP
110}
111
fa6ac760
LP
112static void window_invalidate(Window *w) {
113 assert(w);
114
115 if (w->invalidated)
116 return;
117
118 /* Replace the window with anonymous pages. This is useful
119 * when we hit a SIGBUS and want to make sure the file cannot
120 * trigger any further SIGBUS, possibly overrunning the sigbus
121 * queue. */
122
123 assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
124 w->invalidated = true;
125}
126
f8019684
LP
127static void window_free(Window *w) {
128 assert(w);
f65425cb 129
f8019684 130 window_unlink(w);
89de6947 131 w->cache->n_windows--;
f8019684
LP
132 free(w);
133}
f65425cb 134
a1e92eee 135_pure_ static bool window_matches(Window *w, int prot, uint64_t offset, size_t size) {
f8019684 136 assert(w);
f8019684 137 assert(size > 0);
16e9f408 138
f8019684 139 return
f8019684
LP
140 prot == w->prot &&
141 offset >= w->offset &&
142 offset + size <= w->offset + w->size;
16e9f408
LP
143}
144
8c3d9662
VC
145_pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, int prot, uint64_t offset, size_t size) {
146 assert(w);
147 assert(f);
148
149 return
150 w->fd &&
151 f->fd == w->fd->fd &&
152 window_matches(w, prot, offset, size);
153}
154
be7cdd8e 155static Window *window_add(MMapCache *m, MMapFileDescriptor *f, int prot, bool keep_always, uint64_t offset, size_t size, void *ptr) {
f8019684 156 Window *w;
16e9f408
LP
157
158 assert(m);
be7cdd8e 159 assert(f);
16e9f408 160
f8019684 161 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
f65425cb 162
f8019684
LP
163 /* Allocate a new window */
164 w = new0(Window, 1);
165 if (!w)
166 return NULL;
89de6947 167 m->n_windows++;
f65425cb 168 } else {
16e9f408 169
f8019684
LP
170 /* Reuse an existing one */
171 w = m->last_unused;
172 window_unlink(w);
173 zero(*w);
f65425cb 174 }
f8019684
LP
175
176 w->cache = m;
be7cdd8e 177 w->fd = f;
6a491490
VC
178 w->prot = prot;
179 w->keep_always = keep_always;
180 w->offset = offset;
181 w->size = size;
182 w->ptr = ptr;
183
be7cdd8e 184 LIST_PREPEND(by_fd, f->windows, w);
6a491490 185
f8019684 186 return w;
16e9f408
LP
187}
188
f8019684
LP
189static void context_detach_window(Context *c) {
190 Window *w;
16e9f408 191
f8019684 192 assert(c);
16e9f408 193
f8019684 194 if (!c->window)
16e9f408
LP
195 return;
196
ae2a15bc 197 w = TAKE_PTR(c->window);
71fda00f 198 LIST_REMOVE(by_window, w->contexts, c);
16e9f408 199
1b8951e5 200 if (!w->contexts && !w->keep_always) {
f8019684 201 /* Not used anymore? */
349cc4a5 202#if ENABLE_DEBUG_MMAP_CACHE
fad5a6c6
MS
203 /* Unmap unused windows immediately to expose use-after-unmap
204 * by SIGSEGV. */
205 window_free(w);
206#else
71fda00f 207 LIST_PREPEND(unused, c->cache->unused, w);
f8019684
LP
208 if (!c->cache->last_unused)
209 c->cache->last_unused = w;
16e9f408 210
f8019684 211 w->in_unused = true;
fad5a6c6 212#endif
f8019684 213 }
16e9f408
LP
214}
215
f8019684
LP
216static void context_attach_window(Context *c, Window *w) {
217 assert(c);
218 assert(w);
16e9f408 219
f8019684 220 if (c->window == w)
16e9f408
LP
221 return;
222
f8019684 223 context_detach_window(c);
16e9f408 224
e18021f7 225 if (w->in_unused) {
f8019684 226 /* Used again? */
71fda00f 227 LIST_REMOVE(unused, c->cache->unused, w);
a2ab7ee6
CG
228 if (c->cache->last_unused == w)
229 c->cache->last_unused = w->unused_prev;
16e9f408 230
f8019684
LP
231 w->in_unused = false;
232 }
f65425cb 233
f8019684 234 c->window = w;
71fda00f 235 LIST_PREPEND(by_window, w->contexts, c);
16e9f408
LP
236}
237
f8019684
LP
238static Context *context_add(MMapCache *m, unsigned id) {
239 Context *c;
16e9f408
LP
240
241 assert(m);
242
69adae51 243 c = m->contexts[id];
f8019684
LP
244 if (c)
245 return c;
246
f8019684
LP
247 c = new0(Context, 1);
248 if (!c)
249 return NULL;
16e9f408 250
f8019684
LP
251 c->cache = m;
252 c->id = id;
16e9f408 253
69adae51
MS
254 assert(!m->contexts[id]);
255 m->contexts[id] = c;
16e9f408 256
f8019684 257 return c;
16e9f408
LP
258}
259
f8019684
LP
260static void context_free(Context *c) {
261 assert(c);
16e9f408 262
f8019684 263 context_detach_window(c);
16e9f408 264
69adae51
MS
265 if (c->cache) {
266 assert(c->cache->contexts[c->id] == c);
267 c->cache->contexts[c->id] = NULL;
268 }
16e9f408 269
f8019684
LP
270 free(c);
271}
272
8301aa0b 273static MMapCache *mmap_cache_free(MMapCache *m) {
69adae51 274 int i;
f8019684 275
16e9f408 276 assert(m);
16e9f408 277
69adae51
MS
278 for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
279 if (m->contexts[i])
280 context_free(m->contexts[i]);
8e6d9397 281
8e6d9397
GM
282 hashmap_free(m->fds);
283
f8019684
LP
284 while (m->unused)
285 window_free(m->unused);
286
8301aa0b 287 return mfree(m);
16e9f408
LP
288}
289
8301aa0b 290DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache, mmap_cache, mmap_cache_free);
16e9f408 291
f8019684
LP
292static int make_room(MMapCache *m) {
293 assert(m);
294
295 if (!m->last_unused)
296 return 0;
297
298 window_free(m->last_unused);
299 return 1;
300}
301
302static int try_context(
303 MMapCache *m,
be7cdd8e 304 MMapFileDescriptor *f,
f8019684
LP
305 int prot,
306 unsigned context,
307 bool keep_always,
308 uint64_t offset,
309 size_t size,
b42549ad
VC
310 void **ret,
311 size_t *ret_size) {
f8019684
LP
312
313 Context *c;
f65425cb 314
16e9f408 315 assert(m);
f8019684 316 assert(m->n_ref > 0);
be7cdd8e 317 assert(f);
f8019684 318 assert(size > 0);
1b8951e5 319 assert(ret);
16e9f408 320
69adae51 321 c = m->contexts[context];
f8019684 322 if (!c)
16e9f408 323 return 0;
16e9f408 324
f8019684 325 assert(c->id == context);
16e9f408 326
f8019684
LP
327 if (!c->window)
328 return 0;
f65425cb 329
8c3d9662 330 if (!window_matches_fd(c->window, f, prot, offset, size)) {
f65425cb 331
f8019684
LP
332 /* Drop the reference to the window, since it's unnecessary now */
333 context_detach_window(c);
334 return 0;
f65425cb
LP
335 }
336
fa6ac760
LP
337 if (c->window->fd->sigbus)
338 return -EIO;
339
739731cd 340 c->window->keep_always = c->window->keep_always || keep_always;
16e9f408 341
1b8951e5 342 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
b42549ad
VC
343 if (ret_size)
344 *ret_size = c->window->size - (offset - c->window->offset);
345
f8019684 346 return 1;
16e9f408
LP
347}
348
f8019684
LP
349static int find_mmap(
350 MMapCache *m,
be7cdd8e 351 MMapFileDescriptor *f,
f8019684
LP
352 int prot,
353 unsigned context,
354 bool keep_always,
355 uint64_t offset,
356 size_t size,
b42549ad
VC
357 void **ret,
358 size_t *ret_size) {
f8019684 359
f8019684
LP
360 Window *w;
361 Context *c;
16e9f408
LP
362
363 assert(m);
f8019684 364 assert(m->n_ref > 0);
be7cdd8e 365 assert(f);
f8019684 366 assert(size > 0);
16e9f408 367
fa6ac760
LP
368 if (f->sigbus)
369 return -EIO;
370
f8019684 371 LIST_FOREACH(by_fd, w, f->windows)
8c3d9662 372 if (window_matches(w, prot, offset, size))
f8019684 373 break;
16e9f408 374
f8019684
LP
375 if (!w)
376 return 0;
377
378 c = context_add(m, context);
379 if (!c)
380 return -ENOMEM;
381
382 context_attach_window(c, w);
739731cd 383 w->keep_always = w->keep_always || keep_always;
16e9f408 384
1b8951e5 385 *ret = (uint8_t*) w->ptr + (offset - w->offset);
b42549ad
VC
386 if (ret_size)
387 *ret_size = w->size - (offset - w->offset);
388
f8019684 389 return 1;
16e9f408
LP
390}
391
be7cdd8e 392static int mmap_try_harder(MMapCache *m, void *addr, MMapFileDescriptor *f, int prot, int flags, uint64_t offset, size_t size, void **res) {
db87967e
VC
393 void *ptr;
394
395 assert(m);
be7cdd8e 396 assert(f);
db87967e
VC
397 assert(res);
398
399 for (;;) {
400 int r;
401
be7cdd8e 402 ptr = mmap(addr, size, prot, flags, f->fd, offset);
db87967e
VC
403 if (ptr != MAP_FAILED)
404 break;
405 if (errno != ENOMEM)
3f0083a2 406 return negative_errno();
db87967e
VC
407
408 r = make_room(m);
409 if (r < 0)
410 return r;
411 if (r == 0)
412 return -ENOMEM;
413 }
414
415 *res = ptr;
416 return 0;
417}
418
f8019684 419static int add_mmap(
16e9f408 420 MMapCache *m,
be7cdd8e 421 MMapFileDescriptor *f,
16e9f408
LP
422 int prot,
423 unsigned context,
fcde2389 424 bool keep_always,
16e9f408 425 uint64_t offset,
f8019684 426 size_t size,
fcde2389 427 struct stat *st,
b42549ad
VC
428 void **ret,
429 size_t *ret_size) {
16e9f408 430
16e9f408 431 uint64_t woffset, wsize;
f8019684 432 Context *c;
f8019684
LP
433 Window *w;
434 void *d;
16e9f408
LP
435 int r;
436
437 assert(m);
f8019684 438 assert(m->n_ref > 0);
be7cdd8e 439 assert(f);
16e9f408 440 assert(size > 0);
1b8951e5 441 assert(ret);
16e9f408
LP
442
443 woffset = offset & ~((uint64_t) page_size() - 1ULL);
444 wsize = size + (offset - woffset);
445 wsize = PAGE_ALIGN(wsize);
446
447 if (wsize < WINDOW_SIZE) {
448 uint64_t delta;
449
beec0085 450 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
16e9f408
LP
451
452 if (delta > offset)
453 woffset = 0;
454 else
455 woffset -= delta;
456
457 wsize = WINDOW_SIZE;
458 }
459
fcde2389
LP
460 if (st) {
461 /* Memory maps that are larger then the files
c5315881 462 underneath have undefined behavior. Hence, clamp
fcde2389
LP
463 things to the file size if we know it */
464
465 if (woffset >= (uint64_t) st->st_size)
466 return -EADDRNOTAVAIL;
467
468 if (woffset + wsize > (uint64_t) st->st_size)
469 wsize = PAGE_ALIGN(st->st_size - woffset);
470 }
471
be7cdd8e 472 r = mmap_try_harder(m, NULL, f, prot, MAP_SHARED, woffset, wsize, &d);
db87967e
VC
473 if (r < 0)
474 return r;
16e9f408 475
f8019684
LP
476 c = context_add(m, context);
477 if (!c)
b67ddc7b 478 goto outofmem;
16e9f408 479
6a491490 480 w = window_add(m, f, prot, keep_always, woffset, wsize, d);
f8019684 481 if (!w)
b67ddc7b 482 goto outofmem;
16e9f408 483
c7884da9 484 context_attach_window(c, w);
16e9f408 485
1b8951e5 486 *ret = (uint8_t*) w->ptr + (offset - w->offset);
b42549ad
VC
487 if (ret_size)
488 *ret_size = w->size - (offset - w->offset);
489
16e9f408 490 return 1;
b67ddc7b
PDS
491
492outofmem:
3f0083a2 493 (void) munmap(d, wsize);
b67ddc7b 494 return -ENOMEM;
16e9f408
LP
495}
496
497int mmap_cache_get(
498 MMapCache *m,
be7cdd8e 499 MMapFileDescriptor *f,
16e9f408
LP
500 int prot,
501 unsigned context,
fcde2389 502 bool keep_always,
16e9f408 503 uint64_t offset,
f8019684 504 size_t size,
fcde2389 505 struct stat *st,
b42549ad
VC
506 void **ret,
507 size_t *ret_size) {
16e9f408 508
16e9f408
LP
509 int r;
510
511 assert(m);
f8019684 512 assert(m->n_ref > 0);
be7cdd8e 513 assert(f);
16e9f408 514 assert(size > 0);
1b8951e5 515 assert(ret);
69adae51 516 assert(context < MMAP_CACHE_MAX_CONTEXTS);
16e9f408 517
f8019684 518 /* Check whether the current context is the right one already */
b42549ad 519 r = try_context(m, f, prot, context, keep_always, offset, size, ret, ret_size);
bf807d4d 520 if (r != 0) {
313cefa1 521 m->n_hit++;
16e9f408 522 return r;
bf807d4d 523 }
16e9f408 524
f8019684 525 /* Search for a matching mmap */
b42549ad 526 r = find_mmap(m, f, prot, context, keep_always, offset, size, ret, ret_size);
bf807d4d 527 if (r != 0) {
313cefa1 528 m->n_hit++;
16e9f408 529 return r;
bf807d4d
LP
530 }
531
532 m->n_missed++;
16e9f408 533
f8019684 534 /* Create a new mmap */
b42549ad 535 return add_mmap(m, f, prot, context, keep_always, offset, size, st, ret, ret_size);
ae97089d
ZJS
536}
537
fa6ac760
LP
538unsigned mmap_cache_get_hit(MMapCache *m) {
539 assert(m);
540
541 return m->n_hit;
542}
543
544unsigned mmap_cache_get_missed(MMapCache *m) {
545 assert(m);
546
547 return m->n_missed;
548}
549
550static void mmap_cache_process_sigbus(MMapCache *m) {
551 bool found = false;
be7cdd8e 552 MMapFileDescriptor *f;
fa6ac760
LP
553 Iterator i;
554 int r;
16e9f408
LP
555
556 assert(m);
16e9f408 557
fa6ac760
LP
558 /* Iterate through all triggered pages and mark their files as
559 * invalidated */
560 for (;;) {
561 bool ours;
562 void *addr;
563
564 r = sigbus_pop(&addr);
565 if (_likely_(r == 0))
566 break;
567 if (r < 0) {
568 log_error_errno(r, "SIGBUS handling failed: %m");
569 abort();
570 }
571
572 ours = false;
573 HASHMAP_FOREACH(f, m->fds, i) {
574 Window *w;
575
576 LIST_FOREACH(by_fd, w, f->windows) {
577 if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
578 (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
579 found = ours = f->sigbus = true;
580 break;
581 }
582 }
583
584 if (ours)
585 break;
586 }
587
588 /* Didn't find a matching window, give up */
589 if (!ours) {
590 log_error("Unknown SIGBUS page, aborting.");
591 abort();
592 }
593 }
594
595 /* The list of triggered pages is now empty. Now, let's remap
596 * all windows of the triggered file to anonymous maps, so
597 * that no page of the file in question is triggered again, so
598 * that we can be sure not to hit the queue size limit. */
599 if (_likely_(!found))
16e9f408 600 return;
16e9f408 601
fa6ac760
LP
602 HASHMAP_FOREACH(f, m->fds, i) {
603 Window *w;
604
605 if (!f->sigbus)
606 continue;
607
608 LIST_FOREACH(by_fd, w, f->windows)
609 window_invalidate(w);
610 }
f8019684 611}
16e9f408 612
be7cdd8e 613bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f) {
bf807d4d 614 assert(m);
be7cdd8e 615 assert(f);
bf807d4d 616
fa6ac760
LP
617 mmap_cache_process_sigbus(m);
618
fa6ac760 619 return f->sigbus;
bf807d4d
LP
620}
621
be7cdd8e
VC
622MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd) {
623 MMapFileDescriptor *f;
624 int r;
fa6ac760 625
bf807d4d 626 assert(m);
fa6ac760 627 assert(fd >= 0);
bf807d4d 628
be7cdd8e
VC
629 f = hashmap_get(m->fds, FD_TO_PTR(fd));
630 if (f)
631 return f;
632
633 r = hashmap_ensure_allocated(&m->fds, NULL);
634 if (r < 0)
635 return NULL;
636
637 f = new0(MMapFileDescriptor, 1);
638 if (!f)
639 return NULL;
640
641 f->cache = m;
642 f->fd = fd;
643
644 r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
645 if (r < 0)
646 return mfree(f);
647
648 return f;
649}
650
651void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f) {
652 assert(m);
653 assert(f);
654
fa6ac760
LP
655 /* Make sure that any queued SIGBUS are first dispatched, so
656 * that we don't end up with a SIGBUS entry we cannot relate
657 * to any existing memory map */
658
659 mmap_cache_process_sigbus(m);
660
be7cdd8e
VC
661 while (f->windows)
662 window_free(f->windows);
663
664 if (f->cache)
665 assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
fa6ac760 666
be7cdd8e 667 free(f);
bf807d4d 668}