]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/libsystemd/sd-journal/mmap-cache.c
Fixes for vscode/intellisense parsing (#38040)
[thirdparty/systemd.git] / src / libsystemd / sd-journal / mmap-cache.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
16e9f408 2
16e9f408 3#include <stdlib.h>
f8019684 4#include <sys/mman.h>
16e9f408 5
b5efdb8a 6#include "alloc-util.h"
1e4e5572 7#include "bitfield.h"
2b2fec7d 8#include "errno-util.h"
23e096cc 9#include "fd-util.h"
f8019684
LP
10#include "hashmap.h"
11#include "list.h"
12#include "log.h"
0a970718 13#include "memory-util.h"
16e9f408 14#include "mmap-cache.h"
cf0fbc49 15#include "sigbus.h"
16e9f408 16
f8019684 17typedef struct Window Window;
2ebc9cc8
YW
18
19typedef enum WindowFlags {
1a25ab66
YW
20 WINDOW_KEEP_ALWAYS = 1u << (_MMAP_CACHE_CATEGORY_MAX + 0),
21 WINDOW_IN_UNUSED = 1u << (_MMAP_CACHE_CATEGORY_MAX + 1),
22 WINDOW_INVALIDATED = 1u << (_MMAP_CACHE_CATEGORY_MAX + 2),
2ebc9cc8
YW
23
24 _WINDOW_USED_MASK = WINDOW_IN_UNUSED - 1, /* The mask contains all bits that indicate the windows
25 * is currently in use. Covers the all the object types
26 * and the additional WINDOW_KEEP_ALWAYS flag. */
27} WindowFlags;
28
29#define WINDOW_IS_UNUSED(w) (((w)->flags & _WINDOW_USED_MASK) == 0)
84168d80 30
f8019684 31struct Window {
0073f6c6 32 MMapFileDescriptor *fd;
f8019684 33
2ebc9cc8 34 WindowFlags flags;
16e9f408 35
16e9f408
LP
36 void *ptr;
37 uint64_t offset;
f8019684
LP
38 size_t size;
39
0073f6c6 40 LIST_FIELDS(Window, windows);
f8019684 41 LIST_FIELDS(Window, unused);
f8019684
LP
42};
43
be7cdd8e 44struct MMapFileDescriptor {
f8019684 45 MMapCache *cache;
2ebc9cc8 46
16e9f408 47 int fd;
104fc4be 48 int prot;
fa6ac760 49 bool sigbus;
2ebc9cc8 50
f8019684
LP
51 LIST_HEAD(Window, windows);
52};
16e9f408
LP
53
54struct MMapCache {
cf4b2f99 55 unsigned n_ref;
68667801 56 unsigned n_windows;
16e9f408 57
1a25ab66 58 unsigned n_category_cache_hit;
8fc4d1be
YW
59 unsigned n_window_list_hit;
60 unsigned n_missed;
bf807d4d 61
f8019684 62 Hashmap *fds;
16e9f408 63
f8019684
LP
64 LIST_HEAD(Window, unused);
65 Window *last_unused;
176f7327 66 unsigned n_unused;
7580b0d8 67
1a25ab66 68 Window *windows_by_category[_MMAP_CACHE_CATEGORY_MAX];
16e9f408
LP
69};
70
/* window_add() only recycles an unused window once at least WINDOWS_MIN windows exist and at least
 * UNUSED_MIN of them sit on the unused list; below those thresholds it allocates a new one. */
#define WINDOWS_MIN 64
#define UNUSED_MIN 4

/* Minimum size of a window; add_mmap() grows smaller requests to this size. */
#if !ENABLE_DEBUG_MMAP_CACHE
#  define WINDOW_SIZE ((size_t) (UINT64_C(8) * UINT64_C(1024) * UINT64_C(1024)))
#else
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
#  define WINDOW_SIZE (page_size())
#endif
16e9f408 80
f8019684
LP
81MMapCache* mmap_cache_new(void) {
82 MMapCache *m;
16e9f408 83
397caa81 84 m = new(MMapCache, 1);
f8019684
LP
85 if (!m)
86 return NULL;
16e9f408 87
397caa81
YW
88 *m = (MMapCache) {
89 .n_ref = 1,
90 };
91
f8019684 92 return m;
16e9f408
LP
93}
94
b38977e5 95static Window* window_unlink(Window *w) {
f8019684 96 assert(w);
16e9f408 97
b38977e5
YW
98 MMapCache *m = mmap_cache_fd_cache(w->fd);
99
f8019684
LP
100 if (w->ptr)
101 munmap(w->ptr, w->size);
16e9f408 102
2ebc9cc8 103 if (FLAGS_SET(w->flags, WINDOW_IN_UNUSED)) {
b38977e5
YW
104 if (m->last_unused == w)
105 m->last_unused = w->unused_prev;
b38977e5 106 LIST_REMOVE(unused, m->unused, w);
176f7327 107 m->n_unused--;
f65425cb 108 }
16e9f408 109
1a25ab66 110 for (unsigned i = 0; i < _MMAP_CACHE_CATEGORY_MAX; i++)
1e4e5572 111 if (BIT_SET(w->flags, i))
1a25ab66 112 assert_se(TAKE_PTR(m->windows_by_category[i]) == w);
b38977e5 113
0073f6c6 114 return LIST_REMOVE(windows, w->fd->windows, w);
16e9f408
LP
115}
116
fa6ac760
LP
117static void window_invalidate(Window *w) {
118 assert(w);
104fc4be 119 assert(w->fd);
fa6ac760 120
2ebc9cc8 121 if (FLAGS_SET(w->flags, WINDOW_INVALIDATED))
fa6ac760
LP
122 return;
123
8fc4d1be
YW
124 /* Replace the window with anonymous pages. This is useful when we hit a SIGBUS and want to make sure
125 * the file cannot trigger any further SIGBUS, possibly overrunning the sigbus queue. */
fa6ac760 126
104fc4be 127 assert_se(mmap(w->ptr, w->size, w->fd->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
2ebc9cc8 128 w->flags |= WINDOW_INVALIDATED;
fa6ac760
LP
129}
130
b38977e5
YW
131static Window* window_free(Window *w) {
132 if (!w)
133 return NULL;
f65425cb 134
f8019684 135 window_unlink(w);
0073f6c6 136 w->fd->cache->n_windows--;
b38977e5
YW
137
138 return mfree(w);
f8019684 139}
f65425cb 140
81598f5e 141static bool window_matches(Window *w, MMapFileDescriptor *f, uint64_t offset, size_t size) {
f8019684 142 assert(size > 0);
16e9f408 143
f8019684 144 return
81598f5e
YW
145 w &&
146 f == w->fd &&
f8019684
LP
147 offset >= w->offset &&
148 offset + size <= w->offset + w->size;
16e9f408
LP
149}
150
7a56f6b2
YW
151static bool window_matches_by_addr(Window *w, MMapFileDescriptor *f, void *addr, size_t size) {
152 assert(size > 0);
153
154 return
155 w &&
156 f == w->fd &&
157 (uint8_t*) addr >= (uint8_t*) w->ptr &&
158 (uint8_t*) addr + size <= (uint8_t*) w->ptr + w->size;
159}
160
40f5e6a9
YW
161static Window* window_add(MMapFileDescriptor *f, uint64_t offset, size_t size, void *ptr) {
162 MMapCache *m = mmap_cache_fd_cache(f);
f8019684 163 Window *w;
16e9f408 164
176f7327 165 if (!m->last_unused || m->n_windows < WINDOWS_MIN || m->n_unused < UNUSED_MIN) {
f8019684 166 /* Allocate a new window */
41ab8c67 167 w = new(Window, 1);
f8019684
LP
168 if (!w)
169 return NULL;
89de6947 170 m->n_windows++;
b38977e5 171 } else
f8019684 172 /* Reuse an existing one */
b38977e5 173 w = window_unlink(m->last_unused);
f8019684 174
41ab8c67 175 *w = (Window) {
41ab8c67 176 .fd = f,
41ab8c67
LP
177 .offset = offset,
178 .size = size,
179 .ptr = ptr,
180 };
6a491490 181
0073f6c6 182 return LIST_PREPEND(windows, f->windows, w);
16e9f408
LP
183}
184
1a25ab66 185static void category_detach_window(MMapCache *m, MMapCacheCategory c) {
f8019684 186 Window *w;
16e9f408 187
7580b0d8 188 assert(m);
1a25ab66 189 assert(c >= 0 && c < _MMAP_CACHE_CATEGORY_MAX);
16e9f408 190
1a25ab66 191 w = TAKE_PTR(m->windows_by_category[c]);
2ebc9cc8
YW
192 if (!w)
193 return; /* Nothing attached. */
16e9f408 194
1e4e5572 195 assert(BIT_SET(w->flags, c));
2ebc9cc8 196 w->flags &= ~(1u << c);
16e9f408 197
2ebc9cc8 198 if (WINDOW_IS_UNUSED(w)) {
f8019684 199 /* Not used anymore? */
349cc4a5 200#if ENABLE_DEBUG_MMAP_CACHE
8fc4d1be 201 /* Unmap unused windows immediately to expose use-after-unmap by SIGSEGV. */
fad5a6c6
MS
202 window_free(w);
203#else
7580b0d8
VC
204 LIST_PREPEND(unused, m->unused, w);
205 if (!m->last_unused)
206 m->last_unused = w;
176f7327 207 m->n_unused++;
2ebc9cc8 208 w->flags |= WINDOW_IN_UNUSED;
fad5a6c6 209#endif
f8019684 210 }
16e9f408
LP
211}
212
1a25ab66 213static void category_attach_window(MMapCache *m, MMapCacheCategory c, Window *w) {
7580b0d8 214 assert(m);
1a25ab66 215 assert(c >= 0 && c < _MMAP_CACHE_CATEGORY_MAX);
f8019684 216 assert(w);
16e9f408 217
1a25ab66 218 if (m->windows_by_category[c] == w)
2ebc9cc8 219 return; /* Already attached. */
16e9f408 220
1a25ab66 221 category_detach_window(m, c);
16e9f408 222
2ebc9cc8 223 if (FLAGS_SET(w->flags, WINDOW_IN_UNUSED)) {
f8019684 224 /* Used again? */
7580b0d8
VC
225 if (m->last_unused == w)
226 m->last_unused = w->unused_prev;
227 LIST_REMOVE(unused, m->unused, w);
176f7327 228 m->n_unused--;
2ebc9cc8 229 w->flags &= ~WINDOW_IN_UNUSED;
f8019684 230 }
f65425cb 231
1a25ab66 232 m->windows_by_category[c] = w;
2ebc9cc8 233 w->flags |= (1u << c);
16e9f408
LP
234}
235
b38977e5
YW
236static MMapCache* mmap_cache_free(MMapCache *m) {
237 if (!m)
238 return NULL;
16e9f408 239
b38977e5
YW
240 /* All windows are owned by fds, and each fd takes a reference of MMapCache. So, when this is called,
241 * all fds are already freed, and hence there is no window. */
8e6d9397 242
b38977e5 243 assert(hashmap_isempty(m->fds));
8e6d9397
GM
244 hashmap_free(m->fds);
245
176f7327 246 assert(!m->unused && m->n_unused == 0);
b38977e5 247 assert(m->n_windows == 0);
f8019684 248
8301aa0b 249 return mfree(m);
16e9f408
LP
250}
251
8301aa0b 252DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache, mmap_cache, mmap_cache_free);
16e9f408 253
1ed867d3
YW
254static int mmap_try_harder(MMapFileDescriptor *f, void *addr, int flags, uint64_t offset, size_t size, void **ret) {
255 MMapCache *m = mmap_cache_fd_cache(f);
db87967e 256
1ed867d3 257 assert(ret);
db87967e
VC
258
259 for (;;) {
1ed867d3 260 void *ptr;
db87967e 261
104fc4be 262 ptr = mmap(addr, size, f->prot, flags, f->fd, offset);
1ed867d3
YW
263 if (ptr != MAP_FAILED) {
264 *ret = ptr;
265 return 0;
266 }
db87967e 267 if (errno != ENOMEM)
3f0083a2 268 return negative_errno();
db87967e 269
1ed867d3
YW
270 /* When failed with ENOMEM, try again after making a room by freeing an unused window. */
271
272 if (!m->last_unused)
273 return -ENOMEM; /* no free window, propagate the original error. */
db87967e 274
1ed867d3
YW
275 window_free(m->last_unused);
276 }
db87967e
VC
277}
278
f8019684 279static int add_mmap(
be7cdd8e 280 MMapFileDescriptor *f,
16e9f408 281 uint64_t offset,
f8019684 282 size_t size,
fcde2389 283 struct stat *st,
40f5e6a9 284 Window **ret) {
16e9f408 285
f8019684
LP
286 Window *w;
287 void *d;
16e9f408
LP
288 int r;
289
be7cdd8e 290 assert(f);
16e9f408 291 assert(size > 0);
1b8951e5 292 assert(ret);
16e9f408 293
c886f2d2
YW
294 /* overflow check */
295 if (size > SIZE_MAX - PAGE_OFFSET_U64(offset))
296 return -EADDRNOTAVAIL;
16e9f408 297
c886f2d2
YW
298 size = PAGE_ALIGN(size + PAGE_OFFSET_U64(offset));
299 offset = PAGE_ALIGN_DOWN_U64(offset);
16e9f408 300
c886f2d2
YW
301 if (size < WINDOW_SIZE) {
302 uint64_t delta;
16e9f408 303
c886f2d2
YW
304 delta = PAGE_ALIGN((WINDOW_SIZE - size) / 2);
305 offset = LESS_BY(offset, delta);
306 size = WINDOW_SIZE;
16e9f408
LP
307 }
308
fcde2389 309 if (st) {
8fc4d1be
YW
310 /* Memory maps that are larger then the files underneath have undefined behavior. Hence,
311 * clamp things to the file size if we know it */
fcde2389 312
c886f2d2 313 if (offset >= (uint64_t) st->st_size)
fcde2389
LP
314 return -EADDRNOTAVAIL;
315
c886f2d2
YW
316 if (size > (uint64_t) st->st_size - offset)
317 size = PAGE_ALIGN((uint64_t) st->st_size - offset);
fcde2389
LP
318 }
319
c886f2d2
YW
320 if (size >= SIZE_MAX)
321 return -EADDRNOTAVAIL;
322
323 r = mmap_try_harder(f, NULL, MAP_SHARED, offset, size, &d);
db87967e
VC
324 if (r < 0)
325 return r;
16e9f408 326
c886f2d2 327 w = window_add(f, offset, size, d);
40f5e6a9 328 if (!w) {
c886f2d2 329 (void) munmap(d, size);
40f5e6a9
YW
330 return -ENOMEM;
331 }
b67ddc7b 332
40f5e6a9
YW
333 *ret = w;
334 return 0;
16e9f408
LP
335}
336
c3bd54bf 337int mmap_cache_fd_get(
be7cdd8e 338 MMapFileDescriptor *f,
1a25ab66 339 MMapCacheCategory c,
fcde2389 340 bool keep_always,
16e9f408 341 uint64_t offset,
f8019684 342 size_t size,
fcde2389 343 struct stat *st,
258190a0 344 void **ret) {
16e9f408 345
40f5e6a9 346 MMapCache *m = mmap_cache_fd_cache(f);
40f5e6a9 347 Window *w;
16e9f408
LP
348 int r;
349
16e9f408 350 assert(size > 0);
1a25ab66 351 assert(c >= 0 && c < _MMAP_CACHE_CATEGORY_MAX);
1b8951e5 352 assert(ret);
40f5e6a9
YW
353
354 if (f->sigbus)
355 return -EIO;
16e9f408 356
1a25ab66
YW
357 /* Check whether the current category is the right one already */
358 if (window_matches(m->windows_by_category[c], f, offset, size)) {
359 m->n_category_cache_hit++;
360 w = m->windows_by_category[c];
40f5e6a9
YW
361 goto found;
362 }
363
364 /* Drop the reference to the window, since it's unnecessary now */
1a25ab66 365 category_detach_window(m, c);
16e9f408 366
f8019684 367 /* Search for a matching mmap */
0073f6c6 368 LIST_FOREACH(windows, i, f->windows)
40f5e6a9
YW
369 if (window_matches(i, f, offset, size)) {
370 m->n_window_list_hit++;
371 w = i;
372 goto found;
373 }
bf807d4d 374
40f5e6a9 375 m->n_missed++;
16e9f408 376
f8019684 377 /* Create a new mmap */
40f5e6a9
YW
378 r = add_mmap(f, offset, size, st, &w);
379 if (r < 0)
380 return r;
381
382found:
2ebc9cc8
YW
383 if (keep_always)
384 w->flags |= WINDOW_KEEP_ALWAYS;
385
1a25ab66 386 category_attach_window(m, c, w);
40f5e6a9
YW
387 *ret = (uint8_t*) w->ptr + (offset - w->offset);
388 return 0;
ae97089d
ZJS
389}
390
a1b8d21f
YW
391int mmap_cache_fd_pin(
392 MMapFileDescriptor *f,
393 MMapCacheCategory c,
394 void *addr,
395 size_t size) {
396
397 MMapCache *m = mmap_cache_fd_cache(f);
398 Window *w;
399
400 assert(addr);
401 assert(c >= 0 && c < _MMAP_CACHE_CATEGORY_MAX);
402 assert(size > 0);
403
404 if (f->sigbus)
405 return -EIO;
406
407 /* Check if the current category is the right one. */
408 if (window_matches_by_addr(m->windows_by_category[c], f, addr, size)) {
409 m->n_category_cache_hit++;
410 w = m->windows_by_category[c];
411 goto found;
412 }
413
414 /* Search for a matching mmap. */
415 LIST_FOREACH(windows, i, f->windows)
416 if (window_matches_by_addr(i, f, addr, size)) {
417 m->n_window_list_hit++;
418 w = i;
419 goto found;
420 }
421
422 m->n_missed++;
423 return -EADDRNOTAVAIL; /* Not found. */
424
425found:
426 if (FLAGS_SET(w->flags, WINDOW_KEEP_ALWAYS))
427 return 0; /* The window will never unmapped. */
428
429 /* Attach the window to the 'pinning' category. */
430 category_attach_window(m, MMAP_CACHE_CATEGORY_PIN, w);
431 return 1;
432}
433
3a595c59 434void mmap_cache_stats_log_debug(MMapCache *m) {
fa6ac760
LP
435 assert(m);
436
284802c5
VC
437 log_debug("mmap cache statistics: %u category cache hit, %u window list hit, %u miss, %u files, %u windows, %u unused",
438 m->n_category_cache_hit, m->n_window_list_hit, m->n_missed, hashmap_size(m->fds), m->n_windows, m->n_unused);
fa6ac760
LP
439}
440
441static void mmap_cache_process_sigbus(MMapCache *m) {
442 bool found = false;
be7cdd8e 443 MMapFileDescriptor *f;
fa6ac760 444 int r;
16e9f408
LP
445
446 assert(m);
16e9f408 447
8fc4d1be 448 /* Iterate through all triggered pages and mark their files as invalidated. */
fa6ac760
LP
449 for (;;) {
450 bool ours;
451 void *addr;
452
453 r = sigbus_pop(&addr);
454 if (_likely_(r == 0))
455 break;
456 if (r < 0) {
457 log_error_errno(r, "SIGBUS handling failed: %m");
458 abort();
459 }
460
461 ours = false;
90e74a66 462 HASHMAP_FOREACH(f, m->fds) {
7a56f6b2
YW
463 LIST_FOREACH(windows, w, f->windows)
464 if (window_matches_by_addr(w, f, addr, 1)) {
fa6ac760
LP
465 found = ours = f->sigbus = true;
466 break;
467 }
fa6ac760
LP
468
469 if (ours)
470 break;
471 }
472
8fc4d1be 473 /* Didn't find a matching window, give up. */
fa6ac760
LP
474 if (!ours) {
475 log_error("Unknown SIGBUS page, aborting.");
476 abort();
477 }
478 }
479
8fc4d1be
YW
480 /* The list of triggered pages is now empty. Now, let's remap all windows of the triggered file to
481 * anonymous maps, so that no page of the file in question is triggered again, so that we can be sure
482 * not to hit the queue size limit. */
fa6ac760 483 if (_likely_(!found))
16e9f408 484 return;
16e9f408 485
90e74a66 486 HASHMAP_FOREACH(f, m->fds) {
fa6ac760
LP
487 if (!f->sigbus)
488 continue;
489
0073f6c6 490 LIST_FOREACH(windows, w, f->windows)
fa6ac760
LP
491 window_invalidate(w);
492 }
f8019684 493}
16e9f408 494
c3bd54bf 495bool mmap_cache_fd_got_sigbus(MMapFileDescriptor *f) {
be7cdd8e 496 assert(f);
bf807d4d 497
1da2c4ce 498 mmap_cache_process_sigbus(f->cache);
fa6ac760 499
fa6ac760 500 return f->sigbus;
bf807d4d
LP
501}
502
8926a6a4
YW
503int mmap_cache_add_fd(MMapCache *m, int fd, int prot, MMapFileDescriptor **ret) {
504 _cleanup_free_ MMapFileDescriptor *f = NULL;
505 MMapFileDescriptor *existing;
be7cdd8e 506 int r;
fa6ac760 507
bf807d4d 508 assert(m);
fa6ac760 509 assert(fd >= 0);
bf807d4d 510
8926a6a4
YW
511 existing = hashmap_get(m->fds, FD_TO_PTR(fd));
512 if (existing) {
8ff0f36e
YW
513 if (existing->prot != prot)
514 return -EEXIST;
8926a6a4
YW
515 if (ret)
516 *ret = existing;
517 return 0;
518 }
be7cdd8e 519
8926a6a4 520 f = new(MMapFileDescriptor, 1);
be7cdd8e 521 if (!f)
8926a6a4
YW
522 return -ENOMEM;
523
524 *f = (MMapFileDescriptor) {
525 .fd = fd,
526 .prot = prot,
527 };
be7cdd8e 528
8926a6a4 529 r = hashmap_ensure_put(&m->fds, NULL, FD_TO_PTR(fd), f);
be7cdd8e 530 if (r < 0)
8926a6a4
YW
531 return r;
532 assert(r > 0);
be7cdd8e 533
fd9ac6c3 534 f->cache = mmap_cache_ref(m);
fd9ac6c3 535
8926a6a4
YW
536 if (ret)
537 *ret = f;
538
539 TAKE_PTR(f);
540 return 1;
be7cdd8e
VC
541}
542
b38977e5
YW
543MMapFileDescriptor* mmap_cache_fd_free(MMapFileDescriptor *f) {
544 if (!f)
545 return NULL;
be7cdd8e 546
8fc4d1be
YW
547 /* Make sure that any queued SIGBUS are first dispatched, so that we don't end up with a SIGBUS entry
548 * we cannot relate to any existing memory map. */
fa6ac760 549
1da2c4ce 550 mmap_cache_process_sigbus(f->cache);
fa6ac760 551
be7cdd8e
VC
552 while (f->windows)
553 window_free(f->windows);
554
b38977e5 555 assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)) == f);
fa6ac760 556
b38977e5
YW
557 /* Unref the cache at the end. Otherwise, the assertions in mmap_cache_free() may be triggered. */
558 f->cache = mmap_cache_unref(f->cache);
559
560 return mfree(f);
bf807d4d 561}
176bf8b8
VC
562
563MMapCache* mmap_cache_fd_cache(MMapFileDescriptor *f) {
564 assert(f);
b38977e5 565 return ASSERT_PTR(f->cache);
176bf8b8 566}