/* src/libsystemd/sd-journal/mmap-cache.c — window cache used by sd-journal for mapping journal files */
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <stdlib.h>
5 #include <sys/mman.h>
6
7 #include "alloc-util.h"
8 #include "errno-util.h"
9 #include "fd-util.h"
10 #include "hashmap.h"
11 #include "list.h"
12 #include "log.h"
13 #include "macro.h"
14 #include "memory-util.h"
15 #include "mmap-cache.h"
16 #include "sigbus.h"
17
typedef struct Window Window;
typedef struct Context Context;

/* A single mmap'ed region of some file, possibly shared by several contexts. */
struct Window {
        MMapCache *cache;

        bool invalidated:1;   /* replaced by anonymous pages after a SIGBUS, see window_invalidate() */
        bool keep_always:1;   /* never park this window on the unused list */
        bool in_unused:1;     /* currently linked into the cache's unused list */

        void *ptr;            /* start of the mapping */
        uint64_t offset;      /* file offset the mapping begins at */
        size_t size;          /* size of the mapping in bytes */

        MMapFileDescriptor *fd;

        LIST_FIELDS(Window, by_fd);    /* linkage in the owning fd's window list */
        LIST_FIELDS(Window, unused);   /* linkage in the cache's unused list */

        LIST_HEAD(Context, contexts);  /* all contexts currently pointing at this window */
};
39
/* A lookup slot: remembers the window it used most recently, so repeated
 * accesses to the same file area are resolved without any list walk. */
struct Context {
        Window *window;

        LIST_FIELDS(Context, by_window);  /* linkage in the window's context list */
};
45
/* Per-file state: the fd we map from, the protection to map with, and all
 * windows currently mapped from this file. */
struct MMapFileDescriptor {
        MMapCache *cache;
        int fd;
        int prot;      /* PROT_* flags passed to mmap() */
        bool sigbus;   /* a SIGBUS was triggered by one of this file's windows */
        LIST_HEAD(Window, windows);
};
53
struct MMapCache {
        unsigned n_ref;     /* also pinned by each registered MMapFileDescriptor, see mmap_cache_add_fd() */
        unsigned n_windows;

        /* Statistics, reported via mmap_cache_stats_log_debug() */
        unsigned n_context_cache_hit, n_window_list_hit, n_missed;

        Hashmap *fds;       /* fd (as FD_TO_PTR) → MMapFileDescriptor */

        LIST_HEAD(Window, unused);  /* windows without users; most recently released at the head */
        Window *last_unused;        /* tail of the unused list, i.e. the next eviction candidate */

        Context contexts[MMAP_CACHE_MAX_CONTEXTS];
};
67
/* Keep at least this many windows allocated before recycling unused ones in window_add() */
#define WINDOWS_MIN 64

#if ENABLE_DEBUG_MMAP_CACHE
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
# define WINDOW_SIZE (page_size())
#else
# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#endif
76
77 MMapCache* mmap_cache_new(void) {
78 MMapCache *m;
79
80 m = new0(MMapCache, 1);
81 if (!m)
82 return NULL;
83
84 m->n_ref = 1;
85 return m;
86 }
87
/* Detaches a window from everything referencing it: unmaps its memory, removes
 * it from its file's window list and the cache's unused list, and clears it
 * from all contexts pointing at it. Does not free the Window structure. */
static void window_unlink(Window *w) {

        assert(w);

        if (w->ptr)
                munmap(w->ptr, w->size);

        if (w->fd)
                LIST_REMOVE(by_fd, w->fd->windows, w);

        if (w->in_unused) {
                /* If we were the eviction candidate, hand that role to our predecessor */
                if (w->cache->last_unused == w)
                        w->cache->last_unused = w->unused_prev;

                LIST_REMOVE(unused, w->cache->unused, w);
        }

        /* Any context still pointing here must forget us */
        LIST_FOREACH(by_window, c, w->contexts) {
                assert(c->window == w);
                c->window = NULL;
        }
}
110
/* Remaps the window's address range with anonymous memory (MAP_FIXED over the
 * old mapping), so that accesses through existing pointers keep working but
 * can no longer fault on the underlying file. Idempotent. */
static void window_invalidate(Window *w) {
        assert(w);
        assert(w->fd);

        if (w->invalidated)
                return;

        /* Replace the window with anonymous pages. This is useful
         * when we hit a SIGBUS and want to make sure the file cannot
         * trigger any further SIGBUS, possibly overrunning the sigbus
         * queue. */

        assert_se(mmap(w->ptr, w->size, w->fd->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
        w->invalidated = true;
}
126
/* Fully destroys a window: unlinks it (which also unmaps it) and frees it. */
static void window_free(Window *w) {
        assert(w);

        window_unlink(w);
        w->cache->n_windows--;
        free(w);
}
134
135 _pure_ static bool window_matches(Window *w, uint64_t offset, size_t size) {
136 assert(w);
137 assert(size > 0);
138
139 return
140 offset >= w->offset &&
141 offset + size <= w->offset + w->size;
142 }
143
144 _pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, uint64_t offset, size_t size) {
145 assert(w);
146 assert(f);
147
148 return
149 w->fd == f &&
150 window_matches(w, offset, size);
151 }
152
153 static Window *window_add(MMapCache *m, MMapFileDescriptor *f, bool keep_always, uint64_t offset, size_t size, void *ptr) {
154 Window *w;
155
156 assert(m);
157 assert(f);
158
159 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
160
161 /* Allocate a new window */
162 w = new(Window, 1);
163 if (!w)
164 return NULL;
165 m->n_windows++;
166 } else {
167
168 /* Reuse an existing one */
169 w = m->last_unused;
170 window_unlink(w);
171 }
172
173 *w = (Window) {
174 .cache = m,
175 .fd = f,
176 .keep_always = keep_always,
177 .offset = offset,
178 .size = size,
179 .ptr = ptr,
180 };
181
182 LIST_PREPEND(by_fd, f->windows, w);
183
184 return w;
185 }
186
/* Drops the context's reference to its current window, if any. A window that
 * thereby loses its last user (and is not pinned via keep_always) is parked on
 * the unused list so it can be recycled later. */
static void context_detach_window(MMapCache *m, Context *c) {
        Window *w;

        assert(m);
        assert(c);

        if (!c->window)
                return;

        w = TAKE_PTR(c->window);
        LIST_REMOVE(by_window, w->contexts, c);

        if (!w->contexts && !w->keep_always) {
                /* Not used anymore? */
#if ENABLE_DEBUG_MMAP_CACHE
                /* Unmap unused windows immediately to expose use-after-unmap
                 * by SIGSEGV. */
                window_free(w);
#else
                /* Becomes head of the unused list; if the list was empty it is
                 * also the tail, i.e. the next eviction candidate. */
                LIST_PREPEND(unused, m->unused, w);
                if (!m->last_unused)
                        m->last_unused = w;

                w->in_unused = true;
#endif
        }
}
214
/* Points the context at the given window, releasing its previous one. A
 * window that was parked on the unused list is taken off it again, since it
 * now has a user. */
static void context_attach_window(MMapCache *m, Context *c, Window *w) {
        assert(m);
        assert(c);
        assert(w);

        if (c->window == w)
                return;

        context_detach_window(m, c);

        if (w->in_unused) {
                /* Used again? */
                if (m->last_unused == w)
                        m->last_unused = w->unused_prev; /* must be read before LIST_REMOVE() clears the linkage */
                LIST_REMOVE(unused, m->unused, w);

                w->in_unused = false;
        }

        c->window = w;
        LIST_PREPEND(by_window, w->contexts, c);
}
237
238 static MMapCache *mmap_cache_free(MMapCache *m) {
239 assert(m);
240
241 for (int i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
242 context_detach_window(m, &m->contexts[i]);
243
244 hashmap_free(m->fds);
245
246 while (m->unused)
247 window_free(m->unused);
248
249 return mfree(m);
250 }
251
/* Generates mmap_cache_ref() and mmap_cache_unref(), the latter calling mmap_cache_free() at refcount 0 */
DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache, mmap_cache, mmap_cache_free);
253
254 static int make_room(MMapCache *m) {
255 assert(m);
256
257 if (!m->last_unused)
258 return 0;
259
260 window_free(m->last_unused);
261 return 1;
262 }
263
264 static int try_context(
265 MMapFileDescriptor *f,
266 Context *c,
267 bool keep_always,
268 uint64_t offset,
269 size_t size,
270 void **ret) {
271
272 assert(f);
273 assert(f->cache);
274 assert(f->cache->n_ref > 0);
275 assert(c);
276 assert(size > 0);
277 assert(ret);
278
279 if (!c->window)
280 return 0;
281
282 if (!window_matches_fd(c->window, f, offset, size)) {
283
284 /* Drop the reference to the window, since it's unnecessary now */
285 context_detach_window(f->cache, c);
286 return 0;
287 }
288
289 if (c->window->fd->sigbus)
290 return -EIO;
291
292 c->window->keep_always = c->window->keep_always || keep_always;
293
294 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
295 f->cache->n_context_cache_hit++;
296
297 return 1;
298 }
299
300 static int find_mmap(
301 MMapFileDescriptor *f,
302 Context *c,
303 bool keep_always,
304 uint64_t offset,
305 size_t size,
306 void **ret) {
307
308 Window *found = NULL;
309
310 assert(f);
311 assert(f->cache);
312 assert(f->cache->n_ref > 0);
313 assert(c);
314 assert(size > 0);
315
316 if (f->sigbus)
317 return -EIO;
318
319 LIST_FOREACH(by_fd, w, f->windows)
320 if (window_matches(w, offset, size)) {
321 found = w;
322 break;
323 }
324
325 if (!found)
326 return 0;
327
328 context_attach_window(f->cache, c, found);
329 found->keep_always = found->keep_always || keep_always;
330
331 *ret = (uint8_t*) found->ptr + (offset - found->offset);
332 f->cache->n_window_list_hit++;
333
334 return 1;
335 }
336
337 static int mmap_try_harder(MMapFileDescriptor *f, void *addr, int flags, uint64_t offset, size_t size, void **res) {
338 void *ptr;
339
340 assert(f);
341 assert(res);
342
343 for (;;) {
344 int r;
345
346 ptr = mmap(addr, size, f->prot, flags, f->fd, offset);
347 if (ptr != MAP_FAILED)
348 break;
349 if (errno != ENOMEM)
350 return negative_errno();
351
352 r = make_room(f->cache);
353 if (r < 0)
354 return r;
355 if (r == 0)
356 return -ENOMEM;
357 }
358
359 *res = ptr;
360 return 0;
361 }
362
/* Slow path: establishes a brand-new mapping that covers the requested range,
 * registers it as a window and attaches it to the context. The mapping is
 * page-aligned and grown to WINDOW_SIZE (centered around the request where
 * possible), but clamped to the file size if *st is given. Returns 1 on
 * success with *ret pointing at the requested offset, negative on error. */
static int add_mmap(
                MMapFileDescriptor *f,
                Context *c,
                bool keep_always,
                uint64_t offset,
                size_t size,
                struct stat *st,
                void **ret) {

        uint64_t woffset, wsize;
        Window *w;
        void *d;
        int r;

        assert(f);
        assert(f->cache);
        assert(f->cache->n_ref > 0);
        assert(c);
        assert(size > 0);
        assert(ret);

        /* Round the start down to a page boundary, and the size up accordingly */
        woffset = offset & ~((uint64_t) page_size() - 1ULL);
        wsize = size + (offset - woffset);
        wsize = PAGE_ALIGN(wsize);

        if (wsize < WINDOW_SIZE) {
                uint64_t delta;

                /* Grow the window to WINDOW_SIZE, extending by half on either
                 * side of the request (or from file offset 0 if there is not
                 * enough room in front). */
                delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);

                if (delta > offset)
                        woffset = 0;
                else
                        woffset -= delta;

                wsize = WINDOW_SIZE;
        }

        if (st) {
                /* Memory maps that are larger then the files
                   underneath have undefined behavior. Hence, clamp
                   things to the file size if we know it */

                if (woffset >= (uint64_t) st->st_size)
                        return -EADDRNOTAVAIL;

                if (woffset + wsize > (uint64_t) st->st_size)
                        wsize = PAGE_ALIGN(st->st_size - woffset);
        }

        r = mmap_try_harder(f, NULL, MAP_SHARED, woffset, wsize, &d);
        if (r < 0)
                return r;

        w = window_add(f->cache, f, keep_always, woffset, wsize, d);
        if (!w)
                goto outofmem;

        context_attach_window(f->cache, c, w);

        *ret = (uint8_t*) w->ptr + (offset - w->offset);

        return 1;

outofmem:
        /* window_add() failed, so nothing owns the mapping yet — drop it */
        (void) munmap(d, wsize);
        return -ENOMEM;
}
431
/* Looks up (or establishes) a mapping covering [offset, offset + size) of the
 * file, using the given cache context. Lookup is three-staged: the context's
 * current window, then the file's full window list, then a fresh mmap(). On
 * success *ret points at the requested offset inside the mapping; returns > 0
 * on success, negative errno-style code on failure. */
int mmap_cache_fd_get(
                MMapFileDescriptor *f,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                struct stat *st,
                void **ret) {

        Context *c;
        int r;

        assert(f);
        assert(f->cache);
        assert(f->cache->n_ref > 0);
        assert(size > 0);
        assert(ret);
        assert(context < MMAP_CACHE_MAX_CONTEXTS);

        c = &f->cache->contexts[context];

        /* Check whether the current context is the right one already */
        r = try_context(f, c, keep_always, offset, size, ret);
        if (r != 0)
                return r;

        /* Search for a matching mmap */
        r = find_mmap(f, c, keep_always, offset, size, ret);
        if (r != 0)
                return r;

        f->cache->n_missed++;

        /* Create a new mmap */
        return add_mmap(f, c, keep_always, offset, size, st, ret);
}
468
469 void mmap_cache_stats_log_debug(MMapCache *m) {
470 assert(m);
471
472 log_debug("mmap cache statistics: %u context cache hit, %u window list hit, %u miss", m->n_context_cache_hit, m->n_window_list_hit, m->n_missed);
473 }
474
/* Drains the queue of fault addresses collected by the SIGBUS handler (see
 * sigbus.h), marks the files owning those addresses as poisoned, and remaps
 * all of their windows with anonymous memory so the files cannot raise any
 * further SIGBUS. Aborts if a faulting address belongs to none of our windows,
 * since we then have no way to handle it. */
static void mmap_cache_process_sigbus(MMapCache *m) {
        bool found = false;
        MMapFileDescriptor *f;
        int r;

        assert(m);

        /* Iterate through all triggered pages and mark their files as
         * invalidated */
        for (;;) {
                bool ours;
                void *addr;

                r = sigbus_pop(&addr);
                if (_likely_(r == 0))
                        break;
                if (r < 0) {
                        log_error_errno(r, "SIGBUS handling failed: %m");
                        abort();
                }

                /* Find the window (and thus the file) this address falls into */
                ours = false;
                HASHMAP_FOREACH(f, m->fds) {
                        LIST_FOREACH(by_fd, w, f->windows) {
                                if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
                                    (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
                                        found = ours = f->sigbus = true;
                                        break;
                                }
                        }

                        if (ours)
                                break;
                }

                /* Didn't find a matching window, give up */
                if (!ours) {
                        log_error("Unknown SIGBUS page, aborting.");
                        abort();
                }
        }

        /* The list of triggered pages is now empty. Now, let's remap
         * all windows of the triggered file to anonymous maps, so
         * that no page of the file in question is triggered again, so
         * that we can be sure not to hit the queue size limit. */
        if (_likely_(!found))
                return;

        HASHMAP_FOREACH(f, m->fds) {
                if (!f->sigbus)
                        continue;

                LIST_FOREACH(by_fd, w, f->windows)
                        window_invalidate(w);
        }
}
532
533 bool mmap_cache_fd_got_sigbus(MMapFileDescriptor *f) {
534 assert(f);
535
536 mmap_cache_process_sigbus(f->cache);
537
538 return f->sigbus;
539 }
540
541 MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd, int prot) {
542 MMapFileDescriptor *f;
543 int r;
544
545 assert(m);
546 assert(fd >= 0);
547
548 f = hashmap_get(m->fds, FD_TO_PTR(fd));
549 if (f)
550 return f;
551
552 r = hashmap_ensure_allocated(&m->fds, NULL);
553 if (r < 0)
554 return NULL;
555
556 f = new0(MMapFileDescriptor, 1);
557 if (!f)
558 return NULL;
559
560 r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
561 if (r < 0)
562 return mfree(f);
563
564 f->cache = mmap_cache_ref(m);
565 f->fd = fd;
566 f->prot = prot;
567
568 return f;
569 }
570
571 void mmap_cache_fd_free(MMapFileDescriptor *f) {
572 assert(f);
573 assert(f->cache);
574
575 /* Make sure that any queued SIGBUS are first dispatched, so
576 * that we don't end up with a SIGBUS entry we cannot relate
577 * to any existing memory map */
578
579 mmap_cache_process_sigbus(f->cache);
580
581 while (f->windows)
582 window_free(f->windows);
583
584 if (f->cache) {
585 assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
586 f->cache = mmap_cache_unref(f->cache);
587 }
588
589 free(f);
590 }
591
/* Returns the cache this file descriptor is registered with. */
MMapCache* mmap_cache_fd_cache(MMapFileDescriptor *f) {
        assert(f);

        return f->cache;
}