]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/libsystemd/sd-journal/mmap-cache.c
sd-boot+bootctl: invert order of entries w/o sort-key
[thirdparty/systemd.git] / src / libsystemd / sd-journal / mmap-cache.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <stdlib.h>
5 #include <sys/mman.h>
6
7 #include "alloc-util.h"
8 #include "errno-util.h"
9 #include "fd-util.h"
10 #include "hashmap.h"
11 #include "list.h"
12 #include "log.h"
13 #include "macro.h"
14 #include "memory-util.h"
15 #include "mmap-cache.h"
16 #include "sigbus.h"
17
18 typedef struct Window Window;
19 typedef struct Context Context;
20
21 struct Window {
22 MMapCache *cache;
23
24 bool invalidated:1;
25 bool keep_always:1;
26 bool in_unused:1;
27
28 void *ptr;
29 uint64_t offset;
30 size_t size;
31
32 MMapFileDescriptor *fd;
33
34 LIST_FIELDS(Window, by_fd);
35 LIST_FIELDS(Window, unused);
36
37 LIST_HEAD(Context, contexts);
38 };
39
40 struct Context {
41 Window *window;
42
43 LIST_FIELDS(Context, by_window);
44 };
45
46 struct MMapFileDescriptor {
47 MMapCache *cache;
48 int fd;
49 int prot;
50 bool sigbus;
51 LIST_HEAD(Window, windows);
52 };
53
54 struct MMapCache {
55 unsigned n_ref;
56 unsigned n_windows;
57
58 unsigned n_context_cache_hit, n_window_list_hit, n_missed;
59
60 Hashmap *fds;
61
62 LIST_HEAD(Window, unused);
63 Window *last_unused;
64
65 Context contexts[MMAP_CACHE_MAX_CONTEXTS];
66 };
67
/* window_add() only recycles an unused window once more than this many
 * windows are allocated; below that it always allocates a fresh one. */
#define WINDOWS_MIN 64

#if ENABLE_DEBUG_MMAP_CACHE
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
#  define WINDOW_SIZE (page_size())
#else
/* Smaller requests are widened to this size (8 MiB) by add_mmap(). */
#  define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#endif
76
77 MMapCache* mmap_cache_new(void) {
78 MMapCache *m;
79
80 m = new0(MMapCache, 1);
81 if (!m)
82 return NULL;
83
84 m->n_ref = 1;
85 return m;
86 }
87
88 static void window_unlink(Window *w) {
89 Context *c;
90
91 assert(w);
92
93 if (w->ptr)
94 munmap(w->ptr, w->size);
95
96 if (w->fd)
97 LIST_REMOVE(by_fd, w->fd->windows, w);
98
99 if (w->in_unused) {
100 if (w->cache->last_unused == w)
101 w->cache->last_unused = w->unused_prev;
102
103 LIST_REMOVE(unused, w->cache->unused, w);
104 }
105
106 LIST_FOREACH(by_window, c, w->contexts) {
107 assert(c->window == w);
108 c->window = NULL;
109 }
110 }
111
112 static void window_invalidate(Window *w) {
113 assert(w);
114 assert(w->fd);
115
116 if (w->invalidated)
117 return;
118
119 /* Replace the window with anonymous pages. This is useful
120 * when we hit a SIGBUS and want to make sure the file cannot
121 * trigger any further SIGBUS, possibly overrunning the sigbus
122 * queue. */
123
124 assert_se(mmap(w->ptr, w->size, w->fd->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
125 w->invalidated = true;
126 }
127
128 static void window_free(Window *w) {
129 assert(w);
130
131 window_unlink(w);
132 w->cache->n_windows--;
133 free(w);
134 }
135
136 _pure_ static bool window_matches(Window *w, uint64_t offset, size_t size) {
137 assert(w);
138 assert(size > 0);
139
140 return
141 offset >= w->offset &&
142 offset + size <= w->offset + w->size;
143 }
144
145 _pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, uint64_t offset, size_t size) {
146 assert(w);
147 assert(f);
148
149 return
150 w->fd == f &&
151 window_matches(w, offset, size);
152 }
153
154 static Window *window_add(MMapCache *m, MMapFileDescriptor *f, bool keep_always, uint64_t offset, size_t size, void *ptr) {
155 Window *w;
156
157 assert(m);
158 assert(f);
159
160 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
161
162 /* Allocate a new window */
163 w = new(Window, 1);
164 if (!w)
165 return NULL;
166 m->n_windows++;
167 } else {
168
169 /* Reuse an existing one */
170 w = m->last_unused;
171 window_unlink(w);
172 }
173
174 *w = (Window) {
175 .cache = m,
176 .fd = f,
177 .keep_always = keep_always,
178 .offset = offset,
179 .size = size,
180 .ptr = ptr,
181 };
182
183 LIST_PREPEND(by_fd, f->windows, w);
184
185 return w;
186 }
187
188 static void context_detach_window(MMapCache *m, Context *c) {
189 Window *w;
190
191 assert(m);
192 assert(c);
193
194 if (!c->window)
195 return;
196
197 w = TAKE_PTR(c->window);
198 LIST_REMOVE(by_window, w->contexts, c);
199
200 if (!w->contexts && !w->keep_always) {
201 /* Not used anymore? */
202 #if ENABLE_DEBUG_MMAP_CACHE
203 /* Unmap unused windows immediately to expose use-after-unmap
204 * by SIGSEGV. */
205 window_free(w);
206 #else
207 LIST_PREPEND(unused, m->unused, w);
208 if (!m->last_unused)
209 m->last_unused = w;
210
211 w->in_unused = true;
212 #endif
213 }
214 }
215
216 static void context_attach_window(MMapCache *m, Context *c, Window *w) {
217 assert(m);
218 assert(c);
219 assert(w);
220
221 if (c->window == w)
222 return;
223
224 context_detach_window(m, c);
225
226 if (w->in_unused) {
227 /* Used again? */
228 if (m->last_unused == w)
229 m->last_unused = w->unused_prev;
230 LIST_REMOVE(unused, m->unused, w);
231
232 w->in_unused = false;
233 }
234
235 c->window = w;
236 LIST_PREPEND(by_window, w->contexts, c);
237 }
238
239 static MMapCache *mmap_cache_free(MMapCache *m) {
240 assert(m);
241
242 for (int i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
243 context_detach_window(m, &m->contexts[i]);
244
245 hashmap_free(m->fds);
246
247 while (m->unused)
248 window_free(m->unused);
249
250 return mfree(m);
251 }
252
253 DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache, mmap_cache, mmap_cache_free);
254
255 static int make_room(MMapCache *m) {
256 assert(m);
257
258 if (!m->last_unused)
259 return 0;
260
261 window_free(m->last_unused);
262 return 1;
263 }
264
265 static int try_context(
266 MMapFileDescriptor *f,
267 Context *c,
268 bool keep_always,
269 uint64_t offset,
270 size_t size,
271 void **ret) {
272
273 assert(f);
274 assert(f->cache);
275 assert(f->cache->n_ref > 0);
276 assert(c);
277 assert(size > 0);
278 assert(ret);
279
280 if (!c->window)
281 return 0;
282
283 if (!window_matches_fd(c->window, f, offset, size)) {
284
285 /* Drop the reference to the window, since it's unnecessary now */
286 context_detach_window(f->cache, c);
287 return 0;
288 }
289
290 if (c->window->fd->sigbus)
291 return -EIO;
292
293 c->window->keep_always = c->window->keep_always || keep_always;
294
295 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
296 f->cache->n_context_cache_hit++;
297
298 return 1;
299 }
300
301 static int find_mmap(
302 MMapFileDescriptor *f,
303 Context *c,
304 bool keep_always,
305 uint64_t offset,
306 size_t size,
307 void **ret) {
308
309 Window *w;
310
311 assert(f);
312 assert(f->cache);
313 assert(f->cache->n_ref > 0);
314 assert(c);
315 assert(size > 0);
316
317 if (f->sigbus)
318 return -EIO;
319
320 LIST_FOREACH(by_fd, w, f->windows)
321 if (window_matches(w, offset, size))
322 break;
323
324 if (!w)
325 return 0;
326
327 context_attach_window(f->cache, c, w);
328 w->keep_always = w->keep_always || keep_always;
329
330 *ret = (uint8_t*) w->ptr + (offset - w->offset);
331 f->cache->n_window_list_hit++;
332
333 return 1;
334 }
335
336 static int mmap_try_harder(MMapFileDescriptor *f, void *addr, int flags, uint64_t offset, size_t size, void **res) {
337 void *ptr;
338
339 assert(f);
340 assert(res);
341
342 for (;;) {
343 int r;
344
345 ptr = mmap(addr, size, f->prot, flags, f->fd, offset);
346 if (ptr != MAP_FAILED)
347 break;
348 if (errno != ENOMEM)
349 return negative_errno();
350
351 r = make_room(f->cache);
352 if (r < 0)
353 return r;
354 if (r == 0)
355 return -ENOMEM;
356 }
357
358 *res = ptr;
359 return 0;
360 }
361
362 static int add_mmap(
363 MMapFileDescriptor *f,
364 Context *c,
365 bool keep_always,
366 uint64_t offset,
367 size_t size,
368 struct stat *st,
369 void **ret) {
370
371 uint64_t woffset, wsize;
372 Window *w;
373 void *d;
374 int r;
375
376 assert(f);
377 assert(f->cache);
378 assert(f->cache->n_ref > 0);
379 assert(c);
380 assert(size > 0);
381 assert(ret);
382
383 woffset = offset & ~((uint64_t) page_size() - 1ULL);
384 wsize = size + (offset - woffset);
385 wsize = PAGE_ALIGN(wsize);
386
387 if (wsize < WINDOW_SIZE) {
388 uint64_t delta;
389
390 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
391
392 if (delta > offset)
393 woffset = 0;
394 else
395 woffset -= delta;
396
397 wsize = WINDOW_SIZE;
398 }
399
400 if (st) {
401 /* Memory maps that are larger then the files
402 underneath have undefined behavior. Hence, clamp
403 things to the file size if we know it */
404
405 if (woffset >= (uint64_t) st->st_size)
406 return -EADDRNOTAVAIL;
407
408 if (woffset + wsize > (uint64_t) st->st_size)
409 wsize = PAGE_ALIGN(st->st_size - woffset);
410 }
411
412 r = mmap_try_harder(f, NULL, MAP_SHARED, woffset, wsize, &d);
413 if (r < 0)
414 return r;
415
416 w = window_add(f->cache, f, keep_always, woffset, wsize, d);
417 if (!w)
418 goto outofmem;
419
420 context_attach_window(f->cache, c, w);
421
422 *ret = (uint8_t*) w->ptr + (offset - w->offset);
423
424 return 1;
425
426 outofmem:
427 (void) munmap(d, wsize);
428 return -ENOMEM;
429 }
430
431 int mmap_cache_fd_get(
432 MMapFileDescriptor *f,
433 unsigned context,
434 bool keep_always,
435 uint64_t offset,
436 size_t size,
437 struct stat *st,
438 void **ret) {
439
440 Context *c;
441 int r;
442
443 assert(f);
444 assert(f->cache);
445 assert(f->cache->n_ref > 0);
446 assert(size > 0);
447 assert(ret);
448 assert(context < MMAP_CACHE_MAX_CONTEXTS);
449
450 c = &f->cache->contexts[context];
451
452 /* Check whether the current context is the right one already */
453 r = try_context(f, c, keep_always, offset, size, ret);
454 if (r != 0)
455 return r;
456
457 /* Search for a matching mmap */
458 r = find_mmap(f, c, keep_always, offset, size, ret);
459 if (r != 0)
460 return r;
461
462 f->cache->n_missed++;
463
464 /* Create a new mmap */
465 return add_mmap(f, c, keep_always, offset, size, st, ret);
466 }
467
468 void mmap_cache_stats_log_debug(MMapCache *m) {
469 assert(m);
470
471 log_debug("mmap cache statistics: %u context cache hit, %u window list hit, %u miss", m->n_context_cache_hit, m->n_window_list_hit, m->n_missed);
472 }
473
474 static void mmap_cache_process_sigbus(MMapCache *m) {
475 bool found = false;
476 MMapFileDescriptor *f;
477 int r;
478
479 assert(m);
480
481 /* Iterate through all triggered pages and mark their files as
482 * invalidated */
483 for (;;) {
484 bool ours;
485 void *addr;
486
487 r = sigbus_pop(&addr);
488 if (_likely_(r == 0))
489 break;
490 if (r < 0) {
491 log_error_errno(r, "SIGBUS handling failed: %m");
492 abort();
493 }
494
495 ours = false;
496 HASHMAP_FOREACH(f, m->fds) {
497 Window *w;
498
499 LIST_FOREACH(by_fd, w, f->windows) {
500 if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
501 (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
502 found = ours = f->sigbus = true;
503 break;
504 }
505 }
506
507 if (ours)
508 break;
509 }
510
511 /* Didn't find a matching window, give up */
512 if (!ours) {
513 log_error("Unknown SIGBUS page, aborting.");
514 abort();
515 }
516 }
517
518 /* The list of triggered pages is now empty. Now, let's remap
519 * all windows of the triggered file to anonymous maps, so
520 * that no page of the file in question is triggered again, so
521 * that we can be sure not to hit the queue size limit. */
522 if (_likely_(!found))
523 return;
524
525 HASHMAP_FOREACH(f, m->fds) {
526 Window *w;
527
528 if (!f->sigbus)
529 continue;
530
531 LIST_FOREACH(by_fd, w, f->windows)
532 window_invalidate(w);
533 }
534 }
535
536 bool mmap_cache_fd_got_sigbus(MMapFileDescriptor *f) {
537 assert(f);
538
539 mmap_cache_process_sigbus(f->cache);
540
541 return f->sigbus;
542 }
543
544 MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd, int prot) {
545 MMapFileDescriptor *f;
546 int r;
547
548 assert(m);
549 assert(fd >= 0);
550
551 f = hashmap_get(m->fds, FD_TO_PTR(fd));
552 if (f)
553 return f;
554
555 r = hashmap_ensure_allocated(&m->fds, NULL);
556 if (r < 0)
557 return NULL;
558
559 f = new0(MMapFileDescriptor, 1);
560 if (!f)
561 return NULL;
562
563 r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
564 if (r < 0)
565 return mfree(f);
566
567 f->cache = mmap_cache_ref(m);
568 f->fd = fd;
569 f->prot = prot;
570
571 return f;
572 }
573
574 void mmap_cache_fd_free(MMapFileDescriptor *f) {
575 assert(f);
576 assert(f->cache);
577
578 /* Make sure that any queued SIGBUS are first dispatched, so
579 * that we don't end up with a SIGBUS entry we cannot relate
580 * to any existing memory map */
581
582 mmap_cache_process_sigbus(f->cache);
583
584 while (f->windows)
585 window_free(f->windows);
586
587 if (f->cache) {
588 assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
589 f->cache = mmap_cache_unref(f->cache);
590 }
591
592 free(f);
593 }
594
595 MMapCache* mmap_cache_fd_cache(MMapFileDescriptor *f) {
596 assert(f);
597
598 return f->cache;
599 }