]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/mmap-cache.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
7 #include "alloc-util.h"
8 #include "errno-util.h"
14 #include "memory-util.h"
15 #include "mmap-cache.h"
18 typedef struct Window Window
;
19 typedef struct Context Context
;
33 MMapFileDescriptor
*fd
;
35 LIST_FIELDS(Window
, by_fd
);
36 LIST_FIELDS(Window
, unused
);
38 LIST_HEAD(Context
, contexts
);
46 LIST_FIELDS(Context
, by_window
);
49 struct MMapFileDescriptor
{
53 LIST_HEAD(Window
, windows
);
60 unsigned n_hit
, n_missed
;
63 Context
*contexts
[MMAP_CACHE_MAX_CONTEXTS
];
65 LIST_HEAD(Window
, unused
);
69 #define WINDOWS_MIN 64
71 #if ENABLE_DEBUG_MMAP_CACHE
72 /* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
73 # define WINDOW_SIZE (page_size())
75 # define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
78 MMapCache
* mmap_cache_new(void) {
81 m
= new0(MMapCache
, 1);
89 static void window_unlink(Window
*w
) {
95 munmap(w
->ptr
, w
->size
);
98 LIST_REMOVE(by_fd
, w
->fd
->windows
, w
);
101 if (w
->cache
->last_unused
== w
)
102 w
->cache
->last_unused
= w
->unused_prev
;
104 LIST_REMOVE(unused
, w
->cache
->unused
, w
);
107 LIST_FOREACH(by_window
, c
, w
->contexts
) {
108 assert(c
->window
== w
);
113 static void window_invalidate(Window
*w
) {
119 /* Replace the window with anonymous pages. This is useful
120 * when we hit a SIGBUS and want to make sure the file cannot
121 * trigger any further SIGBUS, possibly overrunning the sigbus
124 assert_se(mmap(w
->ptr
, w
->size
, w
->prot
, MAP_PRIVATE
|MAP_ANONYMOUS
|MAP_FIXED
, -1, 0) == w
->ptr
);
125 w
->invalidated
= true;
128 static void window_free(Window
*w
) {
132 w
->cache
->n_windows
--;
136 _pure_
static bool window_matches(Window
*w
, int prot
, uint64_t offset
, size_t size
) {
142 offset
>= w
->offset
&&
143 offset
+ size
<= w
->offset
+ w
->size
;
146 _pure_
static bool window_matches_fd(Window
*w
, MMapFileDescriptor
*f
, int prot
, uint64_t offset
, size_t size
) {
152 f
->fd
== w
->fd
->fd
&&
153 window_matches(w
, prot
, offset
, size
);
156 static Window
*window_add(MMapCache
*m
, MMapFileDescriptor
*f
, int prot
, bool keep_always
, uint64_t offset
, size_t size
, void *ptr
) {
162 if (!m
->last_unused
|| m
->n_windows
<= WINDOWS_MIN
) {
164 /* Allocate a new window */
171 /* Reuse an existing one */
180 w
->keep_always
= keep_always
;
185 LIST_PREPEND(by_fd
, f
->windows
, w
);
190 static void context_detach_window(Context
*c
) {
198 w
= TAKE_PTR(c
->window
);
199 LIST_REMOVE(by_window
, w
->contexts
, c
);
201 if (!w
->contexts
&& !w
->keep_always
) {
202 /* Not used anymore? */
203 #if ENABLE_DEBUG_MMAP_CACHE
204 /* Unmap unused windows immediately to expose use-after-unmap
208 LIST_PREPEND(unused
, c
->cache
->unused
, w
);
209 if (!c
->cache
->last_unused
)
210 c
->cache
->last_unused
= w
;
217 static void context_attach_window(Context
*c
, Window
*w
) {
224 context_detach_window(c
);
228 LIST_REMOVE(unused
, c
->cache
->unused
, w
);
229 if (c
->cache
->last_unused
== w
)
230 c
->cache
->last_unused
= w
->unused_prev
;
232 w
->in_unused
= false;
236 LIST_PREPEND(by_window
, w
->contexts
, c
);
239 static Context
*context_add(MMapCache
*m
, unsigned id
) {
248 c
= new0(Context
, 1);
255 assert(!m
->contexts
[id
]);
261 static void context_free(Context
*c
) {
264 context_detach_window(c
);
267 assert(c
->cache
->contexts
[c
->id
] == c
);
268 c
->cache
->contexts
[c
->id
] = NULL
;
274 static MMapCache
*mmap_cache_free(MMapCache
*m
) {
279 for (i
= 0; i
< MMAP_CACHE_MAX_CONTEXTS
; i
++)
281 context_free(m
->contexts
[i
]);
283 hashmap_free(m
->fds
);
286 window_free(m
->unused
);
291 DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache
, mmap_cache
, mmap_cache_free
);
293 static int make_room(MMapCache
*m
) {
299 window_free(m
->last_unused
);
303 static int try_context(
305 MMapFileDescriptor
*f
,
317 assert(m
->n_ref
> 0);
322 c
= m
->contexts
[context
];
326 assert(c
->id
== context
);
331 if (!window_matches_fd(c
->window
, f
, prot
, offset
, size
)) {
333 /* Drop the reference to the window, since it's unnecessary now */
334 context_detach_window(c
);
338 if (c
->window
->fd
->sigbus
)
341 c
->window
->keep_always
= c
->window
->keep_always
|| keep_always
;
343 *ret
= (uint8_t*) c
->window
->ptr
+ (offset
- c
->window
->offset
);
345 *ret_size
= c
->window
->size
- (offset
- c
->window
->offset
);
350 static int find_mmap(
352 MMapFileDescriptor
*f
,
365 assert(m
->n_ref
> 0);
372 LIST_FOREACH(by_fd
, w
, f
->windows
)
373 if (window_matches(w
, prot
, offset
, size
))
379 c
= context_add(m
, context
);
383 context_attach_window(c
, w
);
384 w
->keep_always
= w
->keep_always
|| keep_always
;
386 *ret
= (uint8_t*) w
->ptr
+ (offset
- w
->offset
);
388 *ret_size
= w
->size
- (offset
- w
->offset
);
393 static int mmap_try_harder(MMapCache
*m
, void *addr
, MMapFileDescriptor
*f
, int prot
, int flags
, uint64_t offset
, size_t size
, void **res
) {
403 ptr
= mmap(addr
, size
, prot
, flags
, f
->fd
, offset
);
404 if (ptr
!= MAP_FAILED
)
407 return negative_errno();
422 MMapFileDescriptor
*f
,
432 uint64_t woffset
, wsize
;
439 assert(m
->n_ref
> 0);
444 woffset
= offset
& ~((uint64_t) page_size() - 1ULL);
445 wsize
= size
+ (offset
- woffset
);
446 wsize
= PAGE_ALIGN(wsize
);
448 if (wsize
< WINDOW_SIZE
) {
451 delta
= PAGE_ALIGN((WINDOW_SIZE
- wsize
) / 2);
462 /* Memory maps that are larger than the files
463 underneath have undefined behavior. Hence, clamp
464 things to the file size if we know it */
466 if (woffset
>= (uint64_t) st
->st_size
)
467 return -EADDRNOTAVAIL
;
469 if (woffset
+ wsize
> (uint64_t) st
->st_size
)
470 wsize
= PAGE_ALIGN(st
->st_size
- woffset
);
473 r
= mmap_try_harder(m
, NULL
, f
, prot
, MAP_SHARED
, woffset
, wsize
, &d
);
477 c
= context_add(m
, context
);
481 w
= window_add(m
, f
, prot
, keep_always
, woffset
, wsize
, d
);
485 context_attach_window(c
, w
);
487 *ret
= (uint8_t*) w
->ptr
+ (offset
- w
->offset
);
489 *ret_size
= w
->size
- (offset
- w
->offset
);
494 (void) munmap(d
, wsize
);
500 MMapFileDescriptor
*f
,
513 assert(m
->n_ref
> 0);
517 assert(context
< MMAP_CACHE_MAX_CONTEXTS
);
519 /* Check whether the current context is the right one already */
520 r
= try_context(m
, f
, prot
, context
, keep_always
, offset
, size
, ret
, ret_size
);
526 /* Search for a matching mmap */
527 r
= find_mmap(m
, f
, prot
, context
, keep_always
, offset
, size
, ret
, ret_size
);
535 /* Create a new mmap */
536 return add_mmap(m
, f
, prot
, context
, keep_always
, offset
, size
, st
, ret
, ret_size
);
539 unsigned mmap_cache_get_hit(MMapCache
*m
) {
545 unsigned mmap_cache_get_missed(MMapCache
*m
) {
551 static void mmap_cache_process_sigbus(MMapCache
*m
) {
553 MMapFileDescriptor
*f
;
559 /* Iterate through all triggered pages and mark their files as
565 r
= sigbus_pop(&addr
);
566 if (_likely_(r
== 0))
569 log_error_errno(r
, "SIGBUS handling failed: %m");
574 HASHMAP_FOREACH(f
, m
->fds
, i
) {
577 LIST_FOREACH(by_fd
, w
, f
->windows
) {
578 if ((uint8_t*) addr
>= (uint8_t*) w
->ptr
&&
579 (uint8_t*) addr
< (uint8_t*) w
->ptr
+ w
->size
) {
580 found
= ours
= f
->sigbus
= true;
589 /* Didn't find a matching window, give up */
591 log_error("Unknown SIGBUS page, aborting.");
596 /* The list of triggered pages is now empty. Now, let's remap
597 * all windows of the triggered file to anonymous maps, so
598 * that no page of the file in question is triggered again, so
599 * that we can be sure not to hit the queue size limit. */
600 if (_likely_(!found
))
603 HASHMAP_FOREACH(f
, m
->fds
, i
) {
609 LIST_FOREACH(by_fd
, w
, f
->windows
)
610 window_invalidate(w
);
614 bool mmap_cache_got_sigbus(MMapCache
*m
, MMapFileDescriptor
*f
) {
618 mmap_cache_process_sigbus(m
);
623 MMapFileDescriptor
* mmap_cache_add_fd(MMapCache
*m
, int fd
) {
624 MMapFileDescriptor
*f
;
630 f
= hashmap_get(m
->fds
, FD_TO_PTR(fd
));
634 r
= hashmap_ensure_allocated(&m
->fds
, NULL
);
638 f
= new0(MMapFileDescriptor
, 1);
645 r
= hashmap_put(m
->fds
, FD_TO_PTR(fd
), f
);
652 void mmap_cache_free_fd(MMapCache
*m
, MMapFileDescriptor
*f
) {
656 /* Make sure that any queued SIGBUS are first dispatched, so
657 * that we don't end up with a SIGBUS entry we cannot relate
658 * to any existing memory map */
660 mmap_cache_process_sigbus(m
);
663 window_free(f
->windows
);
666 assert_se(hashmap_remove(f
->cache
->fds
, FD_TO_PTR(f
->fd
)));