]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/libsystemd/sd-journal/mmap-cache.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
7 #include "alloc-util.h"
8 #include "errno-util.h"
14 #include "memory-util.h"
15 #include "mmap-cache.h"
18 typedef struct Window Window
;
19 typedef struct Context Context
;
32 MMapFileDescriptor
*fd
;
34 LIST_FIELDS(Window
, by_fd
);
35 LIST_FIELDS(Window
, unused
);
37 LIST_HEAD(Context
, contexts
);
43 LIST_FIELDS(Context
, by_window
);
46 struct MMapFileDescriptor
{
51 LIST_HEAD(Window
, windows
);
58 unsigned n_context_cache_hit
, n_window_list_hit
, n_missed
;
62 LIST_HEAD(Window
, unused
);
65 Context contexts
[MMAP_CACHE_MAX_CONTEXTS
];
68 #define WINDOWS_MIN 64
70 #if ENABLE_DEBUG_MMAP_CACHE
71 /* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
72 # define WINDOW_SIZE (page_size())
74 # define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
77 MMapCache
* mmap_cache_new(void) {
80 m
= new0(MMapCache
, 1);
88 static void window_unlink(Window
*w
) {
93 munmap(w
->ptr
, w
->size
);
96 LIST_REMOVE(by_fd
, w
->fd
->windows
, w
);
99 if (w
->cache
->last_unused
== w
)
100 w
->cache
->last_unused
= w
->unused_prev
;
102 LIST_REMOVE(unused
, w
->cache
->unused
, w
);
105 LIST_FOREACH(by_window
, c
, w
->contexts
) {
106 assert(c
->window
== w
);
111 static void window_invalidate(Window
*w
) {
118 /* Replace the window with anonymous pages. This is useful
119 * when we hit a SIGBUS and want to make sure the file cannot
120 * trigger any further SIGBUS, possibly overrunning the sigbus
123 assert_se(mmap(w
->ptr
, w
->size
, w
->fd
->prot
, MAP_PRIVATE
|MAP_ANONYMOUS
|MAP_FIXED
, -1, 0) == w
->ptr
);
124 w
->invalidated
= true;
127 static void window_free(Window
*w
) {
131 w
->cache
->n_windows
--;
135 _pure_
static bool window_matches(Window
*w
, uint64_t offset
, size_t size
) {
140 offset
>= w
->offset
&&
141 offset
+ size
<= w
->offset
+ w
->size
;
144 _pure_
static bool window_matches_fd(Window
*w
, MMapFileDescriptor
*f
, uint64_t offset
, size_t size
) {
150 window_matches(w
, offset
, size
);
153 static Window
*window_add(MMapCache
*m
, MMapFileDescriptor
*f
, bool keep_always
, uint64_t offset
, size_t size
, void *ptr
) {
159 if (!m
->last_unused
|| m
->n_windows
<= WINDOWS_MIN
) {
161 /* Allocate a new window */
168 /* Reuse an existing one */
176 .keep_always
= keep_always
,
182 LIST_PREPEND(by_fd
, f
->windows
, w
);
187 static void context_detach_window(MMapCache
*m
, Context
*c
) {
196 w
= TAKE_PTR(c
->window
);
197 LIST_REMOVE(by_window
, w
->contexts
, c
);
199 if (!w
->contexts
&& !w
->keep_always
) {
200 /* Not used anymore? */
201 #if ENABLE_DEBUG_MMAP_CACHE
202 /* Unmap unused windows immediately to expose use-after-unmap
206 LIST_PREPEND(unused
, m
->unused
, w
);
215 static void context_attach_window(MMapCache
*m
, Context
*c
, Window
*w
) {
223 context_detach_window(m
, c
);
227 if (m
->last_unused
== w
)
228 m
->last_unused
= w
->unused_prev
;
229 LIST_REMOVE(unused
, m
->unused
, w
);
231 w
->in_unused
= false;
235 LIST_PREPEND(by_window
, w
->contexts
, c
);
238 static MMapCache
*mmap_cache_free(MMapCache
*m
) {
241 for (int i
= 0; i
< MMAP_CACHE_MAX_CONTEXTS
; i
++)
242 context_detach_window(m
, &m
->contexts
[i
]);
244 hashmap_free(m
->fds
);
247 window_free(m
->unused
);
252 DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache
, mmap_cache
, mmap_cache_free
);
254 static int make_room(MMapCache
*m
) {
260 window_free(m
->last_unused
);
264 static int try_context(
265 MMapFileDescriptor
*f
,
274 assert(f
->cache
->n_ref
> 0);
282 if (!window_matches_fd(c
->window
, f
, offset
, size
)) {
284 /* Drop the reference to the window, since it's unnecessary now */
285 context_detach_window(f
->cache
, c
);
289 if (c
->window
->fd
->sigbus
)
292 c
->window
->keep_always
= c
->window
->keep_always
|| keep_always
;
294 *ret
= (uint8_t*) c
->window
->ptr
+ (offset
- c
->window
->offset
);
295 f
->cache
->n_context_cache_hit
++;
300 static int find_mmap(
301 MMapFileDescriptor
*f
,
308 Window
*found
= NULL
;
312 assert(f
->cache
->n_ref
> 0);
319 LIST_FOREACH(by_fd
, w
, f
->windows
)
320 if (window_matches(w
, offset
, size
)) {
328 context_attach_window(f
->cache
, c
, found
);
329 found
->keep_always
= found
->keep_always
|| keep_always
;
331 *ret
= (uint8_t*) found
->ptr
+ (offset
- found
->offset
);
332 f
->cache
->n_window_list_hit
++;
337 static int mmap_try_harder(MMapFileDescriptor
*f
, void *addr
, int flags
, uint64_t offset
, size_t size
, void **res
) {
346 ptr
= mmap(addr
, size
, f
->prot
, flags
, f
->fd
, offset
);
347 if (ptr
!= MAP_FAILED
)
350 return negative_errno();
352 r
= make_room(f
->cache
);
364 MMapFileDescriptor
*f
,
372 uint64_t woffset
, wsize
;
379 assert(f
->cache
->n_ref
> 0);
384 woffset
= offset
& ~((uint64_t) page_size() - 1ULL);
385 wsize
= size
+ (offset
- woffset
);
386 wsize
= PAGE_ALIGN(wsize
);
388 if (wsize
< WINDOW_SIZE
) {
391 delta
= PAGE_ALIGN((WINDOW_SIZE
- wsize
) / 2);
402 /* Memory maps that are larger than the files
403 underneath have undefined behavior. Hence, clamp
404 things to the file size if we know it */
406 if (woffset
>= (uint64_t) st
->st_size
)
407 return -EADDRNOTAVAIL
;
409 if (woffset
+ wsize
> (uint64_t) st
->st_size
)
410 wsize
= PAGE_ALIGN(st
->st_size
- woffset
);
413 r
= mmap_try_harder(f
, NULL
, MAP_SHARED
, woffset
, wsize
, &d
);
417 w
= window_add(f
->cache
, f
, keep_always
, woffset
, wsize
, d
);
421 context_attach_window(f
->cache
, c
, w
);
423 *ret
= (uint8_t*) w
->ptr
+ (offset
- w
->offset
);
428 (void) munmap(d
, wsize
);
432 int mmap_cache_fd_get(
433 MMapFileDescriptor
*f
,
446 assert(f
->cache
->n_ref
> 0);
449 assert(context
< MMAP_CACHE_MAX_CONTEXTS
);
451 c
= &f
->cache
->contexts
[context
];
453 /* Check whether the current context is the right one already */
454 r
= try_context(f
, c
, keep_always
, offset
, size
, ret
);
458 /* Search for a matching mmap */
459 r
= find_mmap(f
, c
, keep_always
, offset
, size
, ret
);
463 f
->cache
->n_missed
++;
465 /* Create a new mmap */
466 return add_mmap(f
, c
, keep_always
, offset
, size
, st
, ret
);
469 void mmap_cache_stats_log_debug(MMapCache
*m
) {
472 log_debug("mmap cache statistics: %u context cache hit, %u window list hit, %u miss", m
->n_context_cache_hit
, m
->n_window_list_hit
, m
->n_missed
);
475 static void mmap_cache_process_sigbus(MMapCache
*m
) {
477 MMapFileDescriptor
*f
;
482 /* Iterate through all triggered pages and mark their files as
488 r
= sigbus_pop(&addr
);
489 if (_likely_(r
== 0))
492 log_error_errno(r
, "SIGBUS handling failed: %m");
497 HASHMAP_FOREACH(f
, m
->fds
) {
498 LIST_FOREACH(by_fd
, w
, f
->windows
) {
499 if ((uint8_t*) addr
>= (uint8_t*) w
->ptr
&&
500 (uint8_t*) addr
< (uint8_t*) w
->ptr
+ w
->size
) {
501 found
= ours
= f
->sigbus
= true;
510 /* Didn't find a matching window, give up */
512 log_error("Unknown SIGBUS page, aborting.");
517 /* The list of triggered pages is now empty. Now, let's remap
518 * all windows of the triggered file to anonymous maps, so
519 * that no page of the file in question is triggered again, so
520 * that we can be sure not to hit the queue size limit. */
521 if (_likely_(!found
))
524 HASHMAP_FOREACH(f
, m
->fds
) {
528 LIST_FOREACH(by_fd
, w
, f
->windows
)
529 window_invalidate(w
);
533 bool mmap_cache_fd_got_sigbus(MMapFileDescriptor
*f
) {
536 mmap_cache_process_sigbus(f
->cache
);
541 MMapFileDescriptor
* mmap_cache_add_fd(MMapCache
*m
, int fd
, int prot
) {
542 MMapFileDescriptor
*f
;
548 f
= hashmap_get(m
->fds
, FD_TO_PTR(fd
));
552 r
= hashmap_ensure_allocated(&m
->fds
, NULL
);
556 f
= new0(MMapFileDescriptor
, 1);
560 r
= hashmap_put(m
->fds
, FD_TO_PTR(fd
), f
);
564 f
->cache
= mmap_cache_ref(m
);
571 void mmap_cache_fd_free(MMapFileDescriptor
*f
) {
575 /* Make sure that any queued SIGBUS are first dispatched, so
576 * that we don't end up with a SIGBUS entry we cannot relate
577 * to any existing memory map */
579 mmap_cache_process_sigbus(f
->cache
);
582 window_free(f
->windows
);
585 assert_se(hashmap_remove(f
->cache
->fds
, FD_TO_PTR(f
->fd
)));
586 f
->cache
= mmap_cache_unref(f
->cache
);
592 MMapCache
* mmap_cache_fd_cache(MMapFileDescriptor
*f
) {