1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
7 #include "alloc-util.h"
8 #include "errno-util.h"
14 #include "memory-util.h"
15 #include "mmap-cache.h"
/* Forward declarations. A Window is one cached mmap()ed region of a file; a
 * Context is a per-caller slot that can pin a Window (see context_attach_window()
 * / context_detach_window() below). */
18 typedef struct Window Window
;
19 typedef struct Context Context
;
/* NOTE(review): fields of struct Window; the opening "struct Window {" and some
 * members (ptr, offset, size, keep_always, in_unused, invalidated — all referenced
 * later in this file) are missing from this fragmented view. */
/* The file descriptor object this window was mapped from. */
22 MMapFileDescriptor
*fd
;
/* Linkage in the owning fd's list of windows (see window_add()/window_unlink()). */
32 LIST_FIELDS(Window
, windows
);
/* Linkage in the cache-wide list of unused windows (see context_detach_window()). */
33 LIST_FIELDS(Window
, unused
);
/* Head of the list of contexts currently pointing at this window. */
35 LIST_HEAD(Context
, contexts
);
/* NOTE(review): presumably a field of struct Context (its opening brace is not
 * visible here): linkage of this context in its window's "contexts" list, iterated
 * via LIST_FOREACH(by_window, ...) in window_unlink(). */
41 LIST_FIELDS(Context
, by_window
);
/* Per-file-descriptor state of the cache. NOTE(review): other members (cache, fd,
 * prot, sigbus — all dereferenced as f->... later in this file) are missing from
 * this fragmented view. */
44 struct MMapFileDescriptor
{
/* All windows currently mapped from this file descriptor. */
49 LIST_HEAD(Window
, windows
);
/* NOTE(review): interior of struct MMapCache; the opening "struct MMapCache {" and
 * members such as n_ref, n_windows, n_missed, fds and last_unused (all referenced
 * as m->... below) are missing from this view. */
/* Statistics reported by mmap_cache_stats_log_debug(). */
56 unsigned n_context_cache_hit
;
57 unsigned n_window_list_hit
;
/* Windows no longer referenced by any context; candidates for reuse in
 * window_add() (taken from the tail via m->last_unused). */
62 LIST_HEAD(Window
, unused
);
/* Fixed array of context slots, indexed by the caller-provided context id
 * (bounds-checked in mmap_cache_fd_get()). */
65 Context contexts
[MMAP_CACHE_MAX_CONTEXTS
];
/* Keep at least this many windows around before recycling unused ones (see the
 * n_windows <= WINDOWS_MIN test in window_add()). */
68 #define WINDOWS_MIN 64
70 #if ENABLE_DEBUG_MMAP_CACHE
71 /* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
72 # define WINDOW_SIZE (page_size())
/* Normal build: map generously sized 8 MiB windows to amortize mmap() cost.
 * NOTE(review): the #else/#endif between the two definitions are missing from
 * this fragmented view. */
74 # define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
/* Allocate a new MMapCache object. NOTE(review): the lines between the signature
 * and the allocation, and the initialization/return that must follow, are missing
 * from this fragmented view — confirm against the full file. */
77 MMapCache
* mmap_cache_new(void) {
80 m
= new(MMapCache
, 1);
/* Detach window w from all cache bookkeeping prior to freeing or reusing it:
 * unmap its memory, drop it from the unused list (fixing up last_unused), detach
 * any contexts still pointing at it, and finally remove it from its fd's window
 * list. NOTE(review): several lines (asserts, an apparent in_unused guard, the
 * c->window reset inside the loop) are missing from this fragmented view. */
91 static Window
* window_unlink(Window
*w
) {
94 MMapCache
*m
= mmap_cache_fd_cache(w
->fd
);
/* Release the actual memory mapping. */
97 munmap(w
->ptr
, w
->size
);
/* If w was the cached "last unused" entry, step back to its predecessor. */
100 if (m
->last_unused
== w
)
101 m
->last_unused
= w
->unused_prev
;
/* Remove from the cache-wide unused list. NOTE(review): presumably guarded by a
 * w->in_unused check on a missing line — confirm against the full file. */
103 LIST_REMOVE(unused
, m
->unused
, w
);
/* Every context that still points at this window must be detached; the reset of
 * c->window is presumably on a line missing from this view. */
106 LIST_FOREACH(by_window
, c
, w
->contexts
) {
107 assert(c
->window
== w
);
/* Finally take w out of its owning fd's window list. */
111 return LIST_REMOVE(windows
, w
->fd
->windows
, w
);
/* Neutralize a window after a SIGBUS on its backing file, without changing its
 * address range. NOTE(review): asserts and an apparent early-return for already
 * invalidated windows are missing from this fragmented view. */
114 static void window_invalidate(Window
*w
) {
121 /* Replace the window with anonymous pages. This is useful when we hit a SIGBUS and want to make sure
122 * the file cannot trigger any further SIGBUS, possibly overrunning the sigbus queue. */
/* MAP_FIXED guarantees the anonymous replacement lands at exactly w->ptr;
 * assert_se() verifies the kernel honored that. */
124 assert_se(mmap(w
->ptr
, w
->size
, w
->fd
->prot
, MAP_PRIVATE
|MAP_ANONYMOUS
|MAP_FIXED
, -1, 0) == w
->ptr
);
125 w
->invalidated
= true;
/* Free a window, keeping the owning cache's window count in sync. NOTE(review):
 * the unlink call and the actual free/return of w are on lines missing from this
 * fragmented view. */
128 static Window
* window_free(Window
*w
) {
133 w
->fd
->cache
->n_windows
--;
/* Test whether the requested file range [offset, offset+size) is fully contained
 * in window w. NOTE(review): the "return" keyword and earlier conjuncts (likely a
 * null check on w and an f == w->fd comparison, given the f parameter) are missing
 * from this fragmented view — only the containment conjuncts are visible. */
138 static bool window_matches(Window
*w
, MMapFileDescriptor
*f
, uint64_t offset
, size_t size
) {
144 offset
>= w
->offset
&&
145 offset
+ size
<= w
->offset
+ w
->size
;
/* Register a new window for fd f covering [offset, offset+size) at mapped address
 * ptr: either allocate a fresh Window or recycle the least recently unused one.
 * NOTE(review): the allocation branch body, the initialization of w's fields and
 * several asserts are missing from this fragmented view. */
148 static Window
* window_add(MMapFileDescriptor
*f
, uint64_t offset
, size_t size
, void *ptr
) {
149 MMapCache
*m
= mmap_cache_fd_cache(f
);
/* Prefer allocating new windows until the pool reaches WINDOWS_MIN; only then
 * start cannibalizing unused ones. */
152 if (!m
->last_unused
|| m
->n_windows
<= WINDOWS_MIN
) {
153 /* Allocate a new window */
159 /* Reuse an existing one */
160 w
= window_unlink(m
->last_unused
);
/* Hook the (new or recycled) window into this fd's window list. */
169 return LIST_PREPEND(windows
, f
->windows
, w
);
/* Drop context c's reference on its current window. If that leaves the window
 * unreferenced (and not pinned via keep_always), it is either freed immediately
 * (debug builds) or parked on the unused list for later reuse. NOTE(review):
 * asserts, an early return for c->window == NULL, the debug-build free call and
 * the last_unused/in_unused updates after the LIST_PREPEND are missing from this
 * fragmented view. */
172 static void context_detach_window(MMapCache
*m
, Context
*c
) {
/* Take ownership of the pointer and clear c->window in one step. */
181 w
= TAKE_PTR(c
->window
);
182 LIST_REMOVE(by_window
, w
->contexts
, c
);
184 if (!w
->contexts
&& !w
->keep_always
) {
185 /* Not used anymore? */
186 #if ENABLE_DEBUG_MMAP_CACHE
187 /* Unmap unused windows immediately to expose use-after-unmap by SIGSEGV. */
190 LIST_PREPEND(unused
, m
->unused
, w
);
/* Point context c at window w: detach whatever it referenced before, pull w off
 * the unused list if it was parked there, and link c into w's context list.
 * NOTE(review): asserts, an early return when c->window == w already, the
 * in_unused guard around the unused-list removal, and the assignment of
 * c->window = w are missing from this fragmented view. */
199 static void context_attach_window(MMapCache
*m
, Context
*c
, Window
*w
) {
207 context_detach_window(m
, c
);
/* w is about to gain a reference, so it must leave the unused pool; fix up the
 * cached last_unused pointer first. */
211 if (m
->last_unused
== w
)
212 m
->last_unused
= w
->unused_prev
;
213 LIST_REMOVE(unused
, m
->unused
, w
);
215 w
->in_unused
= false;
219 LIST_PREPEND(by_window
, w
->contexts
, c
);
/* Destroy an MMapCache once its last reference is dropped (see the ref/unref
 * generator below). NOTE(review): asserts and the final free/return of m are
 * missing from this fragmented view. */
222 static MMapCache
* mmap_cache_free(MMapCache
*m
) {
226 /* All windows are owned by fds, and each fd takes a reference of MMapCache. So, when this is called,
227 * all fds are already freed, and hence there is no window. */
229 assert(hashmap_isempty(m
->fds
));
230 hashmap_free(m
->fds
);
/* With no fds left there can be no windows either — verify the invariant. */
233 assert(m
->n_windows
== 0);
/* Generates mmap_cache_ref() and mmap_cache_unref(); the latter calls
 * mmap_cache_free() when the reference count drops to zero. */
238 DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache
, mmap_cache
, mmap_cache_free
);
/* mmap() the requested range of f, and when the kernel reports ENOMEM keep
 * retrying after evicting unused windows to release address space. Returns 0 with
 * the mapping in *ret, or a negative errno. NOTE(review): the enclosing retry
 * loop, asserts, the success-path *ret assignment/return, and the ENOMEM check
 * itself are on lines missing from this fragmented view. */
240 static int mmap_try_harder(MMapFileDescriptor
*f
, void *addr
, int flags
, uint64_t offset
, size_t size
, void **ret
) {
241 MMapCache
*m
= mmap_cache_fd_cache(f
);
248 ptr
= mmap(addr
, size
, f
->prot
, flags
, f
->fd
, offset
);
249 if (ptr
!= MAP_FAILED
) {
/* Any failure other than the retryable case is propagated as -errno. */
254 return negative_errno();
256 /* When failed with ENOMEM, try again after making a room by freeing an unused window. */
259 return -ENOMEM
; /* no free window, propagate the original error. */
/* Evict the least recently unused window and retry the mmap(). */
261 window_free(m
->last_unused
);
/* NOTE(review): this is the body of add_mmap() — the opening line of the
 * definition (and several parameters, including the returned Window** and the
 * struct stat *st used below) is missing from this fragmented view; the name and
 * argument order are grounded by the call "r = add_mmap(f, offset, size, st, &w)"
 * in mmap_cache_fd_get() below. It page-aligns and enlarges the requested range
 * to a full window, clamps it to the file size, maps it and registers a Window
 * for it. */
266 MMapFileDescriptor
*f
,
272 uint64_t woffset
, wsize
;
/* Round the start down to a page boundary and grow the size accordingly. */
281 woffset
= offset
& ~((uint64_t) page_size() - 1ULL);
282 wsize
= size
+ (offset
- woffset
);
283 wsize
= PAGE_ALIGN(wsize
);
/* Small requests are inflated to WINDOW_SIZE; the visible "delta" math centers
 * the requested range inside the enlarged window. NOTE(review): the lines
 * applying delta to woffset/wsize are missing from this view. */
285 if (wsize
< WINDOW_SIZE
) {
288 delta
= PAGE_ALIGN((WINDOW_SIZE
- wsize
) / 2);
299 /* Memory maps that are larger then the files underneath have undefined behavior. Hence,
300 * clamp things to the file size if we know it */
/* Requested range starts entirely beyond EOF: nothing mappable. */
302 if (woffset
>= (uint64_t) st
->st_size
)
303 return -EADDRNOTAVAIL
;
305 if (woffset
+ wsize
> (uint64_t) st
->st_size
)
306 wsize
= PAGE_ALIGN(st
->st_size
- woffset
);
/* Map the window (with ENOMEM-eviction retries) ... */
309 r
= mmap_try_harder(f
, NULL
, MAP_SHARED
, woffset
, wsize
, &d
);
/* ... and register it; on registration failure the fresh mapping is released
 * again. NOTE(review): the error-path control flow around these lines is missing
 * from this view. */
313 w
= window_add(f
, woffset
, wsize
, d
);
315 (void) munmap(d
, wsize
);
/* Return (in *ret) a pointer into an mmap()ed view of f covering
 * [offset, offset+size). Lookup order: the per-context cached window, then all of
 * f's existing windows, then a newly created mapping. NOTE(review): several
 * parameters of the signature, asserts, the "found:" style tail that attaches the
 * window and the early-return on context-cache hit are on lines missing from this
 * fragmented view. */
323 int mmap_cache_fd_get(
324 MMapFileDescriptor
*f
,
332 MMapCache
*m
= mmap_cache_fd_cache(f
);
/* context is an index into the fixed contexts[] array — bound it. */
337 assert(context
< MMAP_CACHE_MAX_CONTEXTS
);
344 c
= &f
->cache
->contexts
[context
];
346 /* Check whether the current context is the right one already */
347 if (window_matches(c
->window
, f
, offset
, size
)) {
348 m
->n_context_cache_hit
++;
353 /* Drop the reference to the window, since it's unnecessary now */
354 context_detach_window(m
, c
);
356 /* Search for a matching mmap */
357 LIST_FOREACH(windows
, i
, f
->windows
)
358 if (window_matches(i
, f
, offset
, size
)) {
359 m
->n_window_list_hit
++;
366 /* Create a new mmap */
367 r
= add_mmap(f
, offset
, size
, st
, &w
);
/* Attach the chosen window to this context and translate the file offset into a
 * pointer inside the window. */
372 w
->keep_always
= w
->keep_always
|| keep_always
;
373 context_attach_window(m
, c
, w
);
374 *ret
= (uint8_t*) w
->ptr
+ (offset
- w
->offset
);
/* Emit the cache's hit/miss counters at debug log level. NOTE(review): an
 * assert(m) presumably precedes the log call on a line missing from this view. */
378 void mmap_cache_stats_log_debug(MMapCache
*m
) {
381 log_debug("mmap cache statistics: %u context cache hit, %u window list hit, %u miss",
382 m
->n_context_cache_hit
, m
->n_window_list_hit
, m
->n_missed
);
/* Drain the queue of SIGBUS fault addresses (filled by the sigbus handler),
 * attribute each address to the window/fd that owns it, and then remap every
 * window of each affected fd to anonymous memory so the file cannot fault again.
 * Aborts if a fault address matches no known window. NOTE(review): the enclosing
 * for(;;) loop, the declarations of addr/found/ours, loop break/continue lines
 * and the f->sigbus filter before invalidation are missing from this fragmented
 * view. */
385 static void mmap_cache_process_sigbus(MMapCache
*m
) {
387 MMapFileDescriptor
*f
;
392 /* Iterate through all triggered pages and mark their files as invalidated. */
/* Pop one queued fault address; r == 0 means the queue is empty. */
397 r
= sigbus_pop(&addr
);
398 if (_likely_(r
== 0))
401 log_error_errno(r
, "SIGBUS handling failed: %m");
/* Find which fd/window contains the faulting address. */
406 HASHMAP_FOREACH(f
, m
->fds
) {
407 LIST_FOREACH(windows
, w
, f
->windows
) {
408 if ((uint8_t*) addr
>= (uint8_t*) w
->ptr
&&
409 (uint8_t*) addr
< (uint8_t*) w
->ptr
+ w
->size
) {
410 found
= ours
= f
->sigbus
= true;
419 /* Didn't find a matching window, give up. */
421 log_error("Unknown SIGBUS page, aborting.");
426 /* The list of triggered pages is now empty. Now, let's remap all windows of the triggered file to
427 * anonymous maps, so that no page of the file in question is triggered again, so that we can be sure
428 * not to hit the queue size limit. */
429 if (_likely_(!found
))
432 HASHMAP_FOREACH(f
, m
->fds
) {
436 LIST_FOREACH(windows
, w
, f
->windows
)
437 window_invalidate(w
);
/* Report whether a SIGBUS was attributed to this fd. Drains the pending SIGBUS
 * queue first so the answer is current. NOTE(review): the return statement
 * (presumably returning f->sigbus, which mmap_cache_process_sigbus() sets) is on
 * a line missing from this fragmented view. */
441 bool mmap_cache_fd_got_sigbus(MMapFileDescriptor
*f
) {
444 mmap_cache_process_sigbus(f
->cache
);
/* Register file descriptor fd (with mmap protection flags prot) in cache m,
 * returning the (possibly pre-existing) MMapFileDescriptor via *ret. An existing
 * entry is reused only if its prot matches. NOTE(review): asserts, the error
 * returns for the prot mismatch and hashmap failure, the initializer fields of
 * *f, the TAKE_PTR hand-off into *ret and the final return are on lines missing
 * from this fragmented view. */
449 int mmap_cache_add_fd(MMapCache
*m
, int fd
, int prot
, MMapFileDescriptor
**ret
) {
450 _cleanup_free_ MMapFileDescriptor
*f
= NULL
;
451 MMapFileDescriptor
*existing
;
/* Each fd may only be registered once; look it up first. */
457 existing
= hashmap_get(m
->fds
, FD_TO_PTR(fd
));
/* An existing registration with different protection flags is a caller error. */
459 if (existing
->prot
!= prot
)
466 f
= new(MMapFileDescriptor
, 1);
470 *f
= (MMapFileDescriptor
) {
475 r
= hashmap_ensure_put(&m
->fds
, NULL
, FD_TO_PTR(fd
), f
);
/* Each registered fd holds a reference on the cache (released in
 * mmap_cache_fd_free()). */
480 f
->cache
= mmap_cache_ref(m
);
/* Unregister and free an MMapFileDescriptor: drain pending SIGBUS events, free
 * all of its windows, remove it from the cache's fd hashmap, then drop its cache
 * reference last. NOTE(review): the loop around window_free(), the free/return of
 * f itself and any asserts are on lines missing from this fragmented view. */
489 MMapFileDescriptor
* mmap_cache_fd_free(MMapFileDescriptor
*f
) {
493 /* Make sure that any queued SIGBUS are first dispatched, so that we don't end up with a SIGBUS entry
494 * we cannot relate to any existing memory map. */
496 mmap_cache_process_sigbus(f
->cache
);
/* Free the windows owned by this fd; presumably looped over a missing line. */
499 window_free(f
->windows
);
501 assert_se(hashmap_remove(f
->cache
->fds
, FD_TO_PTR(f
->fd
)) == f
);
503 /* Unref the cache at the end. Otherwise, the assertions in mmap_cache_free() may be triggered. */
504 f
->cache
= mmap_cache_unref(f
->cache
);
/* Return the cache this fd belongs to; ASSERT_PTR() aborts if f->cache is NULL.
 * NOTE(review): this definition runs past the end of the visible chunk. */
509 MMapCache
* mmap_cache_fd_cache(MMapFileDescriptor
*f
) {
511 return ASSERT_PTR(f
->cache
);