1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
6 #include "alloc-util.h"
8 #include "errno-util.h"
13 #include "memory-util.h"
14 #include "mmap-cache.h"
/* Forward declaration for a single cached memory-mapped window of a file. */
17 typedef struct Window Window
;
/* Per-window state bits. The low bits (below _MMAP_CACHE_CATEGORY_MAX) record
 * which categories currently reference the window; the flags below start
 * directly above the category bits. */
19 typedef enum WindowFlags
{
/* Window is exempt from reclamation; see mmap_cache_fd_pin(), which treats
 * such windows as permanently mapped. */
20 WINDOW_KEEP_ALWAYS
= 1u << (_MMAP_CACHE_CATEGORY_MAX
+ 0),
/* Window is currently linked into the cache's 'unused' list. */
21 WINDOW_IN_UNUSED
= 1u << (_MMAP_CACHE_CATEGORY_MAX
+ 1),
/* Window's pages were replaced with anonymous memory after a SIGBUS; see
 * window_invalidate(). */
22 WINDOW_INVALIDATED
= 1u << (_MMAP_CACHE_CATEGORY_MAX
+ 2),
24 _WINDOW_USED_MASK
= WINDOW_IN_UNUSED
- 1, /* The mask contains all bits that indicate the window
25 * is currently in use. Covers all the object types
26 * and the additional WINDOW_KEEP_ALWAYS flag. */
29 #define WINDOW_IS_UNUSED(w) (((w)->flags & _WINDOW_USED_MASK) == 0)
/* NOTE(review): these are fields of struct Window; the struct header and some
 * sibling fields are not visible in this chunk — verify against the full file. */
/* File descriptor object this window maps a region of. */
32 MMapFileDescriptor
*fd
;
/* Linkage into the owning fd's list of windows. */
40 LIST_FIELDS(Window
, windows
);
/* Linkage into the cache-wide list of unused windows. */
41 LIST_FIELDS(Window
, unused
);
/* State kept per file descriptor added to the cache. Further fields are not
 * visible in this chunk. */
44 struct MMapFileDescriptor
{
/* All windows currently mapped for this file. */
51 LIST_HEAD(Window
, windows
);
/* NOTE(review): these are fields of struct MMapCache; the struct header is not
 * visible in this chunk — verify against the full file. */
/* Statistics: lookups satisfied by the per-category cache slot. */
58 unsigned n_category_cache_hit
;
/* Statistics: lookups satisfied by scanning the fd's window list. */
59 unsigned n_window_list_hit
;
/* Windows not referenced by any category, kept around for reuse. */
64 LIST_HEAD(Window
, unused
);
/* Most-recently-used window per category, if any. */
68 Window
*windows_by_category
[_MMAP_CACHE_CATEGORY_MAX
];
/* Minimum number of windows to allocate before starting to reuse unused ones. */
71 #define WINDOWS_MIN 64
74 #if ENABLE_DEBUG_MMAP_CACHE
75 /* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
76 # define WINDOW_SIZE (page_size())
/* Default window size in production builds: 8 MiB. NOTE(review): the #else and
 * #endif of this conditional are not visible in this chunk — verify. */
78 # define WINDOW_SIZE ((size_t) (UINT64_C(8) * UINT64_C(1024) * UINT64_C(1024)))
/* Allocate a new, empty MMapCache object. Remaining initialization and the
 * return statement are not visible in this chunk. */
81 MMapCache
* mmap_cache_new(void) {
84 m
= new(MMapCache
, 1);
/* Unmap a window and detach it from all bookkeeping: the unused list, any
 * category slots pointing at it, and the owning fd's window list. Returns the
 * result of the final list removal. */
95 static Window
* window_unlink(Window
*w
) {
98 MMapCache
*m
= mmap_cache_fd_cache(w
->fd
);
/* Drop the actual memory mapping. */
101 munmap(w
->ptr
, w
->size
);
/* If the window sits on the unused list, fix up the last_unused cursor and
 * remove it from that list. */
103 if (FLAGS_SET(w
->flags
, WINDOW_IN_UNUSED
)) {
104 if (m
->last_unused
== w
)
105 m
->last_unused
= w
->unused_prev
;
106 LIST_REMOVE(unused
, m
->unused
, w
);
/* Clear every category slot that still references this window. */
110 for (unsigned i
= 0; i
< _MMAP_CACHE_CATEGORY_MAX
; i
++)
111 if (BIT_SET(w
->flags
, i
))
112 assert_se(TAKE_PTR(m
->windows_by_category
[i
]) == w
);
/* Finally detach from the owning fd's window list. */
114 return LIST_REMOVE(windows
, w
->fd
->windows
, w
);
/* Replace a window's file-backed pages with anonymous memory, so accessing it
 * can no longer raise SIGBUS. Idempotent via the WINDOW_INVALIDATED flag. */
117 static void window_invalidate(Window
*w
) {
/* Already invalidated? NOTE(review): the early-return body of this check is
 * not visible in this chunk. */
121 if (FLAGS_SET(w
->flags
, WINDOW_INVALIDATED
))
124 /* Replace the window with anonymous pages. This is useful when we hit a SIGBUS and want to make sure
125 * the file cannot trigger any further SIGBUS, possibly overrunning the sigbus queue. */
127 assert_se(mmap(w
->ptr
, w
->size
, w
->fd
->prot
, MAP_PRIVATE
|MAP_ANONYMOUS
|MAP_FIXED
, -1, 0) == w
->ptr
);
128 w
->flags
|= WINDOW_INVALIDATED
;
/* Free a window, decrementing the cache-wide window counter. Unlinking and the
 * actual free are not visible in this chunk. */
131 static Window
* window_free(Window
*w
) {
136 w
->fd
->cache
->n_windows
--;
/* Check whether the window covers the requested [offset, offset+size) range.
 * NOTE(review): the leading conjuncts of the return expression (presumably the
 * fd comparison) are not visible in this chunk. */
141 static bool window_matches(Window
*w
, MMapFileDescriptor
*f
, uint64_t offset
, size_t size
) {
147 offset
>= w
->offset
&&
148 offset
+ size
<= w
->offset
+ w
->size
;
/* Check whether the window's mapping contains the address range
 * [addr, addr+size). NOTE(review): the leading conjuncts of the return
 * expression are not visible in this chunk. */
151 static bool window_matches_by_addr(Window
*w
, MMapFileDescriptor
*f
, void *addr
, size_t size
) {
157 (uint8_t*) addr
>= (uint8_t*) w
->ptr
&&
158 (uint8_t*) addr
+ size
<= (uint8_t*) w
->ptr
+ w
->size
;
/* Register a new window object for the given mapping, either allocating a
 * fresh Window or recycling the least-recently-used one from the unused list,
 * then prepend it to the fd's window list. */
161 static Window
* window_add(MMapFileDescriptor
*f
, uint64_t offset
, size_t size
, void *ptr
) {
162 MMapCache
*m
= mmap_cache_fd_cache(f
);
/* Allocate fresh if there is nothing to recycle, or we are still below the
 * window/unused thresholds. */
165 if (!m
->last_unused
|| m
->n_windows
< WINDOWS_MIN
|| m
->n_unused
< UNUSED_MIN
) {
166 /* Allocate a new window */
172 /* Reuse an existing one */
173 w
= window_unlink(m
->last_unused
);
182 return LIST_PREPEND(windows
, f
->windows
, w
);
/* Drop the window currently attached to category 'c' (if any). If that leaves
 * the window completely unused, move it onto the unused list (or, in debug
 * builds, unmap it immediately). */
185 static void category_detach_window(MMapCache
*m
, MMapCacheCategory c
) {
189 assert(c
>= 0 && c
< _MMAP_CACHE_CATEGORY_MAX
);
191 w
= TAKE_PTR(m
->windows_by_category
[c
]);
193 return; /* Nothing attached. */
/* The window must carry the bit for the category it was attached to. */
195 assert(BIT_SET(w
->flags
, c
));
196 w
->flags
&= ~(1u << c
);
198 if (WINDOW_IS_UNUSED(w
)) {
199 /* Not used anymore? */
200 #if ENABLE_DEBUG_MMAP_CACHE
201 /* Unmap unused windows immediately to expose use-after-unmap by SIGSEGV. */
204 LIST_PREPEND(unused
, m
->unused
, w
);
208 w
->flags
|= WINDOW_IN_UNUSED
;
/* Make window 'w' the cached window for category 'c': detach whatever was
 * there before, pull 'w' off the unused list if needed, and record the
 * category bit on the window. */
213 static void category_attach_window(MMapCache
*m
, MMapCacheCategory c
, Window
*w
) {
215 assert(c
>= 0 && c
< _MMAP_CACHE_CATEGORY_MAX
);
218 if (m
->windows_by_category
[c
] == w
)
219 return; /* Already attached. */
221 category_detach_window(m
, c
);
/* If the window was idle, take it back off the unused list, fixing up the
 * last_unused cursor first. */
223 if (FLAGS_SET(w
->flags
, WINDOW_IN_UNUSED
)) {
225 if (m
->last_unused
== w
)
226 m
->last_unused
= w
->unused_prev
;
227 LIST_REMOVE(unused
, m
->unused
, w
);
229 w
->flags
&= ~WINDOW_IN_UNUSED
;
232 m
->windows_by_category
[c
] = w
;
233 w
->flags
|= (1u << c
);
/* Destructor invoked when the last reference is dropped (see the
 * DEFINE_TRIVIAL_REF_UNREF_FUNC below). By this point no fds — and therefore
 * no windows — may remain. */
236 static MMapCache
* mmap_cache_free(MMapCache
*m
) {
240 /* All windows are owned by fds, and each fd takes a reference of MMapCache. So, when this is called,
241 * all fds are already freed, and hence there is no window. */
243 assert(hashmap_isempty(m
->fds
));
244 hashmap_free(m
->fds
);
246 assert(!m
->unused
&& m
->n_unused
== 0);
247 assert(m
->n_windows
== 0);
/* Generates mmap_cache_ref()/mmap_cache_unref(), releasing via
 * mmap_cache_free() when the refcount drops to zero. */
252 DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache
, mmap_cache
, mmap_cache_free
);
/* mmap() the requested region of f's file, retrying after freeing unused
 * windows if the kernel reports ENOMEM. On success stores the mapping in
 * *ret; on failure returns a negative errno. */
254 static int mmap_try_harder(MMapFileDescriptor
*f
, void *addr
, int flags
, uint64_t offset
, size_t size
, void **ret
) {
255 MMapCache
*m
= mmap_cache_fd_cache(f
);
262 ptr
= mmap(addr
, size
, f
->prot
, flags
, f
->fd
, offset
);
263 if (ptr
!= MAP_FAILED
) {
268 return negative_errno();
270 /* When failed with ENOMEM, try again after making room by freeing an unused window. */
273 return -ENOMEM
; /* no free window, propagate the original error. */
/* Evict the least-recently-used unused window and loop to retry. */
275 window_free(m
->last_unused
);
/* NOTE(review): this is the body of add_mmap(); its opening signature line and
 * several statements are not visible in this chunk — verify against the full
 * file. It page-aligns and widens the requested range to WINDOW_SIZE, clamps
 * it to the file size, maps it and registers a Window for it. */
280 MMapFileDescriptor
*f
,
/* Refuse ranges whose page-aligned size would overflow size_t. */
295 if (size
> SIZE_MAX
- PAGE_OFFSET_U64(offset
))
296 return -EADDRNOTAVAIL
;
/* Round the range out to whole pages. */
298 size
= PAGE_ALIGN(size
+ PAGE_OFFSET_U64(offset
));
299 offset
= PAGE_ALIGN_DOWN_U64(offset
);
/* Grow small requests toward WINDOW_SIZE, centering the requested range. */
301 if (size
< WINDOW_SIZE
) {
304 delta
= PAGE_ALIGN((WINDOW_SIZE
- size
) / 2);
305 offset
= LESS_BY(offset
, delta
);
310 /* Memory maps that are larger than the files underneath have undefined behavior. Hence,
311 * clamp things to the file size if we know it */
313 if (offset
>= (uint64_t) st
->st_size
)
314 return -EADDRNOTAVAIL
;
316 if (size
> (uint64_t) st
->st_size
- offset
)
317 size
= PAGE_ALIGN((uint64_t) st
->st_size
- offset
);
320 if (size
>= SIZE_MAX
)
321 return -EADDRNOTAVAIL
;
323 r
= mmap_try_harder(f
, NULL
, MAP_SHARED
, offset
, size
, &d
);
327 w
= window_add(f
, offset
, size
, d
);
/* Window registration failed: undo the mapping. */
329 (void) munmap(d
, size
);
/* Look up (or create) a mapping covering [offset, offset+size) of f's file for
 * the given category, and return a pointer to the requested offset in *ret.
 * Lookup order: category slot, then the fd's window list, then a fresh mmap.
 * NOTE(review): several parameter and body lines are not visible in this
 * chunk — verify against the full file. */
337 int mmap_cache_fd_get(
338 MMapFileDescriptor
*f
,
346 MMapCache
*m
= mmap_cache_fd_cache(f
);
351 assert(c
>= 0 && c
< _MMAP_CACHE_CATEGORY_MAX
);
357 /* Check whether the current category is the right one already */
358 if (window_matches(m
->windows_by_category
[c
], f
, offset
, size
)) {
359 m
->n_category_cache_hit
++;
360 w
= m
->windows_by_category
[c
];
364 /* Drop the reference to the window, since it's unnecessary now */
365 category_detach_window(m
, c
);
367 /* Search for a matching mmap */
368 LIST_FOREACH(windows
, i
, f
->windows
)
369 if (window_matches(i
, f
, offset
, size
)) {
370 m
->n_window_list_hit
++;
377 /* Create a new mmap */
378 r
= add_mmap(f
, offset
, size
, st
, &w
);
/* Mark the new window as not reclaimable (condition guarding this is not
 * visible in this chunk). */
384 w
->flags
|= WINDOW_KEEP_ALWAYS
;
386 category_attach_window(m
, c
, w
);
/* Translate the window-relative position back to the caller's offset. */
387 *ret
= (uint8_t*) w
->ptr
+ (offset
- w
->offset
);
/* Pin the window containing [addr, addr+size) so it is not reclaimed, by
 * attaching it to the dedicated pinning category. Returns -EADDRNOTAVAIL when
 * no window covers the address. NOTE(review): several parameter and body lines
 * are not visible in this chunk — verify against the full file. */
391 int mmap_cache_fd_pin(
392 MMapFileDescriptor
*f
,
397 MMapCache
*m
= mmap_cache_fd_cache(f
);
401 assert(c
>= 0 && c
< _MMAP_CACHE_CATEGORY_MAX
);
407 /* Check if the current category is the right one. */
408 if (window_matches_by_addr(m
->windows_by_category
[c
], f
, addr
, size
)) {
409 m
->n_category_cache_hit
++;
410 w
= m
->windows_by_category
[c
];
414 /* Search for a matching mmap. */
415 LIST_FOREACH(windows
, i
, f
->windows
)
416 if (window_matches_by_addr(i
, f
, addr
, size
)) {
417 m
->n_window_list_hit
++;
423 return -EADDRNOTAVAIL
; /* Not found. */
426 if (FLAGS_SET(w
->flags
, WINDOW_KEEP_ALWAYS
))
427 return 0; /* The window will never be unmapped. */
429 /* Attach the window to the 'pinning' category. */
430 category_attach_window(m
, MMAP_CACHE_CATEGORY_PIN
, w
);
/* Emit the cache's hit/miss and usage counters at debug log level. */
434 void mmap_cache_stats_log_debug(MMapCache
*m
) {
437 log_debug("mmap cache statistics: %u category cache hit, %u window list hit, %u miss, %u files, %u windows, %u unused",
438 m
->n_category_cache_hit
, m
->n_window_list_hit
, m
->n_missed
, hashmap_size(m
->fds
), m
->n_windows
, m
->n_unused
);
/* Drain the queued SIGBUS addresses, mark the files whose windows they hit,
 * and then invalidate every window of those files so the same file cannot
 * trigger further SIGBUS. Aborts if a faulting page belongs to no known
 * window. */
441 static void mmap_cache_process_sigbus(MMapCache
*m
) {
443 MMapFileDescriptor
*f
;
448 /* Iterate through all triggered pages and mark their files as invalidated. */
453 r
= sigbus_pop(&addr
);
/* r == 0: queue drained, nothing (more) pending. */
454 if (_likely_(r
== 0))
457 log_error_errno(r
, "SIGBUS handling failed: %m");
/* Find which file's window contains the faulting address. */
462 HASHMAP_FOREACH(f
, m
->fds
) {
463 LIST_FOREACH(windows
, w
, f
->windows
)
464 if (window_matches_by_addr(w
, f
, addr
, 1)) {
465 found
= ours
= f
->sigbus
= true;
473 /* Didn't find a matching window, give up. */
475 log_error("Unknown SIGBUS page, aborting.");
480 /* The list of triggered pages is now empty. Now, let's remap all windows of the triggered file to
481 * anonymous maps, so that no page of the file in question is triggered again, so that we can be sure
482 * not to hit the queue size limit. */
483 if (_likely_(!found
))
486 HASHMAP_FOREACH(f
, m
->fds
) {
490 LIST_FOREACH(windows
, w
, f
->windows
)
491 window_invalidate(w
);
/* Report whether this file descriptor was hit by a SIGBUS, after first
 * draining any pending SIGBUS queue entries. The return statement is not
 * visible in this chunk. */
495 bool mmap_cache_fd_got_sigbus(MMapFileDescriptor
*f
) {
498 mmap_cache_process_sigbus(f
->cache
);
/* Register a file descriptor with the cache. If the fd is already registered
 * it must have been added with the same protection flags; otherwise a new
 * MMapFileDescriptor is allocated, inserted into the fd hashmap, and takes a
 * reference on the cache. */
503 int mmap_cache_add_fd(MMapCache
*m
, int fd
, int prot
, MMapFileDescriptor
**ret
) {
504 _cleanup_free_ MMapFileDescriptor
*f
= NULL
;
505 MMapFileDescriptor
*existing
;
511 existing
= hashmap_get(m
->fds
, FD_TO_PTR(fd
));
/* An fd re-added with different protection flags is a caller error. */
513 if (existing
->prot
!= prot
)
520 f
= new(MMapFileDescriptor
, 1);
524 *f
= (MMapFileDescriptor
) {
529 r
= hashmap_ensure_put(&m
->fds
, NULL
, FD_TO_PTR(fd
), f
);
/* Each registered fd holds a reference on the cache (released in
 * mmap_cache_fd_free()). */
534 f
->cache
= mmap_cache_ref(m
);
/* Tear down a registered file descriptor: flush pending SIGBUS state, free
 * its windows, remove it from the cache's fd hashmap, and drop its cache
 * reference last. */
543 MMapFileDescriptor
* mmap_cache_fd_free(MMapFileDescriptor
*f
) {
547 /* Make sure that any queued SIGBUS are first dispatched, so that we don't end up with a SIGBUS entry
548 * we cannot relate to any existing memory map. */
550 mmap_cache_process_sigbus(f
->cache
);
/* Free the fd's windows (loop construct around this call is not visible in
 * this chunk). */
553 window_free(f
->windows
);
555 assert_se(hashmap_remove(f
->cache
->fds
, FD_TO_PTR(f
->fd
)) == f
);
557 /* Unref the cache at the end. Otherwise, the assertions in mmap_cache_free() may be triggered. */
558 f
->cache
= mmap_cache_unref(f
->cache
);
563 MMapCache
* mmap_cache_fd_cache(MMapFileDescriptor
*f
) {
565 return ASSERT_PTR(f
->cache
);