2 This file is part of systemd.
4 Copyright 2012 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 #include "alloc-util.h"
30 #include "mmap-cache.h"
34 typedef struct Window Window
;
35 typedef struct Context Context
;
36 typedef struct FileDescriptor FileDescriptor
;
52 LIST_FIELDS(Window
, by_fd
);
53 LIST_FIELDS(Window
, unused
);
55 LIST_HEAD(Context
, contexts
);
63 LIST_FIELDS(Context
, by_window
);
66 struct FileDescriptor
{
70 LIST_HEAD(Window
, windows
);
77 unsigned n_hit
, n_missed
;
80 Context
*contexts
[MMAP_CACHE_MAX_CONTEXTS
];
82 LIST_HEAD(Window
, unused
);
86 #define WINDOWS_MIN 64
88 #ifdef ENABLE_DEBUG_MMAP_CACHE
89 /* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
90 # define WINDOW_SIZE (page_size())
92 # define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
95 MMapCache
* mmap_cache_new(void) {
98 m
= new0(MMapCache
, 1);
106 MMapCache
* mmap_cache_ref(MMapCache
*m
) {
108 assert(m
->n_ref
> 0);
114 static void window_unlink(Window
*w
) {
120 munmap(w
->ptr
, w
->size
);
123 LIST_REMOVE(by_fd
, w
->fd
->windows
, w
);
126 if (w
->cache
->last_unused
== w
)
127 w
->cache
->last_unused
= w
->unused_prev
;
129 LIST_REMOVE(unused
, w
->cache
->unused
, w
);
132 LIST_FOREACH(by_window
, c
, w
->contexts
) {
133 assert(c
->window
== w
);
138 static void window_invalidate(Window
*w
) {
144 /* Replace the window with anonymous pages. This is useful
145 * when we hit a SIGBUS and want to make sure the file cannot
146 * trigger any further SIGBUS, possibly overrunning the sigbus
149 assert_se(mmap(w
->ptr
, w
->size
, w
->prot
, MAP_PRIVATE
|MAP_ANONYMOUS
|MAP_FIXED
, -1, 0) == w
->ptr
);
150 w
->invalidated
= true;
153 static void window_free(Window
*w
) {
157 w
->cache
->n_windows
--;
161 _pure_
static bool window_matches(Window
*w
, int fd
, int prot
, uint64_t offset
, size_t size
) {
170 offset
>= w
->offset
&&
171 offset
+ size
<= w
->offset
+ w
->size
;
174 static Window
*window_add(MMapCache
*m
, FileDescriptor
*fd
, int prot
, bool keep_always
, uint64_t offset
, size_t size
, void *ptr
) {
180 if (!m
->last_unused
|| m
->n_windows
<= WINDOWS_MIN
) {
182 /* Allocate a new window */
189 /* Reuse an existing one */
198 w
->keep_always
= keep_always
;
203 LIST_PREPEND(by_fd
, fd
->windows
, w
);
208 static void context_detach_window(Context
*c
) {
218 LIST_REMOVE(by_window
, w
->contexts
, c
);
220 if (!w
->contexts
&& !w
->keep_always
) {
221 /* Not used anymore? */
222 #ifdef ENABLE_DEBUG_MMAP_CACHE
223 /* Unmap unused windows immediately to expose use-after-unmap
227 LIST_PREPEND(unused
, c
->cache
->unused
, w
);
228 if (!c
->cache
->last_unused
)
229 c
->cache
->last_unused
= w
;
236 static void context_attach_window(Context
*c
, Window
*w
) {
243 context_detach_window(c
);
247 LIST_REMOVE(unused
, c
->cache
->unused
, w
);
248 if (c
->cache
->last_unused
== w
)
249 c
->cache
->last_unused
= w
->unused_prev
;
251 w
->in_unused
= false;
255 LIST_PREPEND(by_window
, w
->contexts
, c
);
258 static Context
*context_add(MMapCache
*m
, unsigned id
) {
267 c
= new0(Context
, 1);
274 assert(!m
->contexts
[id
]);
280 static void context_free(Context
*c
) {
283 context_detach_window(c
);
286 assert(c
->cache
->contexts
[c
->id
] == c
);
287 c
->cache
->contexts
[c
->id
] = NULL
;
293 static void fd_free(FileDescriptor
*f
) {
297 window_free(f
->windows
);
300 assert_se(hashmap_remove(f
->cache
->fds
, FD_TO_PTR(f
->fd
)));
305 static FileDescriptor
* fd_add(MMapCache
*m
, int fd
) {
312 f
= hashmap_get(m
->fds
, FD_TO_PTR(fd
));
316 r
= hashmap_ensure_allocated(&m
->fds
, NULL
);
320 f
= new0(FileDescriptor
, 1);
327 r
= hashmap_put(m
->fds
, FD_TO_PTR(fd
), f
);
336 static void mmap_cache_free(MMapCache
*m
) {
342 for (i
= 0; i
< MMAP_CACHE_MAX_CONTEXTS
; i
++)
344 context_free(m
->contexts
[i
]);
346 while ((f
= hashmap_first(m
->fds
)))
349 hashmap_free(m
->fds
);
352 window_free(m
->unused
);
357 MMapCache
* mmap_cache_unref(MMapCache
*m
) {
362 assert(m
->n_ref
> 0);
371 static int make_room(MMapCache
*m
) {
377 window_free(m
->last_unused
);
381 static int try_context(
394 assert(m
->n_ref
> 0);
399 c
= m
->contexts
[context
];
403 assert(c
->id
== context
);
408 if (!window_matches(c
->window
, fd
, prot
, offset
, size
)) {
410 /* Drop the reference to the window, since it's unnecessary now */
411 context_detach_window(c
);
415 if (c
->window
->fd
->sigbus
)
418 c
->window
->keep_always
= c
->window
->keep_always
|| keep_always
;
420 *ret
= (uint8_t*) c
->window
->ptr
+ (offset
- c
->window
->offset
);
424 static int find_mmap(
439 assert(m
->n_ref
> 0);
443 f
= hashmap_get(m
->fds
, FD_TO_PTR(fd
));
452 LIST_FOREACH(by_fd
, w
, f
->windows
)
453 if (window_matches(w
, fd
, prot
, offset
, size
))
459 c
= context_add(m
, context
);
463 context_attach_window(c
, w
);
464 w
->keep_always
= w
->keep_always
|| keep_always
;
466 *ret
= (uint8_t*) w
->ptr
+ (offset
- w
->offset
);
470 static int mmap_try_harder(MMapCache
*m
, void *addr
, int fd
, int prot
, int flags
, uint64_t offset
, size_t size
, void **res
) {
480 ptr
= mmap(addr
, size
, prot
, flags
, fd
, offset
);
481 if (ptr
!= MAP_FAILED
)
508 uint64_t woffset
, wsize
;
516 assert(m
->n_ref
> 0);
521 woffset
= offset
& ~((uint64_t) page_size() - 1ULL);
522 wsize
= size
+ (offset
- woffset
);
523 wsize
= PAGE_ALIGN(wsize
);
525 if (wsize
< WINDOW_SIZE
) {
528 delta
= PAGE_ALIGN((WINDOW_SIZE
- wsize
) / 2);
539 /* Memory maps that are larger than the files
540 underneath have undefined behavior. Hence, clamp
541 things to the file size if we know it */
543 if (woffset
>= (uint64_t) st
->st_size
)
544 return -EADDRNOTAVAIL
;
546 if (woffset
+ wsize
> (uint64_t) st
->st_size
)
547 wsize
= PAGE_ALIGN(st
->st_size
- woffset
);
550 r
= mmap_try_harder(m
, NULL
, fd
, prot
, MAP_SHARED
, woffset
, wsize
, &d
);
554 c
= context_add(m
, context
);
562 w
= window_add(m
, f
, prot
, keep_always
, woffset
, wsize
, d
);
566 context_detach_window(c
);
568 LIST_PREPEND(by_window
, w
->contexts
, c
);
570 *ret
= (uint8_t*) w
->ptr
+ (offset
- w
->offset
);
592 assert(m
->n_ref
> 0);
596 assert(context
< MMAP_CACHE_MAX_CONTEXTS
);
598 /* Check whether the current context is the right one already */
599 r
= try_context(m
, fd
, prot
, context
, keep_always
, offset
, size
, ret
);
605 /* Search for a matching mmap */
606 r
= find_mmap(m
, fd
, prot
, context
, keep_always
, offset
, size
, ret
);
614 /* Create a new mmap */
615 return add_mmap(m
, fd
, prot
, context
, keep_always
, offset
, size
, st
, ret
);
618 unsigned mmap_cache_get_hit(MMapCache
*m
) {
624 unsigned mmap_cache_get_missed(MMapCache
*m
) {
630 static void mmap_cache_process_sigbus(MMapCache
*m
) {
638 /* Iterate through all triggered pages and mark their files as
644 r
= sigbus_pop(&addr
);
645 if (_likely_(r
== 0))
648 log_error_errno(r
, "SIGBUS handling failed: %m");
653 HASHMAP_FOREACH(f
, m
->fds
, i
) {
656 LIST_FOREACH(by_fd
, w
, f
->windows
) {
657 if ((uint8_t*) addr
>= (uint8_t*) w
->ptr
&&
658 (uint8_t*) addr
< (uint8_t*) w
->ptr
+ w
->size
) {
659 found
= ours
= f
->sigbus
= true;
668 /* Didn't find a matching window, give up */
670 log_error("Unknown SIGBUS page, aborting.");
675 /* The list of triggered pages is now empty. Now, let's remap
676 * all windows of the triggered file to anonymous maps, so
677 * that no page of the file in question is triggered again, so
678 * that we can be sure not to hit the queue size limit. */
679 if (_likely_(!found
))
682 HASHMAP_FOREACH(f
, m
->fds
, i
) {
688 LIST_FOREACH(by_fd
, w
, f
->windows
)
689 window_invalidate(w
);
693 bool mmap_cache_got_sigbus(MMapCache
*m
, int fd
) {
699 mmap_cache_process_sigbus(m
);
701 f
= hashmap_get(m
->fds
, FD_TO_PTR(fd
));
708 void mmap_cache_close_fd(MMapCache
*m
, int fd
) {
714 /* Make sure that any queued SIGBUS are first dispatched, so
715 * that we don't end up with a SIGBUS entry we cannot relate
716 * to any existing memory map */
718 mmap_cache_process_sigbus(m
);
720 f
= hashmap_get(m
->fds
, FD_TO_PTR(fd
));