1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2012 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
26 #include "alloc-util.h"
32 #include "mmap-cache.h"
36 typedef struct Window Window
;
37 typedef struct Context Context
;
38 typedef struct FileDescriptor FileDescriptor
;
54 LIST_FIELDS(Window
, by_fd
);
55 LIST_FIELDS(Window
, unused
);
57 LIST_HEAD(Context
, contexts
);
65 LIST_FIELDS(Context
, by_window
);
68 struct FileDescriptor
{
72 LIST_HEAD(Window
, windows
);
79 unsigned n_hit
, n_missed
;
82 Context
*contexts
[MMAP_CACHE_MAX_CONTEXTS
];
84 LIST_HEAD(Window
, unused
);
88 #define WINDOWS_MIN 64
90 #ifdef ENABLE_DEBUG_MMAP_CACHE
91 /* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
92 # define WINDOW_SIZE (page_size())
94 # define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
97 MMapCache
* mmap_cache_new(void) {
100 m
= new0(MMapCache
, 1);
108 MMapCache
* mmap_cache_ref(MMapCache
*m
) {
110 assert(m
->n_ref
> 0);
116 static void window_unlink(Window
*w
) {
122 munmap(w
->ptr
, w
->size
);
125 LIST_REMOVE(by_fd
, w
->fd
->windows
, w
);
128 if (w
->cache
->last_unused
== w
)
129 w
->cache
->last_unused
= w
->unused_prev
;
131 LIST_REMOVE(unused
, w
->cache
->unused
, w
);
134 LIST_FOREACH(by_window
, c
, w
->contexts
) {
135 assert(c
->window
== w
);
140 static void window_invalidate(Window
*w
) {
146 /* Replace the window with anonymous pages. This is useful
147 * when we hit a SIGBUS and want to make sure the file cannot
148 * trigger any further SIGBUS, possibly overrunning the sigbus
151 assert_se(mmap(w
->ptr
, w
->size
, w
->prot
, MAP_PRIVATE
|MAP_ANONYMOUS
|MAP_FIXED
, -1, 0) == w
->ptr
);
152 w
->invalidated
= true;
155 static void window_free(Window
*w
) {
159 w
->cache
->n_windows
--;
163 _pure_
static bool window_matches(Window
*w
, int fd
, int prot
, uint64_t offset
, size_t size
) {
172 offset
>= w
->offset
&&
173 offset
+ size
<= w
->offset
+ w
->size
;
176 static Window
*window_add(MMapCache
*m
, FileDescriptor
*fd
, int prot
, bool keep_always
, uint64_t offset
, size_t size
, void *ptr
) {
182 if (!m
->last_unused
|| m
->n_windows
<= WINDOWS_MIN
) {
184 /* Allocate a new window */
191 /* Reuse an existing one */
200 w
->keep_always
= keep_always
;
205 LIST_PREPEND(by_fd
, fd
->windows
, w
);
210 static void context_detach_window(Context
*c
) {
220 LIST_REMOVE(by_window
, w
->contexts
, c
);
222 if (!w
->contexts
&& !w
->keep_always
) {
223 /* Not used anymore? */
224 #ifdef ENABLE_DEBUG_MMAP_CACHE
225 /* Unmap unused windows immediately to expose use-after-unmap
229 LIST_PREPEND(unused
, c
->cache
->unused
, w
);
230 if (!c
->cache
->last_unused
)
231 c
->cache
->last_unused
= w
;
238 static void context_attach_window(Context
*c
, Window
*w
) {
245 context_detach_window(c
);
249 LIST_REMOVE(unused
, c
->cache
->unused
, w
);
250 if (c
->cache
->last_unused
== w
)
251 c
->cache
->last_unused
= w
->unused_prev
;
253 w
->in_unused
= false;
257 LIST_PREPEND(by_window
, w
->contexts
, c
);
260 static Context
*context_add(MMapCache
*m
, unsigned id
) {
269 c
= new0(Context
, 1);
276 assert(!m
->contexts
[id
]);
282 static void context_free(Context
*c
) {
285 context_detach_window(c
);
288 assert(c
->cache
->contexts
[c
->id
] == c
);
289 c
->cache
->contexts
[c
->id
] = NULL
;
295 static void fd_free(FileDescriptor
*f
) {
299 window_free(f
->windows
);
302 assert_se(hashmap_remove(f
->cache
->fds
, FD_TO_PTR(f
->fd
)));
307 static FileDescriptor
* fd_add(MMapCache
*m
, int fd
) {
314 f
= hashmap_get(m
->fds
, FD_TO_PTR(fd
));
318 r
= hashmap_ensure_allocated(&m
->fds
, NULL
);
322 f
= new0(FileDescriptor
, 1);
329 r
= hashmap_put(m
->fds
, FD_TO_PTR(fd
), f
);
338 static void mmap_cache_free(MMapCache
*m
) {
344 for (i
= 0; i
< MMAP_CACHE_MAX_CONTEXTS
; i
++)
346 context_free(m
->contexts
[i
]);
348 while ((f
= hashmap_first(m
->fds
)))
351 hashmap_free(m
->fds
);
354 window_free(m
->unused
);
359 MMapCache
* mmap_cache_unref(MMapCache
*m
) {
364 assert(m
->n_ref
> 0);
373 static int make_room(MMapCache
*m
) {
379 window_free(m
->last_unused
);
383 static int try_context(
396 assert(m
->n_ref
> 0);
401 c
= m
->contexts
[context
];
405 assert(c
->id
== context
);
410 if (!window_matches(c
->window
, fd
, prot
, offset
, size
)) {
412 /* Drop the reference to the window, since it's unnecessary now */
413 context_detach_window(c
);
417 if (c
->window
->fd
->sigbus
)
420 c
->window
->keep_always
= c
->window
->keep_always
|| keep_always
;
422 *ret
= (uint8_t*) c
->window
->ptr
+ (offset
- c
->window
->offset
);
426 static int find_mmap(
441 assert(m
->n_ref
> 0);
445 f
= hashmap_get(m
->fds
, FD_TO_PTR(fd
));
454 LIST_FOREACH(by_fd
, w
, f
->windows
)
455 if (window_matches(w
, fd
, prot
, offset
, size
))
461 c
= context_add(m
, context
);
465 context_attach_window(c
, w
);
466 w
->keep_always
= w
->keep_always
|| keep_always
;
468 *ret
= (uint8_t*) w
->ptr
+ (offset
- w
->offset
);
472 static int mmap_try_harder(MMapCache
*m
, void *addr
, int fd
, int prot
, int flags
, uint64_t offset
, size_t size
, void **res
) {
482 ptr
= mmap(addr
, size
, prot
, flags
, fd
, offset
);
483 if (ptr
!= MAP_FAILED
)
510 uint64_t woffset
, wsize
;
518 assert(m
->n_ref
> 0);
523 woffset
= offset
& ~((uint64_t) page_size() - 1ULL);
524 wsize
= size
+ (offset
- woffset
);
525 wsize
= PAGE_ALIGN(wsize
);
527 if (wsize
< WINDOW_SIZE
) {
530 delta
= PAGE_ALIGN((WINDOW_SIZE
- wsize
) / 2);
541 /* Memory maps that are larger than the files
542 underneath have undefined behavior. Hence, clamp
543 things to the file size if we know it */
545 if (woffset
>= (uint64_t) st
->st_size
)
546 return -EADDRNOTAVAIL
;
548 if (woffset
+ wsize
> (uint64_t) st
->st_size
)
549 wsize
= PAGE_ALIGN(st
->st_size
- woffset
);
552 r
= mmap_try_harder(m
, NULL
, fd
, prot
, MAP_SHARED
, woffset
, wsize
, &d
);
556 c
= context_add(m
, context
);
564 w
= window_add(m
, f
, prot
, keep_always
, woffset
, wsize
, d
);
568 context_detach_window(c
);
570 LIST_PREPEND(by_window
, w
->contexts
, c
);
572 *ret
= (uint8_t*) w
->ptr
+ (offset
- w
->offset
);
594 assert(m
->n_ref
> 0);
598 assert(context
< MMAP_CACHE_MAX_CONTEXTS
);
600 /* Check whether the current context is the right one already */
601 r
= try_context(m
, fd
, prot
, context
, keep_always
, offset
, size
, ret
);
607 /* Search for a matching mmap */
608 r
= find_mmap(m
, fd
, prot
, context
, keep_always
, offset
, size
, ret
);
616 /* Create a new mmap */
617 return add_mmap(m
, fd
, prot
, context
, keep_always
, offset
, size
, st
, ret
);
620 unsigned mmap_cache_get_hit(MMapCache
*m
) {
626 unsigned mmap_cache_get_missed(MMapCache
*m
) {
632 static void mmap_cache_process_sigbus(MMapCache
*m
) {
640 /* Iterate through all triggered pages and mark their files as
646 r
= sigbus_pop(&addr
);
647 if (_likely_(r
== 0))
650 log_error_errno(r
, "SIGBUS handling failed: %m");
655 HASHMAP_FOREACH(f
, m
->fds
, i
) {
658 LIST_FOREACH(by_fd
, w
, f
->windows
) {
659 if ((uint8_t*) addr
>= (uint8_t*) w
->ptr
&&
660 (uint8_t*) addr
< (uint8_t*) w
->ptr
+ w
->size
) {
661 found
= ours
= f
->sigbus
= true;
670 /* Didn't find a matching window, give up */
672 log_error("Unknown SIGBUS page, aborting.");
677 /* The list of triggered pages is now empty. Now, let's remap
678 * all windows of the triggered file to anonymous maps, so
679 * that no page of the file in question is triggered again, so
680 * that we can be sure not to hit the queue size limit. */
681 if (_likely_(!found
))
684 HASHMAP_FOREACH(f
, m
->fds
, i
) {
690 LIST_FOREACH(by_fd
, w
, f
->windows
)
691 window_invalidate(w
);
695 bool mmap_cache_got_sigbus(MMapCache
*m
, int fd
) {
701 mmap_cache_process_sigbus(m
);
703 f
= hashmap_get(m
->fds
, FD_TO_PTR(fd
));
710 void mmap_cache_close_fd(MMapCache
*m
, int fd
) {
716 /* Make sure that any queued SIGBUS are first dispatched, so
717 * that we don't end up with a SIGBUS entry we cannot relate
718 * to any existing memory map */
720 mmap_cache_process_sigbus(m
);
722 f
= hashmap_get(m
->fds
, FD_TO_PTR(fd
));