]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/mmap-cache.c
remove unused includes
[thirdparty/systemd.git] / src / journal / mmap-cache.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2012 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <stdlib.h>
24 #include <sys/mman.h>
25
26 #include "hashmap.h"
27 #include "list.h"
28 #include "log.h"
29 #include "util.h"
30 #include "macro.h"
31 #include "sigbus.h"
32 #include "mmap-cache.h"
33
34 typedef struct Window Window;
35 typedef struct Context Context;
36 typedef struct FileDescriptor FileDescriptor;
37
/* A single contiguous mmap()ed region of one file. Windows are shared
 * between contexts and recycled via an LRU list once no context
 * references them anymore. */
struct Window {
        MMapCache *cache;               /* back-pointer to the owning cache */

        bool invalidated;               /* remapped to anonymous pages after a SIGBUS */
        bool keep_always;               /* pinned: never moved to the unused list */
        bool in_unused;                 /* currently linked into cache->unused */

        int prot;                       /* mmap() protection flags this window was created with */
        void *ptr;                      /* start address of the mapping */
        uint64_t offset;                /* file offset the mapping begins at */
        size_t size;                    /* length of the mapping in bytes */

        FileDescriptor *fd;             /* the file this window maps, or NULL */

        LIST_FIELDS(Window, by_fd);     /* linkage in fd->windows */
        LIST_FIELDS(Window, unused);    /* linkage in cache->unused (LRU, tail = cache->last_unused) */

        LIST_HEAD(Context, contexts);   /* all contexts currently attached to this window */
};
57
/* A lookup context (cursor): remembers the window it used last so that
 * sequential accesses hit without searching. */
struct Context {
        MMapCache *cache;               /* back-pointer to the owning cache */
        unsigned id;                    /* index of this context in cache->contexts[] */
        Window *window;                 /* window currently attached, or NULL */

        LIST_FIELDS(Context, by_window); /* linkage in window->contexts */
};
65
/* Per-file state: groups all windows mapping the same fd, and records
 * whether the file triggered a SIGBUS (e.g. truncated under us). */
struct FileDescriptor {
        MMapCache *cache;               /* back-pointer to the owning cache */
        int fd;                         /* the file descriptor windows are mapped from */
        bool sigbus;                    /* a SIGBUS was raised by a page of this file */
        LIST_HEAD(Window, windows);     /* all windows mapping this fd */
};
72
/* The cache itself: reference-counted, with per-fd window lists, a fixed
 * set of contexts and an LRU list of currently unreferenced windows. */
struct MMapCache {
        int n_ref;                      /* reference count; freed when it drops to 0 */
        unsigned n_windows;             /* number of Window structures currently allocated */

        unsigned n_hit, n_missed;       /* lookup statistics */


        Hashmap *fds;                   /* fd+1 → FileDescriptor (offset so fd 0 isn't a NULL key) */
        Context *contexts[MMAP_CACHE_MAX_CONTEXTS]; /* lazily allocated lookup contexts */

        LIST_HEAD(Window, unused);      /* windows no context references (head = most recently retired) */
        Window *last_unused;            /* tail of the unused list, i.e. first recycling candidate */
};
86
/* Keep at least this many windows allocated before recycling unused
 * ones in window_add(). */
#define WINDOWS_MIN 64

#ifdef ENABLE_DEBUG_MMAP_CACHE
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
# define WINDOW_SIZE (page_size())
#else
/* Default window granularity: 8 MiB per mapping */
# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#endif
95
96 MMapCache* mmap_cache_new(void) {
97 MMapCache *m;
98
99 m = new0(MMapCache, 1);
100 if (!m)
101 return NULL;
102
103 m->n_ref = 1;
104 return m;
105 }
106
107 MMapCache* mmap_cache_ref(MMapCache *m) {
108 assert(m);
109 assert(m->n_ref > 0);
110
111 m->n_ref ++;
112 return m;
113 }
114
/* Detach a window from everything that references it: unmap the memory,
 * remove it from its file's window list and from the unused list (fixing
 * up the LRU tail pointer first), and clear all context references to it.
 * Does not free the Window structure itself. */
static void window_unlink(Window *w) {
        Context *c;

        assert(w);

        if (w->ptr)
                munmap(w->ptr, w->size);

        if (w->fd)
                LIST_REMOVE(by_fd, w->fd->windows, w);

        if (w->in_unused) {
                /* If we are the LRU tail, the tail moves to our predecessor.
                 * NB: this must happen before LIST_REMOVE(), which resets
                 * unused_prev. */
                if (w->cache->last_unused == w)
                        w->cache->last_unused = w->unused_prev;

                LIST_REMOVE(unused, w->cache->unused, w);
        }

        /* Any context still pointing at us must drop its reference */
        LIST_FOREACH(by_window, c, w->contexts) {
                assert(c->window == w);
                c->window = NULL;
        }
}
138
/* Replace the window's file-backed pages with anonymous zero pages, in
 * place, so accesses through outstanding pointers keep working but can
 * no longer fault. Idempotent. */
static void window_invalidate(Window *w) {
        assert(w);

        if (w->invalidated)
                return;

        /* Replace the window with anonymous pages. This is useful
         * when we hit a SIGBUS and want to make sure the file cannot
         * trigger any further SIGBUS, possibly overrunning the sigbus
         * queue. */

        assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
        w->invalidated = true;
}
153
154 static void window_free(Window *w) {
155 assert(w);
156
157 window_unlink(w);
158 w->cache->n_windows--;
159 free(w);
160 }
161
162 _pure_ static bool window_matches(Window *w, int fd, int prot, uint64_t offset, size_t size) {
163 assert(w);
164 assert(fd >= 0);
165 assert(size > 0);
166
167 return
168 w->fd &&
169 fd == w->fd->fd &&
170 prot == w->prot &&
171 offset >= w->offset &&
172 offset + size <= w->offset + w->size;
173 }
174
175 static Window *window_add(MMapCache *m) {
176 Window *w;
177
178 assert(m);
179
180 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
181
182 /* Allocate a new window */
183 w = new0(Window, 1);
184 if (!w)
185 return NULL;
186 m->n_windows++;
187 } else {
188
189 /* Reuse an existing one */
190 w = m->last_unused;
191 window_unlink(w);
192 zero(*w);
193 }
194
195 w->cache = m;
196 return w;
197 }
198
/* Drop the context's reference to its current window, if any. When the
 * last reference goes away and the window is not pinned via keep_always,
 * the window is retired to the head of the unused list (or, in debug
 * builds, freed immediately to expose use-after-detach via SIGSEGV). */
static void context_detach_window(Context *c) {
        Window *w;

        assert(c);

        if (!c->window)
                return;

        w = c->window;
        c->window = NULL;
        LIST_REMOVE(by_window, w->contexts, c);

        if (!w->contexts && !w->keep_always) {
                /* Not used anymore? */
#ifdef ENABLE_DEBUG_MMAP_CACHE
                /* Unmap unused windows immediately to expose use-after-unmap
                 * by SIGSEGV. */
                window_free(w);
#else
                /* Prepend to the unused list; if the list was empty this
                 * window also becomes the LRU tail */
                LIST_PREPEND(unused, c->cache->unused, w);
                if (!c->cache->last_unused)
                        c->cache->last_unused = w;

                w->in_unused = true;
#endif
        }
}
226
227 static void context_attach_window(Context *c, Window *w) {
228 assert(c);
229 assert(w);
230
231 if (c->window == w)
232 return;
233
234 context_detach_window(c);
235
236 if (w->in_unused) {
237 /* Used again? */
238 LIST_REMOVE(unused, c->cache->unused, w);
239 if (c->cache->last_unused == w)
240 c->cache->last_unused = w->unused_prev;
241
242 w->in_unused = false;
243 }
244
245 c->window = w;
246 LIST_PREPEND(by_window, w->contexts, c);
247 }
248
249 static Context *context_add(MMapCache *m, unsigned id) {
250 Context *c;
251
252 assert(m);
253
254 c = m->contexts[id];
255 if (c)
256 return c;
257
258 c = new0(Context, 1);
259 if (!c)
260 return NULL;
261
262 c->cache = m;
263 c->id = id;
264
265 assert(!m->contexts[id]);
266 m->contexts[id] = c;
267
268 return c;
269 }
270
271 static void context_free(Context *c) {
272 assert(c);
273
274 context_detach_window(c);
275
276 if (c->cache) {
277 assert(c->cache->contexts[c->id] == c);
278 c->cache->contexts[c->id] = NULL;
279 }
280
281 free(c);
282 }
283
284 static void fd_free(FileDescriptor *f) {
285 assert(f);
286
287 while (f->windows)
288 window_free(f->windows);
289
290 if (f->cache)
291 assert_se(hashmap_remove(f->cache->fds, INT_TO_PTR(f->fd + 1)));
292
293 free(f);
294 }
295
296 static FileDescriptor* fd_add(MMapCache *m, int fd) {
297 FileDescriptor *f;
298 int r;
299
300 assert(m);
301 assert(fd >= 0);
302
303 f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
304 if (f)
305 return f;
306
307 r = hashmap_ensure_allocated(&m->fds, NULL);
308 if (r < 0)
309 return NULL;
310
311 f = new0(FileDescriptor, 1);
312 if (!f)
313 return NULL;
314
315 f->cache = m;
316 f->fd = fd;
317
318 r = hashmap_put(m->fds, UINT_TO_PTR(fd + 1), f);
319 if (r < 0) {
320 free(f);
321 return NULL;
322 }
323
324 return f;
325 }
326
327 static void mmap_cache_free(MMapCache *m) {
328 FileDescriptor *f;
329 int i;
330
331 assert(m);
332
333 for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
334 if (m->contexts[i])
335 context_free(m->contexts[i]);
336
337 while ((f = hashmap_first(m->fds)))
338 fd_free(f);
339
340 hashmap_free(m->fds);
341
342 while (m->unused)
343 window_free(m->unused);
344
345 free(m);
346 }
347
348 MMapCache* mmap_cache_unref(MMapCache *m) {
349 assert(m);
350 assert(m->n_ref > 0);
351
352 m->n_ref --;
353 if (m->n_ref == 0)
354 mmap_cache_free(m);
355
356 return NULL;
357 }
358
359 static int make_room(MMapCache *m) {
360 assert(m);
361
362 if (!m->last_unused)
363 return 0;
364
365 window_free(m->last_unused);
366 return 1;
367 }
368
/* Fast path: check whether the window currently attached to the given
 * context already satisfies the request. Returns 1 on success (with
 * *ret set to the requested address inside the mapping), 0 if the
 * context has no suitable window, or -EIO if the file raised a SIGBUS. */
static int try_context(
                MMapCache *m,
                int fd,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                void **ret) {

        Context *c;

        assert(m);
        assert(m->n_ref > 0);
        assert(fd >= 0);
        assert(size > 0);
        assert(ret);

        c = m->contexts[context];
        if (!c)
                return 0;

        assert(c->id == context);

        if (!c->window)
                return 0;

        if (!window_matches(c->window, fd, prot, offset, size)) {

                /* Drop the reference to the window, since it's unnecessary now */
                context_detach_window(c);
                return 0;
        }

        if (c->window->fd->sigbus)
                return -EIO;

        /* Once requested with keep_always, the window stays pinned */
        c->window->keep_always |= keep_always;

        *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
        return 1;
}
411
412 static int find_mmap(
413 MMapCache *m,
414 int fd,
415 int prot,
416 unsigned context,
417 bool keep_always,
418 uint64_t offset,
419 size_t size,
420 void **ret) {
421
422 FileDescriptor *f;
423 Window *w;
424 Context *c;
425
426 assert(m);
427 assert(m->n_ref > 0);
428 assert(fd >= 0);
429 assert(size > 0);
430
431 f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
432 if (!f)
433 return 0;
434
435 assert(f->fd == fd);
436
437 if (f->sigbus)
438 return -EIO;
439
440 LIST_FOREACH(by_fd, w, f->windows)
441 if (window_matches(w, fd, prot, offset, size))
442 break;
443
444 if (!w)
445 return 0;
446
447 c = context_add(m, context);
448 if (!c)
449 return -ENOMEM;
450
451 context_attach_window(c, w);
452 w->keep_always += keep_always;
453
454 *ret = (uint8_t*) w->ptr + (offset - w->offset);
455 return 1;
456 }
457
/* Slow path: create a brand-new window covering the request. The window
 * is page-aligned, grown to at least WINDOW_SIZE (with roughly half the
 * slack placed before the requested range), and clamped to the file size
 * when *st is supplied. On ENOMEM from mmap(), unused windows are evicted
 * and the mapping retried. Returns 1 on success, -EADDRNOTAVAIL if the
 * requested offset lies beyond the file end, -ENOMEM or -errno on
 * failure. */
static int add_mmap(
                MMapCache *m,
                int fd,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                struct stat *st,
                void **ret) {

        uint64_t woffset, wsize;
        Context *c;
        FileDescriptor *f;
        Window *w;
        void *d;
        int r;

        assert(m);
        assert(m->n_ref > 0);
        assert(fd >= 0);
        assert(size > 0);
        assert(ret);

        /* Round the window start down to a page boundary and grow the
         * size so the requested range remains fully covered */
        woffset = offset & ~((uint64_t) page_size() - 1ULL);
        wsize = size + (offset - woffset);
        wsize = PAGE_ALIGN(wsize);

        if (wsize < WINDOW_SIZE) {
                uint64_t delta;

                /* Pad the window up to WINDOW_SIZE, extending about half
                 * of the slack before the requested offset where possible */
                delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);

                if (delta > offset)
                        woffset = 0;
                else
                        woffset -= delta;

                wsize = WINDOW_SIZE;
        }

        if (st) {
                /* Memory maps that are larger then the files
                   underneath have undefined behavior. Hence, clamp
                   things to the file size if we know it */

                if (woffset >= (uint64_t) st->st_size)
                        return -EADDRNOTAVAIL;

                if (woffset + wsize > (uint64_t) st->st_size)
                        wsize = PAGE_ALIGN(st->st_size - woffset);
        }

        for (;;) {
                d = mmap(NULL, wsize, prot, MAP_SHARED, fd, woffset);
                if (d != MAP_FAILED)
                        break;
                if (errno != ENOMEM)
                        return -errno;

                /* Address space exhausted: evict an unused window and retry */
                r = make_room(m);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -ENOMEM;
        }

        c = context_add(m, context);
        if (!c)
                goto outofmem;

        f = fd_add(m, fd);
        if (!f)
                goto outofmem;

        w = window_add(m);
        if (!w)
                goto outofmem;

        w->keep_always = keep_always;
        w->ptr = d;
        w->offset = woffset;
        w->prot = prot;
        w->size = wsize;
        w->fd = f;

        LIST_PREPEND(by_fd, f->windows, w);

        /* Attach the context directly; the brand-new window cannot be the
         * context's current one, so no in_unused bookkeeping is needed */
        context_detach_window(c);
        c->window = w;
        LIST_PREPEND(by_window, w->contexts, c);

        *ret = (uint8_t*) w->ptr + (offset - w->offset);
        return 1;

outofmem:
        /* The mapping was established before the allocations; undo it */
        munmap(d, wsize);
        return -ENOMEM;
}
557
558 int mmap_cache_get(
559 MMapCache *m,
560 int fd,
561 int prot,
562 unsigned context,
563 bool keep_always,
564 uint64_t offset,
565 size_t size,
566 struct stat *st,
567 void **ret) {
568
569 int r;
570
571 assert(m);
572 assert(m->n_ref > 0);
573 assert(fd >= 0);
574 assert(size > 0);
575 assert(ret);
576 assert(context < MMAP_CACHE_MAX_CONTEXTS);
577
578 /* Check whether the current context is the right one already */
579 r = try_context(m, fd, prot, context, keep_always, offset, size, ret);
580 if (r != 0) {
581 m->n_hit ++;
582 return r;
583 }
584
585 /* Search for a matching mmap */
586 r = find_mmap(m, fd, prot, context, keep_always, offset, size, ret);
587 if (r != 0) {
588 m->n_hit ++;
589 return r;
590 }
591
592 m->n_missed++;
593
594 /* Create a new mmap */
595 return add_mmap(m, fd, prot, context, keep_always, offset, size, st, ret);
596 }
597
598 unsigned mmap_cache_get_hit(MMapCache *m) {
599 assert(m);
600
601 return m->n_hit;
602 }
603
604 unsigned mmap_cache_get_missed(MMapCache *m) {
605 assert(m);
606
607 return m->n_missed;
608 }
609
/* Drain the queue of SIGBUS-faulted addresses collected by the sigbus
 * handler, attribute each to the file whose window contains it, and then
 * replace every window of the affected files with anonymous memory so
 * those files cannot fault again (which could overrun the fixed-size
 * sigbus queue). Aborts on queue errors or on faults in memory we do
 * not manage. */
static void mmap_cache_process_sigbus(MMapCache *m) {
        bool found = false;
        FileDescriptor *f;
        Iterator i;
        int r;

        assert(m);

        /* Iterate through all triggered pages and mark their files as
         * invalidated */
        for (;;) {
                bool ours;
                void *addr;

                r = sigbus_pop(&addr);
                if (_likely_(r == 0))
                        break;
                if (r < 0) {
                        log_error_errno(r, "SIGBUS handling failed: %m");
                        abort();
                }

                /* Find the window containing the faulting address */
                ours = false;
                HASHMAP_FOREACH(f, m->fds, i) {
                        Window *w;

                        LIST_FOREACH(by_fd, w, f->windows) {
                                if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
                                    (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
                                        found = ours = f->sigbus = true;
                                        break;
                                }
                        }

                        if (ours)
                                break;
                }

                /* Didn't find a matching window, give up */
                if (!ours) {
                        log_error("Unknown SIGBUS page, aborting.");
                        abort();
                }
        }

        /* The list of triggered pages is now empty. Now, let's remap
         * all windows of the triggered file to anonymous maps, so
         * that no page of the file in question is triggered again, so
         * that we can be sure not to hit the queue size limit. */
        if (_likely_(!found))
                return;

        HASHMAP_FOREACH(f, m->fds, i) {
                Window *w;

                if (!f->sigbus)
                        continue;

                LIST_FOREACH(by_fd, w, f->windows)
                        window_invalidate(w);
        }
}
672
673 bool mmap_cache_got_sigbus(MMapCache *m, int fd) {
674 FileDescriptor *f;
675
676 assert(m);
677 assert(fd >= 0);
678
679 mmap_cache_process_sigbus(m);
680
681 f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
682 if (!f)
683 return false;
684
685 return f->sigbus;
686 }
687
688 void mmap_cache_close_fd(MMapCache *m, int fd) {
689 FileDescriptor *f;
690
691 assert(m);
692 assert(fd >= 0);
693
694 /* Make sure that any queued SIGBUS are first dispatched, so
695 * that we don't end up with a SIGBUS entry we cannot relate
696 * to any existing memory map */
697
698 mmap_cache_process_sigbus(m);
699
700 f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
701 if (!f)
702 return;
703
704 fd_free(f);
705 }