/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2012 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <errno.h>
#include <stdlib.h>
#include <sys/mman.h>

#include "alloc-util.h"
#include "fd-util.h"
#include "hashmap.h"
#include "list.h"
#include "log.h"
#include "macro.h"
#include "mmap-cache.h"
#include "sigbus.h"
#include "util.h"

typedef struct Window Window;
typedef struct Context Context;
typedef struct FileDescriptor FileDescriptor;

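/* The cache maps file regions ("windows") into memory and hands out
 * pointers into them. Each cached file descriptor keeps a list of its
 * windows; each lookup context remembers the window it used last, so
 * repeated accesses to nearby offsets are served from the same mapping.
 * Windows that no context references anymore are parked on an "unused"
 * list and recycled when room is needed. */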
struct Window {
        MMapCache *cache;

        bool invalidated;
        bool keep_always;
        bool in_unused;

        int prot;
        void *ptr;
        uint64_t offset;
        size_t size;

        FileDescriptor *fd;

        LIST_FIELDS(Window, by_fd);
        LIST_FIELDS(Window, unused);

        LIST_HEAD(Context, contexts);
};

struct Context {
        MMapCache *cache;
        unsigned id;
        Window *window;

        LIST_FIELDS(Context, by_window);
};

struct FileDescriptor {
        MMapCache *cache;
        int fd;
        bool sigbus;
        LIST_HEAD(Window, windows);
};

struct MMapCache {
        int n_ref;
        unsigned n_windows;

        unsigned n_hit, n_missed;

        Hashmap *fds;
        Context *contexts[MMAP_CACHE_MAX_CONTEXTS];

        LIST_HEAD(Window, unused);
        Window *last_unused;
};

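/* Keep allocating fresh windows until this many exist; only then start
 * recycling entries from the unused list. */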
#define WINDOWS_MIN 64

#ifdef ENABLE_DEBUG_MMAP_CACHE
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
# define WINDOW_SIZE (page_size())
#else
# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#endif

MMapCache* mmap_cache_new(void) {
        MMapCache *m;

        m = new0(MMapCache, 1);
        if (!m)
                return NULL;

        m->n_ref = 1;
        return m;
}

MMapCache* mmap_cache_ref(MMapCache *m) {
        assert(m);
        assert(m->n_ref > 0);

        m->n_ref++;
        return m;
}

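/* Detach a window from everything referencing it: unmap it, take it off
 * its file's window list and the unused list, and clear it from all
 * contexts that still point at it. The Window structure itself stays
 * allocated. */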
static void window_unlink(Window *w) {
        Context *c;

        assert(w);

        if (w->ptr)
                munmap(w->ptr, w->size);

        if (w->fd)
                LIST_REMOVE(by_fd, w->fd->windows, w);

        if (w->in_unused) {
                if (w->cache->last_unused == w)
                        w->cache->last_unused = w->unused_prev;

                LIST_REMOVE(unused, w->cache->unused, w);
        }

        LIST_FOREACH(by_window, c, w->contexts) {
                assert(c->window == w);
                c->window = NULL;
        }
}

static void window_invalidate(Window *w) {
        assert(w);

        if (w->invalidated)
                return;

        /* Replace the window with anonymous pages. This is useful
         * when we hit a SIGBUS and want to make sure the file cannot
         * trigger any further SIGBUS, possibly overrunning the sigbus
         * queue. */

        assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
        w->invalidated = true;
}

static void window_free(Window *w) {
        assert(w);

        window_unlink(w);
        w->cache->n_windows--;
        free(w);
}

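/* Returns true if the window maps the requested range of the given file
 * with the requested protection. */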
_pure_ static bool window_matches(Window *w, int fd, int prot, uint64_t offset, size_t size) {
        assert(w);
        assert(fd >= 0);
        assert(size > 0);

        return
                w->fd &&
                fd == w->fd->fd &&
                prot == w->prot &&
                offset >= w->offset &&
                offset + size <= w->offset + w->size;
}

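/* Get a window structure to fill in: allocate a fresh one while we are
 * at or below WINDOWS_MIN, otherwise recycle the window that has been
 * parked on the unused list the longest. */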
static Window *window_add(MMapCache *m) {
        Window *w;

        assert(m);

        if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {

                /* Allocate a new window */
                w = new0(Window, 1);
                if (!w)
                        return NULL;
                m->n_windows++;
        } else {

                /* Reuse an existing one */
                w = m->last_unused;
                window_unlink(w);
                zero(*w);
        }

        w->cache = m;
        return w;
}

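/* Drop the context's reference to its current window. If that leaves
 * the window without users (and it is not pinned via keep_always), it
 * is put on the unused list (or, in debug builds, unmapped right away). */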
static void context_detach_window(Context *c) {
        Window *w;

        assert(c);

        if (!c->window)
                return;

        w = c->window;
        c->window = NULL;
        LIST_REMOVE(by_window, w->contexts, c);

        if (!w->contexts && !w->keep_always) {
                /* Not used anymore? */
#ifdef ENABLE_DEBUG_MMAP_CACHE
                /* Unmap unused windows immediately to expose use-after-unmap
                 * by SIGSEGV. */
                window_free(w);
#else
                LIST_PREPEND(unused, c->cache->unused, w);
                if (!c->cache->last_unused)
                        c->cache->last_unused = w;

                w->in_unused = true;
#endif
        }
}

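/* Point the context at a (possibly different) window, detaching it from
 * its previous one and pulling the new window off the unused list if it
 * was parked there. */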
static void context_attach_window(Context *c, Window *w) {
        assert(c);
        assert(w);

        if (c->window == w)
                return;

        context_detach_window(c);

        if (w->in_unused) {
                /* Used again? */
                LIST_REMOVE(unused, c->cache->unused, w);
                if (c->cache->last_unused == w)
                        c->cache->last_unused = w->unused_prev;

                w->in_unused = false;
        }

        c->window = w;
        LIST_PREPEND(by_window, w->contexts, c);
}

static Context *context_add(MMapCache *m, unsigned id) {
        Context *c;

        assert(m);

        c = m->contexts[id];
        if (c)
                return c;

        c = new0(Context, 1);
        if (!c)
                return NULL;

        c->cache = m;
        c->id = id;

        assert(!m->contexts[id]);
        m->contexts[id] = c;

        return c;
}

static void context_free(Context *c) {
        assert(c);

        context_detach_window(c);

        if (c->cache) {
                assert(c->cache->contexts[c->id] == c);
                c->cache->contexts[c->id] = NULL;
        }

        free(c);
}

static void fd_free(FileDescriptor *f) {
        assert(f);

        while (f->windows)
                window_free(f->windows);

        if (f->cache)
                assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));

        free(f);
}

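/* Look up the FileDescriptor entry for an fd, creating and registering
 * it in the hashmap if it does not exist yet. */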
static FileDescriptor* fd_add(MMapCache *m, int fd) {
        FileDescriptor *f;
        int r;

        assert(m);
        assert(fd >= 0);

        f = hashmap_get(m->fds, FD_TO_PTR(fd));
        if (f)
                return f;

        r = hashmap_ensure_allocated(&m->fds, NULL);
        if (r < 0)
                return NULL;

        f = new0(FileDescriptor, 1);
        if (!f)
                return NULL;

        f->cache = m;
        f->fd = fd;

        r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
        if (r < 0) {
                free(f);
                return NULL;
        }

        return f;
}

static void mmap_cache_free(MMapCache *m) {
        FileDescriptor *f;
        int i;

        assert(m);

        for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
                if (m->contexts[i])
                        context_free(m->contexts[i]);

        while ((f = hashmap_first(m->fds)))
                fd_free(f);

        hashmap_free(m->fds);

        while (m->unused)
                window_free(m->unused);

        free(m);
}

MMapCache* mmap_cache_unref(MMapCache *m) {

        if (!m)
                return NULL;

        assert(m->n_ref > 0);

        m->n_ref--;
        if (m->n_ref == 0)
                mmap_cache_free(m);

        return NULL;
}

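/* Free the window that has been unused longest, if any. Returns 1 if a
 * window was reclaimed, 0 if there was nothing to free. */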
static int make_room(MMapCache *m) {
        assert(m);

        if (!m->last_unused)
                return 0;

        window_free(m->last_unused);
        return 1;
}

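/* Fast path: check whether the window the context used last already
 * covers the requested range. Returns 1 and sets *ret on a hit, 0 if
 * the caller should keep looking, -EIO if the file already triggered a
 * SIGBUS. */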
static int try_context(
                MMapCache *m,
                int fd,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                void **ret) {

        Context *c;

        assert(m);
        assert(m->n_ref > 0);
        assert(fd >= 0);
        assert(size > 0);
        assert(ret);

        c = m->contexts[context];
        if (!c)
                return 0;

        assert(c->id == context);

        if (!c->window)
                return 0;

        if (!window_matches(c->window, fd, prot, offset, size)) {

                /* Drop the reference to the window, since it's unnecessary now */
                context_detach_window(c);
                return 0;
        }

        if (c->window->fd->sigbus)
                return -EIO;

        c->window->keep_always |= keep_always;

        *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
        return 1;
}

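/* Slower path: walk all windows of the file and attach the context to
 * the first one covering the requested range. Returns 1 on success, 0
 * if no existing window matches, -EIO on a poisoned file, -ENOMEM if
 * the context could not be allocated. */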
static int find_mmap(
                MMapCache *m,
                int fd,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                void **ret) {

        FileDescriptor *f;
        Window *w;
        Context *c;

        assert(m);
        assert(m->n_ref > 0);
        assert(fd >= 0);
        assert(size > 0);

        f = hashmap_get(m->fds, FD_TO_PTR(fd));
        if (!f)
                return 0;

        assert(f->fd == fd);

        if (f->sigbus)
                return -EIO;

        LIST_FOREACH(by_fd, w, f->windows)
                if (window_matches(w, fd, prot, offset, size))
                        break;

        if (!w)
                return 0;

        c = context_add(m, context);
        if (!c)
                return -ENOMEM;

        context_attach_window(c, w);
        w->keep_always |= keep_always;

        *ret = (uint8_t*) w->ptr + (offset - w->offset);
        return 1;
}

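/* Slowest path: establish a new mapping. The requested range is rounded
 * out to page granularity and grown to WINDOW_SIZE (clamped to the file
 * size if known), so that neighbouring accesses can be served from the
 * same window. If mmap() fails with ENOMEM, unused windows are freed
 * and the call is retried. */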
static int add_mmap(
                MMapCache *m,
                int fd,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                struct stat *st,
                void **ret) {

        uint64_t woffset, wsize;
        Context *c;
        FileDescriptor *f;
        Window *w;
        void *d;
        int r;

        assert(m);
        assert(m->n_ref > 0);
        assert(fd >= 0);
        assert(size > 0);
        assert(ret);

        woffset = offset & ~((uint64_t) page_size() - 1ULL);
        wsize = size + (offset - woffset);
        wsize = PAGE_ALIGN(wsize);

        if (wsize < WINDOW_SIZE) {
                uint64_t delta;

                delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);

                if (delta > offset)
                        woffset = 0;
                else
                        woffset -= delta;

                wsize = WINDOW_SIZE;
        }

        if (st) {
                /* Memory maps that are larger than the files
                 * underneath have undefined behavior. Hence, clamp
                 * things to the file size if we know it */

                if (woffset >= (uint64_t) st->st_size)
                        return -EADDRNOTAVAIL;

                if (woffset + wsize > (uint64_t) st->st_size)
                        wsize = PAGE_ALIGN(st->st_size - woffset);
        }

        for (;;) {
                d = mmap(NULL, wsize, prot, MAP_SHARED, fd, woffset);
                if (d != MAP_FAILED)
                        break;
                if (errno != ENOMEM)
                        return -errno;

                r = make_room(m);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -ENOMEM;
        }

        c = context_add(m, context);
        if (!c)
                goto outofmem;

        f = fd_add(m, fd);
        if (!f)
                goto outofmem;

        w = window_add(m);
        if (!w)
                goto outofmem;

        w->keep_always = keep_always;
        w->ptr = d;
        w->offset = woffset;
        w->prot = prot;
        w->size = wsize;
        w->fd = f;

        LIST_PREPEND(by_fd, f->windows, w);

        context_detach_window(c);
        c->window = w;
        LIST_PREPEND(by_window, w->contexts, c);

        *ret = (uint8_t*) w->ptr + (offset - w->offset);
        return 1;

outofmem:
        munmap(d, wsize);
        return -ENOMEM;
}

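/* Public entry point: return in *ret a pointer to the requested file
 * range, mapping it if necessary. The context's current window is tried
 * first, then all existing windows of the fd, and only then is a new
 * mapping created. A call might look like this (names and values purely
 * illustrative):
 *
 *         void *p;
 *         r = mmap_cache_get(m, fd, PROT_READ, 0, false, offset, size, &st, &p);
 */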
int mmap_cache_get(
                MMapCache *m,
                int fd,
                int prot,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                struct stat *st,
                void **ret) {

        int r;

        assert(m);
        assert(m->n_ref > 0);
        assert(fd >= 0);
        assert(size > 0);
        assert(ret);
        assert(context < MMAP_CACHE_MAX_CONTEXTS);

        /* Check whether the current context is the right one already */
        r = try_context(m, fd, prot, context, keep_always, offset, size, ret);
        if (r != 0) {
                m->n_hit++;
                return r;
        }

        /* Search for a matching mmap */
        r = find_mmap(m, fd, prot, context, keep_always, offset, size, ret);
        if (r != 0) {
                m->n_hit++;
                return r;
        }

        m->n_missed++;

        /* Create a new mmap */
        return add_mmap(m, fd, prot, context, keep_always, offset, size, st, ret);
}

unsigned mmap_cache_get_hit(MMapCache *m) {
        assert(m);

        return m->n_hit;
}

unsigned mmap_cache_get_missed(MMapCache *m) {
        assert(m);

        return m->n_missed;
}

static void mmap_cache_process_sigbus(MMapCache *m) {
        bool found = false;
        FileDescriptor *f;
        Iterator i;
        int r;

        assert(m);

        /* Iterate through all triggered pages and mark their files as
         * invalidated */
        for (;;) {
                bool ours;
                void *addr;

                r = sigbus_pop(&addr);
                if (_likely_(r == 0))
                        break;
                if (r < 0) {
                        log_error_errno(r, "SIGBUS handling failed: %m");
                        abort();
                }

                ours = false;
                HASHMAP_FOREACH(f, m->fds, i) {
                        Window *w;

                        LIST_FOREACH(by_fd, w, f->windows) {
                                if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
                                    (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
                                        found = ours = f->sigbus = true;
                                        break;
                                }
                        }

                        if (ours)
                                break;
                }

                /* Didn't find a matching window, give up */
                if (!ours) {
                        log_error("Unknown SIGBUS page, aborting.");
                        abort();
                }
        }

        /* The list of triggered pages is now empty. Now, let's remap
         * all windows of the triggered file to anonymous maps, so
         * that no page of the file in question is triggered again, so
         * that we can be sure not to hit the queue size limit. */
        if (_likely_(!found))
                return;

        HASHMAP_FOREACH(f, m->fds, i) {
                Window *w;

                if (!f->sigbus)
                        continue;

                LIST_FOREACH(by_fd, w, f->windows)
                        window_invalidate(w);
        }
}

bool mmap_cache_got_sigbus(MMapCache *m, int fd) {
        FileDescriptor *f;

        assert(m);
        assert(fd >= 0);

        mmap_cache_process_sigbus(m);

        f = hashmap_get(m->fds, FD_TO_PTR(fd));
        if (!f)
                return false;

        return f->sigbus;
}

void mmap_cache_close_fd(MMapCache *m, int fd) {
        FileDescriptor *f;

        assert(m);
        assert(fd >= 0);

        /* Make sure that any queued SIGBUS are first dispatched, so
         * that we don't end up with a SIGBUS entry we cannot relate
         * to any existing memory map */

        mmap_cache_process_sigbus(m);

        f = hashmap_get(m->fds, FD_TO_PTR(fd));
        if (!f)
                return;

        fd_free(f);
}