]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/mmap-cache.c
journal: elide fd matching from window_matches() (#6340)
[thirdparty/systemd.git] / src / journal / mmap-cache.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2012 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <stdlib.h>
22 #include <sys/mman.h>
23
24 #include "alloc-util.h"
25 #include "fd-util.h"
26 #include "hashmap.h"
27 #include "list.h"
28 #include "log.h"
29 #include "macro.h"
30 #include "mmap-cache.h"
31 #include "sigbus.h"
32 #include "util.h"
33
/* Forward declarations; struct Window and struct Context refer to each other. */
typedef struct Context Context;
typedef struct Window Window;
36
37 struct Window {
38 MMapCache *cache;
39
40 bool invalidated:1;
41 bool keep_always:1;
42 bool in_unused:1;
43
44 int prot;
45 void *ptr;
46 uint64_t offset;
47 size_t size;
48
49 MMapFileDescriptor *fd;
50
51 LIST_FIELDS(Window, by_fd);
52 LIST_FIELDS(Window, unused);
53
54 LIST_HEAD(Context, contexts);
55 };
56
57 struct Context {
58 MMapCache *cache;
59 unsigned id;
60 Window *window;
61
62 LIST_FIELDS(Context, by_window);
63 };
64
65 struct MMapFileDescriptor {
66 MMapCache *cache;
67 int fd;
68 bool sigbus;
69 LIST_HEAD(Window, windows);
70 };
71
72 struct MMapCache {
73 int n_ref;
74 unsigned n_windows;
75
76 unsigned n_hit, n_missed;
77
78 Hashmap *fds;
79 Context *contexts[MMAP_CACHE_MAX_CONTEXTS];
80
81 LIST_HEAD(Window, unused);
82 Window *last_unused;
83 };
84
/* While the cache holds at most this many windows, window_add() allocates a
 * new Window instead of recycling one from the unused list. */
#define WINDOWS_MIN 64

#ifndef ENABLE_DEBUG_MMAP_CACHE
/* Regular window size: 8 MiB. */
#  define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#else
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
#  define WINDOW_SIZE (page_size())
#endif
93
94 MMapCache* mmap_cache_new(void) {
95 MMapCache *m;
96
97 m = new0(MMapCache, 1);
98 if (!m)
99 return NULL;
100
101 m->n_ref = 1;
102 return m;
103 }
104
105 MMapCache* mmap_cache_ref(MMapCache *m) {
106 assert(m);
107 assert(m->n_ref > 0);
108
109 m->n_ref++;
110 return m;
111 }
112
113 static void window_unlink(Window *w) {
114 Context *c;
115
116 assert(w);
117
118 if (w->ptr)
119 munmap(w->ptr, w->size);
120
121 if (w->fd)
122 LIST_REMOVE(by_fd, w->fd->windows, w);
123
124 if (w->in_unused) {
125 if (w->cache->last_unused == w)
126 w->cache->last_unused = w->unused_prev;
127
128 LIST_REMOVE(unused, w->cache->unused, w);
129 }
130
131 LIST_FOREACH(by_window, c, w->contexts) {
132 assert(c->window == w);
133 c->window = NULL;
134 }
135 }
136
137 static void window_invalidate(Window *w) {
138 assert(w);
139
140 if (w->invalidated)
141 return;
142
143 /* Replace the window with anonymous pages. This is useful
144 * when we hit a SIGBUS and want to make sure the file cannot
145 * trigger any further SIGBUS, possibly overrunning the sigbus
146 * queue. */
147
148 assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
149 w->invalidated = true;
150 }
151
152 static void window_free(Window *w) {
153 assert(w);
154
155 window_unlink(w);
156 w->cache->n_windows--;
157 free(w);
158 }
159
160 _pure_ static inline bool window_matches(Window *w, int prot, uint64_t offset, size_t size) {
161 assert(w);
162 assert(size > 0);
163
164 return
165 prot == w->prot &&
166 offset >= w->offset &&
167 offset + size <= w->offset + w->size;
168 }
169
170 _pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, int prot, uint64_t offset, size_t size) {
171 assert(w);
172 assert(f);
173
174 return
175 w->fd &&
176 f->fd == w->fd->fd &&
177 window_matches(w, prot, offset, size);
178 }
179
180 static Window *window_add(MMapCache *m, MMapFileDescriptor *f, int prot, bool keep_always, uint64_t offset, size_t size, void *ptr) {
181 Window *w;
182
183 assert(m);
184 assert(f);
185
186 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
187
188 /* Allocate a new window */
189 w = new0(Window, 1);
190 if (!w)
191 return NULL;
192 m->n_windows++;
193 } else {
194
195 /* Reuse an existing one */
196 w = m->last_unused;
197 window_unlink(w);
198 zero(*w);
199 }
200
201 w->cache = m;
202 w->fd = f;
203 w->prot = prot;
204 w->keep_always = keep_always;
205 w->offset = offset;
206 w->size = size;
207 w->ptr = ptr;
208
209 LIST_PREPEND(by_fd, f->windows, w);
210
211 return w;
212 }
213
214 static void context_detach_window(Context *c) {
215 Window *w;
216
217 assert(c);
218
219 if (!c->window)
220 return;
221
222 w = c->window;
223 c->window = NULL;
224 LIST_REMOVE(by_window, w->contexts, c);
225
226 if (!w->contexts && !w->keep_always) {
227 /* Not used anymore? */
228 #ifdef ENABLE_DEBUG_MMAP_CACHE
229 /* Unmap unused windows immediately to expose use-after-unmap
230 * by SIGSEGV. */
231 window_free(w);
232 #else
233 LIST_PREPEND(unused, c->cache->unused, w);
234 if (!c->cache->last_unused)
235 c->cache->last_unused = w;
236
237 w->in_unused = true;
238 #endif
239 }
240 }
241
242 static void context_attach_window(Context *c, Window *w) {
243 assert(c);
244 assert(w);
245
246 if (c->window == w)
247 return;
248
249 context_detach_window(c);
250
251 if (w->in_unused) {
252 /* Used again? */
253 LIST_REMOVE(unused, c->cache->unused, w);
254 if (c->cache->last_unused == w)
255 c->cache->last_unused = w->unused_prev;
256
257 w->in_unused = false;
258 }
259
260 c->window = w;
261 LIST_PREPEND(by_window, w->contexts, c);
262 }
263
264 static Context *context_add(MMapCache *m, unsigned id) {
265 Context *c;
266
267 assert(m);
268
269 c = m->contexts[id];
270 if (c)
271 return c;
272
273 c = new0(Context, 1);
274 if (!c)
275 return NULL;
276
277 c->cache = m;
278 c->id = id;
279
280 assert(!m->contexts[id]);
281 m->contexts[id] = c;
282
283 return c;
284 }
285
286 static void context_free(Context *c) {
287 assert(c);
288
289 context_detach_window(c);
290
291 if (c->cache) {
292 assert(c->cache->contexts[c->id] == c);
293 c->cache->contexts[c->id] = NULL;
294 }
295
296 free(c);
297 }
298
299 static void mmap_cache_free(MMapCache *m) {
300 int i;
301
302 assert(m);
303
304 for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
305 if (m->contexts[i])
306 context_free(m->contexts[i]);
307
308 hashmap_free(m->fds);
309
310 while (m->unused)
311 window_free(m->unused);
312
313 free(m);
314 }
315
316 MMapCache* mmap_cache_unref(MMapCache *m) {
317
318 if (!m)
319 return NULL;
320
321 assert(m->n_ref > 0);
322
323 m->n_ref--;
324 if (m->n_ref == 0)
325 mmap_cache_free(m);
326
327 return NULL;
328 }
329
330 static int make_room(MMapCache *m) {
331 assert(m);
332
333 if (!m->last_unused)
334 return 0;
335
336 window_free(m->last_unused);
337 return 1;
338 }
339
340 static int try_context(
341 MMapCache *m,
342 MMapFileDescriptor *f,
343 int prot,
344 unsigned context,
345 bool keep_always,
346 uint64_t offset,
347 size_t size,
348 void **ret) {
349
350 Context *c;
351
352 assert(m);
353 assert(m->n_ref > 0);
354 assert(f);
355 assert(size > 0);
356 assert(ret);
357
358 c = m->contexts[context];
359 if (!c)
360 return 0;
361
362 assert(c->id == context);
363
364 if (!c->window)
365 return 0;
366
367 if (!window_matches_fd(c->window, f, prot, offset, size)) {
368
369 /* Drop the reference to the window, since it's unnecessary now */
370 context_detach_window(c);
371 return 0;
372 }
373
374 if (c->window->fd->sigbus)
375 return -EIO;
376
377 c->window->keep_always = c->window->keep_always || keep_always;
378
379 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
380 return 1;
381 }
382
383 static int find_mmap(
384 MMapCache *m,
385 MMapFileDescriptor *f,
386 int prot,
387 unsigned context,
388 bool keep_always,
389 uint64_t offset,
390 size_t size,
391 void **ret) {
392
393 Window *w;
394 Context *c;
395
396 assert(m);
397 assert(m->n_ref > 0);
398 assert(f);
399 assert(size > 0);
400
401 if (f->sigbus)
402 return -EIO;
403
404 LIST_FOREACH(by_fd, w, f->windows)
405 if (window_matches(w, prot, offset, size))
406 break;
407
408 if (!w)
409 return 0;
410
411 c = context_add(m, context);
412 if (!c)
413 return -ENOMEM;
414
415 context_attach_window(c, w);
416 w->keep_always = w->keep_always || keep_always;
417
418 *ret = (uint8_t*) w->ptr + (offset - w->offset);
419 return 1;
420 }
421
422 static int mmap_try_harder(MMapCache *m, void *addr, MMapFileDescriptor *f, int prot, int flags, uint64_t offset, size_t size, void **res) {
423 void *ptr;
424
425 assert(m);
426 assert(f);
427 assert(res);
428
429 for (;;) {
430 int r;
431
432 ptr = mmap(addr, size, prot, flags, f->fd, offset);
433 if (ptr != MAP_FAILED)
434 break;
435 if (errno != ENOMEM)
436 return negative_errno();
437
438 r = make_room(m);
439 if (r < 0)
440 return r;
441 if (r == 0)
442 return -ENOMEM;
443 }
444
445 *res = ptr;
446 return 0;
447 }
448
449 static int add_mmap(
450 MMapCache *m,
451 MMapFileDescriptor *f,
452 int prot,
453 unsigned context,
454 bool keep_always,
455 uint64_t offset,
456 size_t size,
457 struct stat *st,
458 void **ret) {
459
460 uint64_t woffset, wsize;
461 Context *c;
462 Window *w;
463 void *d;
464 int r;
465
466 assert(m);
467 assert(m->n_ref > 0);
468 assert(f);
469 assert(size > 0);
470 assert(ret);
471
472 woffset = offset & ~((uint64_t) page_size() - 1ULL);
473 wsize = size + (offset - woffset);
474 wsize = PAGE_ALIGN(wsize);
475
476 if (wsize < WINDOW_SIZE) {
477 uint64_t delta;
478
479 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
480
481 if (delta > offset)
482 woffset = 0;
483 else
484 woffset -= delta;
485
486 wsize = WINDOW_SIZE;
487 }
488
489 if (st) {
490 /* Memory maps that are larger then the files
491 underneath have undefined behavior. Hence, clamp
492 things to the file size if we know it */
493
494 if (woffset >= (uint64_t) st->st_size)
495 return -EADDRNOTAVAIL;
496
497 if (woffset + wsize > (uint64_t) st->st_size)
498 wsize = PAGE_ALIGN(st->st_size - woffset);
499 }
500
501 r = mmap_try_harder(m, NULL, f, prot, MAP_SHARED, woffset, wsize, &d);
502 if (r < 0)
503 return r;
504
505 c = context_add(m, context);
506 if (!c)
507 goto outofmem;
508
509 w = window_add(m, f, prot, keep_always, woffset, wsize, d);
510 if (!w)
511 goto outofmem;
512
513 context_attach_window(c, w);
514
515 *ret = (uint8_t*) w->ptr + (offset - w->offset);
516 return 1;
517
518 outofmem:
519 (void) munmap(d, wsize);
520 return -ENOMEM;
521 }
522
523 int mmap_cache_get(
524 MMapCache *m,
525 MMapFileDescriptor *f,
526 int prot,
527 unsigned context,
528 bool keep_always,
529 uint64_t offset,
530 size_t size,
531 struct stat *st,
532 void **ret) {
533
534 int r;
535
536 assert(m);
537 assert(m->n_ref > 0);
538 assert(f);
539 assert(size > 0);
540 assert(ret);
541 assert(context < MMAP_CACHE_MAX_CONTEXTS);
542
543 /* Check whether the current context is the right one already */
544 r = try_context(m, f, prot, context, keep_always, offset, size, ret);
545 if (r != 0) {
546 m->n_hit++;
547 return r;
548 }
549
550 /* Search for a matching mmap */
551 r = find_mmap(m, f, prot, context, keep_always, offset, size, ret);
552 if (r != 0) {
553 m->n_hit++;
554 return r;
555 }
556
557 m->n_missed++;
558
559 /* Create a new mmap */
560 return add_mmap(m, f, prot, context, keep_always, offset, size, st, ret);
561 }
562
563 unsigned mmap_cache_get_hit(MMapCache *m) {
564 assert(m);
565
566 return m->n_hit;
567 }
568
569 unsigned mmap_cache_get_missed(MMapCache *m) {
570 assert(m);
571
572 return m->n_missed;
573 }
574
575 static void mmap_cache_process_sigbus(MMapCache *m) {
576 bool found = false;
577 MMapFileDescriptor *f;
578 Iterator i;
579 int r;
580
581 assert(m);
582
583 /* Iterate through all triggered pages and mark their files as
584 * invalidated */
585 for (;;) {
586 bool ours;
587 void *addr;
588
589 r = sigbus_pop(&addr);
590 if (_likely_(r == 0))
591 break;
592 if (r < 0) {
593 log_error_errno(r, "SIGBUS handling failed: %m");
594 abort();
595 }
596
597 ours = false;
598 HASHMAP_FOREACH(f, m->fds, i) {
599 Window *w;
600
601 LIST_FOREACH(by_fd, w, f->windows) {
602 if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
603 (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
604 found = ours = f->sigbus = true;
605 break;
606 }
607 }
608
609 if (ours)
610 break;
611 }
612
613 /* Didn't find a matching window, give up */
614 if (!ours) {
615 log_error("Unknown SIGBUS page, aborting.");
616 abort();
617 }
618 }
619
620 /* The list of triggered pages is now empty. Now, let's remap
621 * all windows of the triggered file to anonymous maps, so
622 * that no page of the file in question is triggered again, so
623 * that we can be sure not to hit the queue size limit. */
624 if (_likely_(!found))
625 return;
626
627 HASHMAP_FOREACH(f, m->fds, i) {
628 Window *w;
629
630 if (!f->sigbus)
631 continue;
632
633 LIST_FOREACH(by_fd, w, f->windows)
634 window_invalidate(w);
635 }
636 }
637
638 bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f) {
639 assert(m);
640 assert(f);
641
642 mmap_cache_process_sigbus(m);
643
644 return f->sigbus;
645 }
646
647 MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd) {
648 MMapFileDescriptor *f;
649 int r;
650
651 assert(m);
652 assert(fd >= 0);
653
654 f = hashmap_get(m->fds, FD_TO_PTR(fd));
655 if (f)
656 return f;
657
658 r = hashmap_ensure_allocated(&m->fds, NULL);
659 if (r < 0)
660 return NULL;
661
662 f = new0(MMapFileDescriptor, 1);
663 if (!f)
664 return NULL;
665
666 f->cache = m;
667 f->fd = fd;
668
669 r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
670 if (r < 0)
671 return mfree(f);
672
673 return f;
674 }
675
676 void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f) {
677 assert(m);
678 assert(f);
679
680 /* Make sure that any queued SIGBUS are first dispatched, so
681 * that we don't end up with a SIGBUS entry we cannot relate
682 * to any existing memory map */
683
684 mmap_cache_process_sigbus(m);
685
686 while (f->windows)
687 window_free(f->windows);
688
689 if (f->cache)
690 assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
691
692 free(f);
693 }