]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/mmap-cache.c
resolved: don't accept doing queries for invalid RR types
[thirdparty/systemd.git] / src / journal / mmap-cache.c
CommitLineData
16e9f408
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2012 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
16e9f408
LP
22#include <errno.h>
23#include <stdlib.h>
f8019684 24#include <sys/mman.h>
16e9f408 25
b5efdb8a 26#include "alloc-util.h"
23e096cc 27#include "fd-util.h"
f8019684
LP
28#include "hashmap.h"
29#include "list.h"
30#include "log.h"
f8019684 31#include "macro.h"
16e9f408 32#include "mmap-cache.h"
cf0fbc49
TA
33#include "sigbus.h"
34#include "util.h"
16e9f408 35
f8019684
LP
/* Forward declarations: the three structures below point at one another. */
typedef struct Window Window;
typedef struct Context Context;
typedef struct FileDescriptor FileDescriptor;
84168d80 39
f8019684
LP
40struct Window {
41 MMapCache *cache;
42
fa6ac760 43 bool invalidated;
1b8951e5 44 bool keep_always;
f8019684 45 bool in_unused;
16e9f408 46
68667801 47 int prot;
16e9f408
LP
48 void *ptr;
49 uint64_t offset;
f8019684
LP
50 size_t size;
51
52 FileDescriptor *fd;
16e9f408 53
f8019684
LP
54 LIST_FIELDS(Window, by_fd);
55 LIST_FIELDS(Window, unused);
56
57 LIST_HEAD(Context, contexts);
58};
16e9f408 59
f8019684
LP
60struct Context {
61 MMapCache *cache;
62 unsigned id;
63 Window *window;
16e9f408 64
f8019684
LP
65 LIST_FIELDS(Context, by_window);
66};
67
68struct FileDescriptor {
69 MMapCache *cache;
16e9f408 70 int fd;
fa6ac760 71 bool sigbus;
f8019684
LP
72 LIST_HEAD(Window, windows);
73};
16e9f408
LP
74
75struct MMapCache {
f8019684 76 int n_ref;
68667801 77 unsigned n_windows;
16e9f408 78
bf807d4d
LP
79 unsigned n_hit, n_missed;
80
81
f8019684 82 Hashmap *fds;
69adae51 83 Context *contexts[MMAP_CACHE_MAX_CONTEXTS];
16e9f408 84
f8019684
LP
85 LIST_HEAD(Window, unused);
86 Window *last_unused;
16e9f408
LP
87};
88
/* window_add() only recycles an unused window once more than this many windows exist. */
#define WINDOWS_MIN 64

#if defined(ENABLE_DEBUG_MMAP_CACHE)
/* Debug builds: page-sized windows cause far more mmap traffic, which makes unsafe use more likely to surface. */
# define WINDOW_SIZE (page_size())
#else
/* Regular builds: windows smaller than 8 MiB are grown to this size by add_mmap(). */
# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#endif
16e9f408 97
f8019684
LP
98MMapCache* mmap_cache_new(void) {
99 MMapCache *m;
16e9f408 100
f8019684
LP
101 m = new0(MMapCache, 1);
102 if (!m)
103 return NULL;
16e9f408 104
f8019684
LP
105 m->n_ref = 1;
106 return m;
16e9f408
LP
107}
108
f8019684 109MMapCache* mmap_cache_ref(MMapCache *m) {
16e9f408 110 assert(m);
f8019684 111 assert(m->n_ref > 0);
16e9f408 112
f8019684
LP
113 m->n_ref ++;
114 return m;
115}
f65425cb 116
f8019684
LP
117static void window_unlink(Window *w) {
118 Context *c;
f65425cb 119
f8019684 120 assert(w);
16e9f408 121
f8019684
LP
122 if (w->ptr)
123 munmap(w->ptr, w->size);
16e9f408 124
f8019684 125 if (w->fd)
71fda00f 126 LIST_REMOVE(by_fd, w->fd->windows, w);
16e9f408 127
f8019684
LP
128 if (w->in_unused) {
129 if (w->cache->last_unused == w)
130 w->cache->last_unused = w->unused_prev;
16e9f408 131
71fda00f 132 LIST_REMOVE(unused, w->cache->unused, w);
f65425cb 133 }
16e9f408 134
f8019684
LP
135 LIST_FOREACH(by_window, c, w->contexts) {
136 assert(c->window == w);
137 c->window = NULL;
f65425cb 138 }
16e9f408
LP
139}
140
fa6ac760
LP
141static void window_invalidate(Window *w) {
142 assert(w);
143
144 if (w->invalidated)
145 return;
146
147 /* Replace the window with anonymous pages. This is useful
148 * when we hit a SIGBUS and want to make sure the file cannot
149 * trigger any further SIGBUS, possibly overrunning the sigbus
150 * queue. */
151
152 assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
153 w->invalidated = true;
154}
155
f8019684
LP
156static void window_free(Window *w) {
157 assert(w);
f65425cb 158
f8019684 159 window_unlink(w);
89de6947 160 w->cache->n_windows--;
f8019684
LP
161 free(w);
162}
f65425cb 163
44a6b1b6 164_pure_ static bool window_matches(Window *w, int fd, int prot, uint64_t offset, size_t size) {
f8019684
LP
165 assert(w);
166 assert(fd >= 0);
167 assert(size > 0);
16e9f408 168
f8019684
LP
169 return
170 w->fd &&
171 fd == w->fd->fd &&
172 prot == w->prot &&
173 offset >= w->offset &&
174 offset + size <= w->offset + w->size;
16e9f408
LP
175}
176
f8019684
LP
177static Window *window_add(MMapCache *m) {
178 Window *w;
16e9f408
LP
179
180 assert(m);
16e9f408 181
f8019684 182 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
f65425cb 183
f8019684
LP
184 /* Allocate a new window */
185 w = new0(Window, 1);
186 if (!w)
187 return NULL;
89de6947 188 m->n_windows++;
f65425cb 189 } else {
16e9f408 190
f8019684
LP
191 /* Reuse an existing one */
192 w = m->last_unused;
193 window_unlink(w);
194 zero(*w);
f65425cb 195 }
f8019684
LP
196
197 w->cache = m;
198 return w;
16e9f408
LP
199}
200
f8019684
LP
201static void context_detach_window(Context *c) {
202 Window *w;
16e9f408 203
f8019684 204 assert(c);
16e9f408 205
f8019684 206 if (!c->window)
16e9f408
LP
207 return;
208
f8019684
LP
209 w = c->window;
210 c->window = NULL;
71fda00f 211 LIST_REMOVE(by_window, w->contexts, c);
16e9f408 212
1b8951e5 213 if (!w->contexts && !w->keep_always) {
f8019684 214 /* Not used anymore? */
fad5a6c6
MS
215#ifdef ENABLE_DEBUG_MMAP_CACHE
216 /* Unmap unused windows immediately to expose use-after-unmap
217 * by SIGSEGV. */
218 window_free(w);
219#else
71fda00f 220 LIST_PREPEND(unused, c->cache->unused, w);
f8019684
LP
221 if (!c->cache->last_unused)
222 c->cache->last_unused = w;
16e9f408 223
f8019684 224 w->in_unused = true;
fad5a6c6 225#endif
f8019684 226 }
16e9f408
LP
227}
228
f8019684
LP
229static void context_attach_window(Context *c, Window *w) {
230 assert(c);
231 assert(w);
16e9f408 232
f8019684 233 if (c->window == w)
16e9f408
LP
234 return;
235
f8019684 236 context_detach_window(c);
16e9f408 237
e18021f7 238 if (w->in_unused) {
f8019684 239 /* Used again? */
71fda00f 240 LIST_REMOVE(unused, c->cache->unused, w);
a2ab7ee6
CG
241 if (c->cache->last_unused == w)
242 c->cache->last_unused = w->unused_prev;
16e9f408 243
f8019684
LP
244 w->in_unused = false;
245 }
f65425cb 246
f8019684 247 c->window = w;
71fda00f 248 LIST_PREPEND(by_window, w->contexts, c);
16e9f408
LP
249}
250
f8019684
LP
251static Context *context_add(MMapCache *m, unsigned id) {
252 Context *c;
16e9f408
LP
253
254 assert(m);
255
69adae51 256 c = m->contexts[id];
f8019684
LP
257 if (c)
258 return c;
259
f8019684
LP
260 c = new0(Context, 1);
261 if (!c)
262 return NULL;
16e9f408 263
f8019684
LP
264 c->cache = m;
265 c->id = id;
16e9f408 266
69adae51
MS
267 assert(!m->contexts[id]);
268 m->contexts[id] = c;
16e9f408 269
f8019684 270 return c;
16e9f408
LP
271}
272
f8019684
LP
273static void context_free(Context *c) {
274 assert(c);
16e9f408 275
f8019684 276 context_detach_window(c);
16e9f408 277
69adae51
MS
278 if (c->cache) {
279 assert(c->cache->contexts[c->id] == c);
280 c->cache->contexts[c->id] = NULL;
281 }
16e9f408 282
f8019684
LP
283 free(c);
284}
285
286static void fd_free(FileDescriptor *f) {
287 assert(f);
288
289 while (f->windows)
290 window_free(f->windows);
291
292 if (f->cache)
23e096cc 293 assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
f8019684
LP
294
295 free(f);
296}
297
298static FileDescriptor* fd_add(MMapCache *m, int fd) {
299 FileDescriptor *f;
300 int r;
301
302 assert(m);
303 assert(fd >= 0);
304
23e096cc 305 f = hashmap_get(m->fds, FD_TO_PTR(fd));
f8019684
LP
306 if (f)
307 return f;
308
d5099efc 309 r = hashmap_ensure_allocated(&m->fds, NULL);
f8019684 310 if (r < 0)
16e9f408 311 return NULL;
16e9f408 312
f8019684
LP
313 f = new0(FileDescriptor, 1);
314 if (!f)
16e9f408 315 return NULL;
16e9f408 316
f8019684
LP
317 f->cache = m;
318 f->fd = fd;
319
23e096cc 320 r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
f8019684
LP
321 if (r < 0) {
322 free(f);
16e9f408
LP
323 return NULL;
324 }
325
f8019684 326 return f;
16e9f408
LP
327}
328
f8019684 329static void mmap_cache_free(MMapCache *m) {
f8019684 330 FileDescriptor *f;
69adae51 331 int i;
f8019684 332
16e9f408 333 assert(m);
16e9f408 334
69adae51
MS
335 for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
336 if (m->contexts[i])
337 context_free(m->contexts[i]);
8e6d9397 338
f8019684
LP
339 while ((f = hashmap_first(m->fds)))
340 fd_free(f);
341
8e6d9397
GM
342 hashmap_free(m->fds);
343
f8019684
LP
344 while (m->unused)
345 window_free(m->unused);
346
347 free(m);
16e9f408
LP
348}
349
350MMapCache* mmap_cache_unref(MMapCache *m) {
351 assert(m);
352 assert(m->n_ref > 0);
353
f8019684
LP
354 m->n_ref --;
355 if (m->n_ref == 0)
16e9f408 356 mmap_cache_free(m);
16e9f408
LP
357
358 return NULL;
359}
360
f8019684
LP
361static int make_room(MMapCache *m) {
362 assert(m);
363
364 if (!m->last_unused)
365 return 0;
366
367 window_free(m->last_unused);
368 return 1;
369}
370
371static int try_context(
372 MMapCache *m,
373 int fd,
374 int prot,
375 unsigned context,
376 bool keep_always,
377 uint64_t offset,
378 size_t size,
1b8951e5 379 void **ret) {
f8019684
LP
380
381 Context *c;
f65425cb 382
16e9f408 383 assert(m);
f8019684
LP
384 assert(m->n_ref > 0);
385 assert(fd >= 0);
386 assert(size > 0);
1b8951e5 387 assert(ret);
16e9f408 388
69adae51 389 c = m->contexts[context];
f8019684 390 if (!c)
16e9f408 391 return 0;
16e9f408 392
f8019684 393 assert(c->id == context);
16e9f408 394
f8019684
LP
395 if (!c->window)
396 return 0;
f65425cb 397
f8019684 398 if (!window_matches(c->window, fd, prot, offset, size)) {
f65425cb 399
f8019684
LP
400 /* Drop the reference to the window, since it's unnecessary now */
401 context_detach_window(c);
402 return 0;
f65425cb
LP
403 }
404
fa6ac760
LP
405 if (c->window->fd->sigbus)
406 return -EIO;
407
1b8951e5 408 c->window->keep_always |= keep_always;
16e9f408 409
1b8951e5 410 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
f8019684 411 return 1;
16e9f408
LP
412}
413
f8019684
LP
414static int find_mmap(
415 MMapCache *m,
416 int fd,
417 int prot,
418 unsigned context,
419 bool keep_always,
420 uint64_t offset,
421 size_t size,
1b8951e5 422 void **ret) {
f8019684
LP
423
424 FileDescriptor *f;
425 Window *w;
426 Context *c;
16e9f408
LP
427
428 assert(m);
f8019684
LP
429 assert(m->n_ref > 0);
430 assert(fd >= 0);
431 assert(size > 0);
16e9f408 432
23e096cc 433 f = hashmap_get(m->fds, FD_TO_PTR(fd));
f8019684
LP
434 if (!f)
435 return 0;
16e9f408 436
f8019684 437 assert(f->fd == fd);
16e9f408 438
fa6ac760
LP
439 if (f->sigbus)
440 return -EIO;
441
f8019684
LP
442 LIST_FOREACH(by_fd, w, f->windows)
443 if (window_matches(w, fd, prot, offset, size))
444 break;
16e9f408 445
f8019684
LP
446 if (!w)
447 return 0;
448
449 c = context_add(m, context);
450 if (!c)
451 return -ENOMEM;
452
453 context_attach_window(c, w);
ae97089d 454 w->keep_always += keep_always;
16e9f408 455
1b8951e5 456 *ret = (uint8_t*) w->ptr + (offset - w->offset);
f8019684 457 return 1;
16e9f408
LP
458}
459
f8019684 460static int add_mmap(
16e9f408
LP
461 MMapCache *m,
462 int fd,
16e9f408
LP
463 int prot,
464 unsigned context,
fcde2389 465 bool keep_always,
16e9f408 466 uint64_t offset,
f8019684 467 size_t size,
fcde2389 468 struct stat *st,
1b8951e5 469 void **ret) {
16e9f408 470
16e9f408 471 uint64_t woffset, wsize;
f8019684
LP
472 Context *c;
473 FileDescriptor *f;
474 Window *w;
475 void *d;
16e9f408
LP
476 int r;
477
478 assert(m);
f8019684 479 assert(m->n_ref > 0);
16e9f408 480 assert(fd >= 0);
16e9f408 481 assert(size > 0);
1b8951e5 482 assert(ret);
16e9f408
LP
483
484 woffset = offset & ~((uint64_t) page_size() - 1ULL);
485 wsize = size + (offset - woffset);
486 wsize = PAGE_ALIGN(wsize);
487
488 if (wsize < WINDOW_SIZE) {
489 uint64_t delta;
490
beec0085 491 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
16e9f408
LP
492
493 if (delta > offset)
494 woffset = 0;
495 else
496 woffset -= delta;
497
498 wsize = WINDOW_SIZE;
499 }
500
fcde2389
LP
501 if (st) {
502 /* Memory maps that are larger then the files
c5315881 503 underneath have undefined behavior. Hence, clamp
fcde2389
LP
504 things to the file size if we know it */
505
506 if (woffset >= (uint64_t) st->st_size)
507 return -EADDRNOTAVAIL;
508
509 if (woffset + wsize > (uint64_t) st->st_size)
510 wsize = PAGE_ALIGN(st->st_size - woffset);
511 }
512
16e9f408
LP
513 for (;;) {
514 d = mmap(NULL, wsize, prot, MAP_SHARED, fd, woffset);
515 if (d != MAP_FAILED)
516 break;
517 if (errno != ENOMEM)
518 return -errno;
519
f8019684 520 r = make_room(m);
16e9f408
LP
521 if (r < 0)
522 return r;
523 if (r == 0)
524 return -ENOMEM;
525 }
526
f8019684
LP
527 c = context_add(m, context);
528 if (!c)
b67ddc7b 529 goto outofmem;
16e9f408 530
f8019684
LP
531 f = fd_add(m, fd);
532 if (!f)
b67ddc7b 533 goto outofmem;
16e9f408 534
f8019684
LP
535 w = window_add(m);
536 if (!w)
b67ddc7b 537 goto outofmem;
16e9f408 538
f8019684
LP
539 w->keep_always = keep_always;
540 w->ptr = d;
541 w->offset = woffset;
542 w->prot = prot;
543 w->size = wsize;
544 w->fd = f;
16e9f408 545
71fda00f 546 LIST_PREPEND(by_fd, f->windows, w);
16e9f408 547
f8019684
LP
548 context_detach_window(c);
549 c->window = w;
71fda00f 550 LIST_PREPEND(by_window, w->contexts, c);
16e9f408 551
1b8951e5 552 *ret = (uint8_t*) w->ptr + (offset - w->offset);
16e9f408 553 return 1;
b67ddc7b
PDS
554
555outofmem:
556 munmap(d, wsize);
557 return -ENOMEM;
16e9f408
LP
558}
559
560int mmap_cache_get(
561 MMapCache *m,
562 int fd,
563 int prot,
564 unsigned context,
fcde2389 565 bool keep_always,
16e9f408 566 uint64_t offset,
f8019684 567 size_t size,
fcde2389 568 struct stat *st,
1b8951e5 569 void **ret) {
16e9f408 570
16e9f408
LP
571 int r;
572
573 assert(m);
f8019684 574 assert(m->n_ref > 0);
16e9f408 575 assert(fd >= 0);
16e9f408 576 assert(size > 0);
1b8951e5 577 assert(ret);
69adae51 578 assert(context < MMAP_CACHE_MAX_CONTEXTS);
16e9f408 579
f8019684 580 /* Check whether the current context is the right one already */
1b8951e5 581 r = try_context(m, fd, prot, context, keep_always, offset, size, ret);
bf807d4d
LP
582 if (r != 0) {
583 m->n_hit ++;
16e9f408 584 return r;
bf807d4d 585 }
16e9f408 586
f8019684 587 /* Search for a matching mmap */
1b8951e5 588 r = find_mmap(m, fd, prot, context, keep_always, offset, size, ret);
bf807d4d
LP
589 if (r != 0) {
590 m->n_hit ++;
16e9f408 591 return r;
bf807d4d
LP
592 }
593
594 m->n_missed++;
16e9f408 595
f8019684 596 /* Create a new mmap */
1b8951e5 597 return add_mmap(m, fd, prot, context, keep_always, offset, size, st, ret);
ae97089d
ZJS
598}
599
fa6ac760
LP
600unsigned mmap_cache_get_hit(MMapCache *m) {
601 assert(m);
602
603 return m->n_hit;
604}
605
606unsigned mmap_cache_get_missed(MMapCache *m) {
607 assert(m);
608
609 return m->n_missed;
610}
611
612static void mmap_cache_process_sigbus(MMapCache *m) {
613 bool found = false;
f8019684 614 FileDescriptor *f;
fa6ac760
LP
615 Iterator i;
616 int r;
16e9f408
LP
617
618 assert(m);
16e9f408 619
fa6ac760
LP
620 /* Iterate through all triggered pages and mark their files as
621 * invalidated */
622 for (;;) {
623 bool ours;
624 void *addr;
625
626 r = sigbus_pop(&addr);
627 if (_likely_(r == 0))
628 break;
629 if (r < 0) {
630 log_error_errno(r, "SIGBUS handling failed: %m");
631 abort();
632 }
633
634 ours = false;
635 HASHMAP_FOREACH(f, m->fds, i) {
636 Window *w;
637
638 LIST_FOREACH(by_fd, w, f->windows) {
639 if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
640 (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
641 found = ours = f->sigbus = true;
642 break;
643 }
644 }
645
646 if (ours)
647 break;
648 }
649
650 /* Didn't find a matching window, give up */
651 if (!ours) {
652 log_error("Unknown SIGBUS page, aborting.");
653 abort();
654 }
655 }
656
657 /* The list of triggered pages is now empty. Now, let's remap
658 * all windows of the triggered file to anonymous maps, so
659 * that no page of the file in question is triggered again, so
660 * that we can be sure not to hit the queue size limit. */
661 if (_likely_(!found))
16e9f408 662 return;
16e9f408 663
fa6ac760
LP
664 HASHMAP_FOREACH(f, m->fds, i) {
665 Window *w;
666
667 if (!f->sigbus)
668 continue;
669
670 LIST_FOREACH(by_fd, w, f->windows)
671 window_invalidate(w);
672 }
f8019684 673}
16e9f408 674
fa6ac760
LP
675bool mmap_cache_got_sigbus(MMapCache *m, int fd) {
676 FileDescriptor *f;
677
bf807d4d 678 assert(m);
fa6ac760 679 assert(fd >= 0);
bf807d4d 680
fa6ac760
LP
681 mmap_cache_process_sigbus(m);
682
23e096cc 683 f = hashmap_get(m->fds, FD_TO_PTR(fd));
fa6ac760
LP
684 if (!f)
685 return false;
686
687 return f->sigbus;
bf807d4d
LP
688}
689
fa6ac760
LP
690void mmap_cache_close_fd(MMapCache *m, int fd) {
691 FileDescriptor *f;
692
bf807d4d 693 assert(m);
fa6ac760 694 assert(fd >= 0);
bf807d4d 695
fa6ac760
LP
696 /* Make sure that any queued SIGBUS are first dispatched, so
697 * that we don't end up with a SIGBUS entry we cannot relate
698 * to any existing memory map */
699
700 mmap_cache_process_sigbus(m);
701
23e096cc 702 f = hashmap_get(m->fds, FD_TO_PTR(fd));
fa6ac760
LP
703 if (!f)
704 return;
705
706 fd_free(f);
bf807d4d 707}