]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/mmap-cache.c
build-sys: use #if Y instead of #ifdef Y everywhere
[thirdparty/systemd.git] / src / journal / mmap-cache.c
CommitLineData
16e9f408
LP
1/***
2 This file is part of systemd.
3
4 Copyright 2012 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
16e9f408
LP
20#include <errno.h>
21#include <stdlib.h>
f8019684 22#include <sys/mman.h>
16e9f408 23
b5efdb8a 24#include "alloc-util.h"
23e096cc 25#include "fd-util.h"
f8019684
LP
26#include "hashmap.h"
27#include "list.h"
28#include "log.h"
f8019684 29#include "macro.h"
16e9f408 30#include "mmap-cache.h"
cf0fbc49
TA
31#include "sigbus.h"
32#include "util.h"
16e9f408 33
f8019684
LP
34typedef struct Window Window;
35typedef struct Context Context;
84168d80 36
f8019684
LP
37struct Window {
38 MMapCache *cache;
39
739731cd
LP
40 bool invalidated:1;
41 bool keep_always:1;
42 bool in_unused:1;
16e9f408 43
68667801 44 int prot;
16e9f408
LP
45 void *ptr;
46 uint64_t offset;
f8019684
LP
47 size_t size;
48
be7cdd8e 49 MMapFileDescriptor *fd;
16e9f408 50
f8019684
LP
51 LIST_FIELDS(Window, by_fd);
52 LIST_FIELDS(Window, unused);
53
54 LIST_HEAD(Context, contexts);
55};
16e9f408 56
f8019684
LP
57struct Context {
58 MMapCache *cache;
59 unsigned id;
60 Window *window;
16e9f408 61
f8019684
LP
62 LIST_FIELDS(Context, by_window);
63};
64
be7cdd8e 65struct MMapFileDescriptor {
f8019684 66 MMapCache *cache;
16e9f408 67 int fd;
fa6ac760 68 bool sigbus;
f8019684
LP
69 LIST_HEAD(Window, windows);
70};
16e9f408
LP
71
72struct MMapCache {
f8019684 73 int n_ref;
68667801 74 unsigned n_windows;
16e9f408 75
bf807d4d
LP
76 unsigned n_hit, n_missed;
77
f8019684 78 Hashmap *fds;
69adae51 79 Context *contexts[MMAP_CACHE_MAX_CONTEXTS];
16e9f408 80
f8019684
LP
81 LIST_HEAD(Window, unused);
82 Window *last_unused;
16e9f408
LP
83};
84
/* window_add() only recycles an unused window once more than this many
 * windows exist; below that it always allocates a fresh one. */
#define WINDOWS_MIN 64

/* Minimum size of a newly created window. */
#if ENABLE_DEBUG_MMAP_CACHE
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
# define WINDOW_SIZE (page_size())
#else
# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#endif
16e9f408 93
f8019684
LP
94MMapCache* mmap_cache_new(void) {
95 MMapCache *m;
16e9f408 96
f8019684
LP
97 m = new0(MMapCache, 1);
98 if (!m)
99 return NULL;
16e9f408 100
f8019684
LP
101 m->n_ref = 1;
102 return m;
16e9f408
LP
103}
104
f8019684 105MMapCache* mmap_cache_ref(MMapCache *m) {
16e9f408 106 assert(m);
f8019684 107 assert(m->n_ref > 0);
16e9f408 108
313cefa1 109 m->n_ref++;
f8019684
LP
110 return m;
111}
f65425cb 112
f8019684
LP
113static void window_unlink(Window *w) {
114 Context *c;
f65425cb 115
f8019684 116 assert(w);
16e9f408 117
f8019684
LP
118 if (w->ptr)
119 munmap(w->ptr, w->size);
16e9f408 120
f8019684 121 if (w->fd)
71fda00f 122 LIST_REMOVE(by_fd, w->fd->windows, w);
16e9f408 123
f8019684
LP
124 if (w->in_unused) {
125 if (w->cache->last_unused == w)
126 w->cache->last_unused = w->unused_prev;
16e9f408 127
71fda00f 128 LIST_REMOVE(unused, w->cache->unused, w);
f65425cb 129 }
16e9f408 130
f8019684
LP
131 LIST_FOREACH(by_window, c, w->contexts) {
132 assert(c->window == w);
133 c->window = NULL;
f65425cb 134 }
16e9f408
LP
135}
136
fa6ac760
LP
137static void window_invalidate(Window *w) {
138 assert(w);
139
140 if (w->invalidated)
141 return;
142
143 /* Replace the window with anonymous pages. This is useful
144 * when we hit a SIGBUS and want to make sure the file cannot
145 * trigger any further SIGBUS, possibly overrunning the sigbus
146 * queue. */
147
148 assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
149 w->invalidated = true;
150}
151
f8019684
LP
152static void window_free(Window *w) {
153 assert(w);
f65425cb 154
f8019684 155 window_unlink(w);
89de6947 156 w->cache->n_windows--;
f8019684
LP
157 free(w);
158}
f65425cb 159
8c3d9662 160_pure_ static inline bool window_matches(Window *w, int prot, uint64_t offset, size_t size) {
f8019684 161 assert(w);
f8019684 162 assert(size > 0);
16e9f408 163
f8019684 164 return
f8019684
LP
165 prot == w->prot &&
166 offset >= w->offset &&
167 offset + size <= w->offset + w->size;
16e9f408
LP
168}
169
8c3d9662
VC
170_pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, int prot, uint64_t offset, size_t size) {
171 assert(w);
172 assert(f);
173
174 return
175 w->fd &&
176 f->fd == w->fd->fd &&
177 window_matches(w, prot, offset, size);
178}
179
be7cdd8e 180static Window *window_add(MMapCache *m, MMapFileDescriptor *f, int prot, bool keep_always, uint64_t offset, size_t size, void *ptr) {
f8019684 181 Window *w;
16e9f408
LP
182
183 assert(m);
be7cdd8e 184 assert(f);
16e9f408 185
f8019684 186 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
f65425cb 187
f8019684
LP
188 /* Allocate a new window */
189 w = new0(Window, 1);
190 if (!w)
191 return NULL;
89de6947 192 m->n_windows++;
f65425cb 193 } else {
16e9f408 194
f8019684
LP
195 /* Reuse an existing one */
196 w = m->last_unused;
197 window_unlink(w);
198 zero(*w);
f65425cb 199 }
f8019684
LP
200
201 w->cache = m;
be7cdd8e 202 w->fd = f;
6a491490
VC
203 w->prot = prot;
204 w->keep_always = keep_always;
205 w->offset = offset;
206 w->size = size;
207 w->ptr = ptr;
208
be7cdd8e 209 LIST_PREPEND(by_fd, f->windows, w);
6a491490 210
f8019684 211 return w;
16e9f408
LP
212}
213
f8019684
LP
214static void context_detach_window(Context *c) {
215 Window *w;
16e9f408 216
f8019684 217 assert(c);
16e9f408 218
f8019684 219 if (!c->window)
16e9f408
LP
220 return;
221
f8019684
LP
222 w = c->window;
223 c->window = NULL;
71fda00f 224 LIST_REMOVE(by_window, w->contexts, c);
16e9f408 225
1b8951e5 226 if (!w->contexts && !w->keep_always) {
f8019684 227 /* Not used anymore? */
349cc4a5 228#if ENABLE_DEBUG_MMAP_CACHE
fad5a6c6
MS
229 /* Unmap unused windows immediately to expose use-after-unmap
230 * by SIGSEGV. */
231 window_free(w);
232#else
71fda00f 233 LIST_PREPEND(unused, c->cache->unused, w);
f8019684
LP
234 if (!c->cache->last_unused)
235 c->cache->last_unused = w;
16e9f408 236
f8019684 237 w->in_unused = true;
fad5a6c6 238#endif
f8019684 239 }
16e9f408
LP
240}
241
f8019684
LP
242static void context_attach_window(Context *c, Window *w) {
243 assert(c);
244 assert(w);
16e9f408 245
f8019684 246 if (c->window == w)
16e9f408
LP
247 return;
248
f8019684 249 context_detach_window(c);
16e9f408 250
e18021f7 251 if (w->in_unused) {
f8019684 252 /* Used again? */
71fda00f 253 LIST_REMOVE(unused, c->cache->unused, w);
a2ab7ee6
CG
254 if (c->cache->last_unused == w)
255 c->cache->last_unused = w->unused_prev;
16e9f408 256
f8019684
LP
257 w->in_unused = false;
258 }
f65425cb 259
f8019684 260 c->window = w;
71fda00f 261 LIST_PREPEND(by_window, w->contexts, c);
16e9f408
LP
262}
263
f8019684
LP
264static Context *context_add(MMapCache *m, unsigned id) {
265 Context *c;
16e9f408
LP
266
267 assert(m);
268
69adae51 269 c = m->contexts[id];
f8019684
LP
270 if (c)
271 return c;
272
f8019684
LP
273 c = new0(Context, 1);
274 if (!c)
275 return NULL;
16e9f408 276
f8019684
LP
277 c->cache = m;
278 c->id = id;
16e9f408 279
69adae51
MS
280 assert(!m->contexts[id]);
281 m->contexts[id] = c;
16e9f408 282
f8019684 283 return c;
16e9f408
LP
284}
285
f8019684
LP
286static void context_free(Context *c) {
287 assert(c);
16e9f408 288
f8019684 289 context_detach_window(c);
16e9f408 290
69adae51
MS
291 if (c->cache) {
292 assert(c->cache->contexts[c->id] == c);
293 c->cache->contexts[c->id] = NULL;
294 }
16e9f408 295
f8019684
LP
296 free(c);
297}
298
f8019684 299static void mmap_cache_free(MMapCache *m) {
69adae51 300 int i;
f8019684 301
16e9f408 302 assert(m);
16e9f408 303
69adae51
MS
304 for (i = 0; i < MMAP_CACHE_MAX_CONTEXTS; i++)
305 if (m->contexts[i])
306 context_free(m->contexts[i]);
8e6d9397 307
8e6d9397
GM
308 hashmap_free(m->fds);
309
f8019684
LP
310 while (m->unused)
311 window_free(m->unused);
312
313 free(m);
16e9f408
LP
314}
315
316MMapCache* mmap_cache_unref(MMapCache *m) {
f649045c
LP
317
318 if (!m)
319 return NULL;
320
16e9f408
LP
321 assert(m->n_ref > 0);
322
313cefa1 323 m->n_ref--;
f8019684 324 if (m->n_ref == 0)
16e9f408 325 mmap_cache_free(m);
16e9f408
LP
326
327 return NULL;
328}
329
f8019684
LP
330static int make_room(MMapCache *m) {
331 assert(m);
332
333 if (!m->last_unused)
334 return 0;
335
336 window_free(m->last_unused);
337 return 1;
338}
339
340static int try_context(
341 MMapCache *m,
be7cdd8e 342 MMapFileDescriptor *f,
f8019684
LP
343 int prot,
344 unsigned context,
345 bool keep_always,
346 uint64_t offset,
347 size_t size,
b42549ad
VC
348 void **ret,
349 size_t *ret_size) {
f8019684
LP
350
351 Context *c;
f65425cb 352
16e9f408 353 assert(m);
f8019684 354 assert(m->n_ref > 0);
be7cdd8e 355 assert(f);
f8019684 356 assert(size > 0);
1b8951e5 357 assert(ret);
16e9f408 358
69adae51 359 c = m->contexts[context];
f8019684 360 if (!c)
16e9f408 361 return 0;
16e9f408 362
f8019684 363 assert(c->id == context);
16e9f408 364
f8019684
LP
365 if (!c->window)
366 return 0;
f65425cb 367
8c3d9662 368 if (!window_matches_fd(c->window, f, prot, offset, size)) {
f65425cb 369
f8019684
LP
370 /* Drop the reference to the window, since it's unnecessary now */
371 context_detach_window(c);
372 return 0;
f65425cb
LP
373 }
374
fa6ac760
LP
375 if (c->window->fd->sigbus)
376 return -EIO;
377
739731cd 378 c->window->keep_always = c->window->keep_always || keep_always;
16e9f408 379
1b8951e5 380 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
b42549ad
VC
381 if (ret_size)
382 *ret_size = c->window->size - (offset - c->window->offset);
383
f8019684 384 return 1;
16e9f408
LP
385}
386
f8019684
LP
387static int find_mmap(
388 MMapCache *m,
be7cdd8e 389 MMapFileDescriptor *f,
f8019684
LP
390 int prot,
391 unsigned context,
392 bool keep_always,
393 uint64_t offset,
394 size_t size,
b42549ad
VC
395 void **ret,
396 size_t *ret_size) {
f8019684 397
f8019684
LP
398 Window *w;
399 Context *c;
16e9f408
LP
400
401 assert(m);
f8019684 402 assert(m->n_ref > 0);
be7cdd8e 403 assert(f);
f8019684 404 assert(size > 0);
16e9f408 405
fa6ac760
LP
406 if (f->sigbus)
407 return -EIO;
408
f8019684 409 LIST_FOREACH(by_fd, w, f->windows)
8c3d9662 410 if (window_matches(w, prot, offset, size))
f8019684 411 break;
16e9f408 412
f8019684
LP
413 if (!w)
414 return 0;
415
416 c = context_add(m, context);
417 if (!c)
418 return -ENOMEM;
419
420 context_attach_window(c, w);
739731cd 421 w->keep_always = w->keep_always || keep_always;
16e9f408 422
1b8951e5 423 *ret = (uint8_t*) w->ptr + (offset - w->offset);
b42549ad
VC
424 if (ret_size)
425 *ret_size = w->size - (offset - w->offset);
426
f8019684 427 return 1;
16e9f408
LP
428}
429
be7cdd8e 430static int mmap_try_harder(MMapCache *m, void *addr, MMapFileDescriptor *f, int prot, int flags, uint64_t offset, size_t size, void **res) {
db87967e
VC
431 void *ptr;
432
433 assert(m);
be7cdd8e 434 assert(f);
db87967e
VC
435 assert(res);
436
437 for (;;) {
438 int r;
439
be7cdd8e 440 ptr = mmap(addr, size, prot, flags, f->fd, offset);
db87967e
VC
441 if (ptr != MAP_FAILED)
442 break;
443 if (errno != ENOMEM)
3f0083a2 444 return negative_errno();
db87967e
VC
445
446 r = make_room(m);
447 if (r < 0)
448 return r;
449 if (r == 0)
450 return -ENOMEM;
451 }
452
453 *res = ptr;
454 return 0;
455}
456
f8019684 457static int add_mmap(
16e9f408 458 MMapCache *m,
be7cdd8e 459 MMapFileDescriptor *f,
16e9f408
LP
460 int prot,
461 unsigned context,
fcde2389 462 bool keep_always,
16e9f408 463 uint64_t offset,
f8019684 464 size_t size,
fcde2389 465 struct stat *st,
b42549ad
VC
466 void **ret,
467 size_t *ret_size) {
16e9f408 468
16e9f408 469 uint64_t woffset, wsize;
f8019684 470 Context *c;
f8019684
LP
471 Window *w;
472 void *d;
16e9f408
LP
473 int r;
474
475 assert(m);
f8019684 476 assert(m->n_ref > 0);
be7cdd8e 477 assert(f);
16e9f408 478 assert(size > 0);
1b8951e5 479 assert(ret);
16e9f408
LP
480
481 woffset = offset & ~((uint64_t) page_size() - 1ULL);
482 wsize = size + (offset - woffset);
483 wsize = PAGE_ALIGN(wsize);
484
485 if (wsize < WINDOW_SIZE) {
486 uint64_t delta;
487
beec0085 488 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
16e9f408
LP
489
490 if (delta > offset)
491 woffset = 0;
492 else
493 woffset -= delta;
494
495 wsize = WINDOW_SIZE;
496 }
497
fcde2389
LP
498 if (st) {
499 /* Memory maps that are larger then the files
c5315881 500 underneath have undefined behavior. Hence, clamp
fcde2389
LP
501 things to the file size if we know it */
502
503 if (woffset >= (uint64_t) st->st_size)
504 return -EADDRNOTAVAIL;
505
506 if (woffset + wsize > (uint64_t) st->st_size)
507 wsize = PAGE_ALIGN(st->st_size - woffset);
508 }
509
be7cdd8e 510 r = mmap_try_harder(m, NULL, f, prot, MAP_SHARED, woffset, wsize, &d);
db87967e
VC
511 if (r < 0)
512 return r;
16e9f408 513
f8019684
LP
514 c = context_add(m, context);
515 if (!c)
b67ddc7b 516 goto outofmem;
16e9f408 517
6a491490 518 w = window_add(m, f, prot, keep_always, woffset, wsize, d);
f8019684 519 if (!w)
b67ddc7b 520 goto outofmem;
16e9f408 521
c7884da9 522 context_attach_window(c, w);
16e9f408 523
1b8951e5 524 *ret = (uint8_t*) w->ptr + (offset - w->offset);
b42549ad
VC
525 if (ret_size)
526 *ret_size = w->size - (offset - w->offset);
527
16e9f408 528 return 1;
b67ddc7b
PDS
529
530outofmem:
3f0083a2 531 (void) munmap(d, wsize);
b67ddc7b 532 return -ENOMEM;
16e9f408
LP
533}
534
535int mmap_cache_get(
536 MMapCache *m,
be7cdd8e 537 MMapFileDescriptor *f,
16e9f408
LP
538 int prot,
539 unsigned context,
fcde2389 540 bool keep_always,
16e9f408 541 uint64_t offset,
f8019684 542 size_t size,
fcde2389 543 struct stat *st,
b42549ad
VC
544 void **ret,
545 size_t *ret_size) {
16e9f408 546
16e9f408
LP
547 int r;
548
549 assert(m);
f8019684 550 assert(m->n_ref > 0);
be7cdd8e 551 assert(f);
16e9f408 552 assert(size > 0);
1b8951e5 553 assert(ret);
69adae51 554 assert(context < MMAP_CACHE_MAX_CONTEXTS);
16e9f408 555
f8019684 556 /* Check whether the current context is the right one already */
b42549ad 557 r = try_context(m, f, prot, context, keep_always, offset, size, ret, ret_size);
bf807d4d 558 if (r != 0) {
313cefa1 559 m->n_hit++;
16e9f408 560 return r;
bf807d4d 561 }
16e9f408 562
f8019684 563 /* Search for a matching mmap */
b42549ad 564 r = find_mmap(m, f, prot, context, keep_always, offset, size, ret, ret_size);
bf807d4d 565 if (r != 0) {
313cefa1 566 m->n_hit++;
16e9f408 567 return r;
bf807d4d
LP
568 }
569
570 m->n_missed++;
16e9f408 571
f8019684 572 /* Create a new mmap */
b42549ad 573 return add_mmap(m, f, prot, context, keep_always, offset, size, st, ret, ret_size);
ae97089d
ZJS
574}
575
fa6ac760
LP
576unsigned mmap_cache_get_hit(MMapCache *m) {
577 assert(m);
578
579 return m->n_hit;
580}
581
582unsigned mmap_cache_get_missed(MMapCache *m) {
583 assert(m);
584
585 return m->n_missed;
586}
587
588static void mmap_cache_process_sigbus(MMapCache *m) {
589 bool found = false;
be7cdd8e 590 MMapFileDescriptor *f;
fa6ac760
LP
591 Iterator i;
592 int r;
16e9f408
LP
593
594 assert(m);
16e9f408 595
fa6ac760
LP
596 /* Iterate through all triggered pages and mark their files as
597 * invalidated */
598 for (;;) {
599 bool ours;
600 void *addr;
601
602 r = sigbus_pop(&addr);
603 if (_likely_(r == 0))
604 break;
605 if (r < 0) {
606 log_error_errno(r, "SIGBUS handling failed: %m");
607 abort();
608 }
609
610 ours = false;
611 HASHMAP_FOREACH(f, m->fds, i) {
612 Window *w;
613
614 LIST_FOREACH(by_fd, w, f->windows) {
615 if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
616 (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
617 found = ours = f->sigbus = true;
618 break;
619 }
620 }
621
622 if (ours)
623 break;
624 }
625
626 /* Didn't find a matching window, give up */
627 if (!ours) {
628 log_error("Unknown SIGBUS page, aborting.");
629 abort();
630 }
631 }
632
633 /* The list of triggered pages is now empty. Now, let's remap
634 * all windows of the triggered file to anonymous maps, so
635 * that no page of the file in question is triggered again, so
636 * that we can be sure not to hit the queue size limit. */
637 if (_likely_(!found))
16e9f408 638 return;
16e9f408 639
fa6ac760
LP
640 HASHMAP_FOREACH(f, m->fds, i) {
641 Window *w;
642
643 if (!f->sigbus)
644 continue;
645
646 LIST_FOREACH(by_fd, w, f->windows)
647 window_invalidate(w);
648 }
f8019684 649}
16e9f408 650
be7cdd8e 651bool mmap_cache_got_sigbus(MMapCache *m, MMapFileDescriptor *f) {
bf807d4d 652 assert(m);
be7cdd8e 653 assert(f);
bf807d4d 654
fa6ac760
LP
655 mmap_cache_process_sigbus(m);
656
fa6ac760 657 return f->sigbus;
bf807d4d
LP
658}
659
be7cdd8e
VC
660MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd) {
661 MMapFileDescriptor *f;
662 int r;
fa6ac760 663
bf807d4d 664 assert(m);
fa6ac760 665 assert(fd >= 0);
bf807d4d 666
be7cdd8e
VC
667 f = hashmap_get(m->fds, FD_TO_PTR(fd));
668 if (f)
669 return f;
670
671 r = hashmap_ensure_allocated(&m->fds, NULL);
672 if (r < 0)
673 return NULL;
674
675 f = new0(MMapFileDescriptor, 1);
676 if (!f)
677 return NULL;
678
679 f->cache = m;
680 f->fd = fd;
681
682 r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
683 if (r < 0)
684 return mfree(f);
685
686 return f;
687}
688
689void mmap_cache_free_fd(MMapCache *m, MMapFileDescriptor *f) {
690 assert(m);
691 assert(f);
692
fa6ac760
LP
693 /* Make sure that any queued SIGBUS are first dispatched, so
694 * that we don't end up with a SIGBUS entry we cannot relate
695 * to any existing memory map */
696
697 mmap_cache_process_sigbus(m);
698
be7cdd8e
VC
699 while (f->windows)
700 window_free(f->windows);
701
702 if (f->cache)
703 assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
fa6ac760 704
be7cdd8e 705 free(f);
bf807d4d 706}