]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/mmap-cache.c
journal: completely rework the mmap cache as I too dumb to actually understand it
[thirdparty/systemd.git] / src / journal / mmap-cache.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2012 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <stdlib.h>
24 #include <sys/mman.h>
25 #include <string.h>
26
27 #include "hashmap.h"
28 #include "list.h"
29 #include "log.h"
30 #include "util.h"
31 #include "macro.h"
32 #include "mmap-cache.h"
33
/* Short names for the internal bookkeeping structures defined below. The
 * structures point at each other, so the names are introduced up front. */
typedef struct Window Window;
typedef struct Context Context;
typedef struct FileDescriptor FileDescriptor;
37
/* One memory mapped region of a file, as set up by add_mmap(). */
struct Window {
        MMapCache *cache;               /* cache this window belongs to (set in window_add()) */

        bool keep_always;               /* sticky flag, OR-ed in from the keep_always argument of lookups */
        bool in_unused;                 /* true while the window is linked into cache->unused */

        void *ptr;                      /* address returned by mmap() */
        uint64_t offset;                /* file offset at which the mapping starts */
        int prot;                       /* protection flags the mapping was created with */
        size_t size;                    /* length of the mapping in bytes */

        FileDescriptor *fd;             /* per-fd object whose "windows" list contains this window */

        LIST_FIELDS(Window, by_fd);     /* linkage within fd->windows */
        LIST_FIELDS(Window, unused);    /* linkage within cache->unused */

        LIST_HEAD(Context, contexts);   /* contexts currently pointing at this window */
};
56
/* A caller-chosen lookup slot, identified by a number, that remembers the
 * window it was last served from (see try_context()). */
struct Context {
        MMapCache *cache;               /* cache this context is registered in */
        unsigned id;                    /* caller supplied identifier; stored in cache->contexts under id + 1 */
        Window *window;                 /* window currently attached, or NULL */

        LIST_FIELDS(Context, by_window); /* linkage within window->contexts */
};
64
/* Per file descriptor bookkeeping: groups all windows mapped from one fd. */
struct FileDescriptor {
        MMapCache *cache;               /* cache this object is registered in */
        int fd;                         /* the file descriptor; stored in cache->fds under fd + 1 */
        LIST_HEAD(Window, windows);     /* all windows mapped from this fd */
};
70
/* The cache object itself. Reference counted via mmap_cache_ref() and
 * mmap_cache_unref(); destroyed when the count drops to zero. */
struct MMapCache {
        int n_ref;                      /* reference count, starts at 1 in mmap_cache_new() */

        Hashmap *fds;                   /* fd + 1 -> FileDescriptor, allocated lazily in fd_add() */
        Hashmap *contexts;              /* id + 1 -> Context, allocated lazily in context_add() */

        unsigned n_windows;             /* window counter compared against WINDOWS_MIN in window_add() */

        LIST_HEAD(Window, unused);      /* windows no context points at; newest entries are prepended */
        Window *last_unused;            /* tail of the "unused" list, i.e. the first candidate for eviction/reuse */
};
82
/* window_add() allocates fresh windows rather than recycling unused ones as
 * long as the window counter does not exceed this value. */
#define WINDOWS_MIN 64
/* Mappings smaller than this (8 MiB) are enlarged to this size in add_mmap(). */
#define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
85
86 MMapCache* mmap_cache_new(void) {
87 MMapCache *m;
88
89 m = new0(MMapCache, 1);
90 if (!m)
91 return NULL;
92
93 m->n_ref = 1;
94 return m;
95 }
96
97 MMapCache* mmap_cache_ref(MMapCache *m) {
98 assert(m);
99 assert(m->n_ref > 0);
100
101 m->n_ref ++;
102 return m;
103 }
104
105 static void window_unlink(Window *w) {
106 Context *c;
107
108 assert(w);
109
110 if (w->ptr)
111 munmap(w->ptr, w->size);
112
113 if (w->fd)
114 LIST_REMOVE(Window, by_fd, w->fd->windows, w);
115
116 if (w->in_unused) {
117 if (w->cache->last_unused == w)
118 w->cache->last_unused = w->unused_prev;
119
120 LIST_REMOVE(Window, unused, w->cache->unused, w);
121 }
122
123 LIST_FOREACH(by_window, c, w->contexts) {
124 assert(c->window == w);
125 c->window = NULL;
126 }
127 }
128
129 static void window_free(Window *w) {
130 assert(w);
131
132 window_unlink(w);
133 free(w);
134 }
135
136 static bool window_matches(Window *w, int fd, int prot, uint64_t offset, size_t size) {
137 assert(w);
138 assert(fd >= 0);
139 assert(size > 0);
140
141 return
142 w->fd &&
143 fd == w->fd->fd &&
144 prot == w->prot &&
145 offset >= w->offset &&
146 offset + size <= w->offset + w->size;
147 }
148
149 static Window *window_add(MMapCache *m) {
150 Window *w;
151
152 assert(m);
153
154 if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
155
156 /* Allocate a new window */
157 w = new0(Window, 1);
158 if (!w)
159 return NULL;
160 } else {
161
162 /* Reuse an existing one */
163 w = m->last_unused;
164 window_unlink(w);
165 zero(*w);
166 }
167
168 w->cache = m;
169 return w;
170 }
171
172 static void context_detach_window(Context *c) {
173 Window *w;
174
175 assert(c);
176
177 if (!c->window)
178 return;
179
180 w = c->window;
181 c->window = NULL;
182 LIST_REMOVE(Context, by_window, w->contexts, c);
183
184 if (!w->contexts) {
185 /* Not used anymore? */
186 LIST_PREPEND(Window, unused, c->cache->unused, w);
187 if (!c->cache->last_unused)
188 c->cache->last_unused = w;
189
190 w->in_unused = true;
191 }
192 }
193
194 static void context_attach_window(Context *c, Window *w) {
195 assert(c);
196 assert(w);
197
198 if (c->window == w)
199 return;
200
201 context_detach_window(c);
202
203 if (!w->contexts) {
204 /* Used again? */
205 LIST_REMOVE(Window, unused, c->cache->unused, w);
206 if (!c->cache->last_unused)
207 c->cache->last_unused = w;
208
209 w->in_unused = false;
210 }
211
212 c->window = w;
213 LIST_PREPEND(Context, by_window, w->contexts, c);
214 }
215
216 static Context *context_add(MMapCache *m, unsigned id) {
217 Context *c;
218 int r;
219
220 assert(m);
221
222 c = hashmap_get(m->contexts, UINT_TO_PTR(id + 1));
223 if (c)
224 return c;
225
226 r = hashmap_ensure_allocated(&m->contexts, trivial_hash_func, trivial_compare_func);
227 if (r < 0)
228 return NULL;
229
230 c = new0(Context, 1);
231 if (!c)
232 return NULL;
233
234 c->cache = m;
235 c->id = id;
236
237 r = hashmap_put(m->contexts, UINT_TO_PTR(id + 1), c);
238 if (r < 0) {
239 free(c);
240 return NULL;
241 }
242
243 return c;
244 }
245
246 static void context_free(Context *c) {
247 assert(c);
248
249 context_detach_window(c);
250
251 if (c->cache)
252 assert_se(hashmap_remove(c->cache->contexts, UINT_TO_PTR(c->id + 1)));
253
254 free(c);
255 }
256
257 static void fd_free(FileDescriptor *f) {
258 assert(f);
259
260 while (f->windows)
261 window_free(f->windows);
262
263 if (f->cache)
264 assert_se(hashmap_remove(f->cache->fds, INT_TO_PTR(f->fd + 1)));
265
266 free(f);
267 }
268
269 static FileDescriptor* fd_add(MMapCache *m, int fd) {
270 FileDescriptor *f;
271 int r;
272
273 assert(m);
274 assert(fd >= 0);
275
276 f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
277 if (f)
278 return f;
279
280 r = hashmap_ensure_allocated(&m->fds, trivial_hash_func, trivial_compare_func);
281 if (r < 0)
282 return NULL;
283
284 f = new0(FileDescriptor, 1);
285 if (!f)
286 return NULL;
287
288 f->cache = m;
289 f->fd = fd;
290
291 r = hashmap_put(m->fds, UINT_TO_PTR(fd + 1), f);
292 if (r < 0) {
293 free(f);
294 return NULL;
295 }
296
297 return f;
298 }
299
300 static void mmap_cache_free(MMapCache *m) {
301 Context *c;
302 FileDescriptor *f;
303
304 assert(m);
305
306 while ((c = hashmap_first(m->contexts)))
307 context_free(c);
308
309 while ((f = hashmap_first(m->fds)))
310 fd_free(f);
311
312 while (m->unused)
313 window_free(m->unused);
314
315 free(m);
316 }
317
318 MMapCache* mmap_cache_unref(MMapCache *m) {
319 assert(m);
320 assert(m->n_ref > 0);
321
322 m->n_ref --;
323 if (m->n_ref == 0)
324 mmap_cache_free(m);
325
326 return NULL;
327 }
328
329 static int make_room(MMapCache *m) {
330 assert(m);
331
332 if (!m->last_unused)
333 return 0;
334
335 window_free(m->last_unused);
336 return 1;
337 }
338
339 static int try_context(
340 MMapCache *m,
341 int fd,
342 int prot,
343 unsigned context,
344 bool keep_always,
345 uint64_t offset,
346 size_t size,
347 void **ret) {
348
349 Context *c;
350
351 assert(m);
352 assert(m->n_ref > 0);
353 assert(fd >= 0);
354 assert(size > 0);
355 assert(ret);
356
357 c = hashmap_get(m->contexts, UINT_TO_PTR(context+1));
358 if (!c)
359 return 0;
360
361 assert(c->id == context);
362
363 if (!c->window)
364 return 0;
365
366 if (!window_matches(c->window, fd, prot, offset, size)) {
367
368 /* Drop the reference to the window, since it's unnecessary now */
369 context_detach_window(c);
370 return 0;
371 }
372
373 c->window->keep_always = c->window->keep_always || keep_always;
374
375 *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset);
376 return 1;
377 }
378
379 static int find_mmap(
380 MMapCache *m,
381 int fd,
382 int prot,
383 unsigned context,
384 bool keep_always,
385 uint64_t offset,
386 size_t size,
387 void **ret) {
388
389 FileDescriptor *f;
390 Window *w;
391 Context *c;
392
393 assert(m);
394 assert(m->n_ref > 0);
395 assert(fd >= 0);
396 assert(size > 0);
397 assert(ret);
398
399 f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
400 if (!f)
401 return 0;
402
403 assert(f->fd == fd);
404
405 LIST_FOREACH(by_fd, w, f->windows)
406 if (window_matches(w, fd, prot, offset, size))
407 break;
408
409 if (!w)
410 return 0;
411
412 c = context_add(m, context);
413 if (!c)
414 return -ENOMEM;
415
416 context_attach_window(c, w);
417 w->keep_always = w->keep_always || keep_always;
418
419 *ret = (uint8_t*) w->ptr + (offset - w->offset);
420 return 1;
421 }
422
423 static int add_mmap(
424 MMapCache *m,
425 int fd,
426 int prot,
427 unsigned context,
428 bool keep_always,
429 uint64_t offset,
430 size_t size,
431 struct stat *st,
432 void **ret) {
433
434 uint64_t woffset, wsize;
435 Context *c;
436 FileDescriptor *f;
437 Window *w;
438 void *d;
439 int r;
440
441 assert(m);
442 assert(m->n_ref > 0);
443 assert(fd >= 0);
444 assert(size > 0);
445 assert(ret);
446
447 woffset = offset & ~((uint64_t) page_size() - 1ULL);
448 wsize = size + (offset - woffset);
449 wsize = PAGE_ALIGN(wsize);
450
451 if (wsize < WINDOW_SIZE) {
452 uint64_t delta;
453
454 delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
455
456 if (delta > offset)
457 woffset = 0;
458 else
459 woffset -= delta;
460
461 wsize = WINDOW_SIZE;
462 }
463
464 if (st) {
465 /* Memory maps that are larger then the files
466 underneath have undefined behavior. Hence, clamp
467 things to the file size if we know it */
468
469 if (woffset >= (uint64_t) st->st_size)
470 return -EADDRNOTAVAIL;
471
472 if (woffset + wsize > (uint64_t) st->st_size)
473 wsize = PAGE_ALIGN(st->st_size - woffset);
474 }
475
476 for (;;) {
477 d = mmap(NULL, wsize, prot, MAP_SHARED, fd, woffset);
478 if (d != MAP_FAILED)
479 break;
480 if (errno != ENOMEM)
481 return -errno;
482
483 r = make_room(m);
484 if (r < 0)
485 return r;
486 if (r == 0)
487 return -ENOMEM;
488 }
489
490 c = context_add(m, context);
491 if (!c)
492 return -ENOMEM;
493
494 f = fd_add(m, fd);
495 if (!f)
496 return -ENOMEM;
497
498 w = window_add(m);
499 if (!w)
500 return -ENOMEM;
501
502 w->keep_always = keep_always;
503 w->ptr = d;
504 w->offset = woffset;
505 w->prot = prot;
506 w->size = wsize;
507 w->fd = f;
508
509 LIST_PREPEND(Window, by_fd, f->windows, w);
510
511 context_detach_window(c);
512 c->window = w;
513 LIST_PREPEND(Context, by_window, w->contexts, c);
514
515 *ret = (uint8_t*) w->ptr + (offset - w->offset);
516 return 1;
517 }
518
519 int mmap_cache_get(
520 MMapCache *m,
521 int fd,
522 int prot,
523 unsigned context,
524 bool keep_always,
525 uint64_t offset,
526 size_t size,
527 struct stat *st,
528 void **ret) {
529
530 int r;
531
532 assert(m);
533 assert(m->n_ref > 0);
534 assert(fd >= 0);
535 assert(size > 0);
536 assert(ret);
537
538 /* Check whether the current context is the right one already */
539 r = try_context(m, fd, prot, context, keep_always, offset, size, ret);
540 if (r != 0)
541 return r;
542
543 /* Search for a matching mmap */
544 r = find_mmap(m, fd, prot, context, keep_always, offset, size, ret);
545 if (r != 0)
546 return r;
547
548 /* Create a new mmap */
549 return add_mmap(m, fd, prot, context, keep_always, offset, size, st, ret);
550 }
551
552 void mmap_cache_close_fd(MMapCache *m, int fd) {
553 FileDescriptor *f;
554
555 assert(m);
556 assert(fd >= 0);
557
558 f = hashmap_get(m->fds, INT_TO_PTR(fd + 1));
559 if (!f)
560 return;
561
562 fd_free(f);
563 }
564
565 void mmap_cache_close_context(MMapCache *m, unsigned context) {
566 Context *c;
567
568 assert(m);
569
570 c = hashmap_get(m->contexts, UINT_TO_PTR(context + 1));
571 if (!c)
572 return;
573
574 context_free(c);
575 }