]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/readahead/readahead-collect.c
use "Out of memory." consistently (or with "\n")
[thirdparty/systemd.git] / src / readahead / readahead-collect.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <inttypes.h>
24 #include <fcntl.h>
25 #include <linux/limits.h>
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/select.h>
31 #include <sys/time.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <unistd.h>
35 #include <linux/fanotify.h>
36 #include <sys/signalfd.h>
37 #include <sys/poll.h>
38 #include <sys/mman.h>
39 #include <linux/fs.h>
40 #include <linux/fiemap.h>
41 #include <sys/ioctl.h>
42 #include <sys/vfs.h>
43 #include <getopt.h>
44 #include <sys/inotify.h>
45
46 #include <systemd/sd-daemon.h>
47
48 #include "missing.h"
49 #include "util.h"
50 #include "set.h"
51 #include "ioprio.h"
52 #include "readahead-common.h"
53 #include "virt.h"
54
55 /* fixme:
56 *
57 * - detect ssd on btrfs/lvm...
58 * - read ahead directories
59 * - gzip?
60 * - remount rw?
61 * - handle files where nothing is in mincore
62 * - does ioprio_set work with fadvise()?
63 */
64
65 static ReadaheadShared *shared = NULL;
66
67 /* Avoid collisions with the NULL pointer */
68 #define SECTOR_TO_PTR(s) ULONG_TO_PTR((s)+1)
69 #define PTR_TO_SECTOR(p) (PTR_TO_ULONG(p)-1)
70
71 static int btrfs_defrag(int fd) {
72 struct btrfs_ioctl_vol_args data;
73
74 zero(data);
75 data.fd = fd;
76
77 return ioctl(fd, BTRFS_IOC_DEFRAG, &data);
78 }
79
80 static int pack_file(FILE *pack, const char *fn, bool on_btrfs) {
81 struct stat st;
82 void *start = MAP_FAILED;
83 uint8_t *vec;
84 uint32_t b, c;
85 uint64_t inode;
86 size_t l, pages;
87 bool mapped;
88 int r = 0, fd = -1, k;
89
90 assert(pack);
91 assert(fn);
92
93 fd = open(fn, O_RDONLY|O_CLOEXEC|O_NOATIME|O_NOCTTY|O_NOFOLLOW);
94 if (fd < 0) {
95
96 if (errno == ENOENT)
97 return 0;
98
99 if (errno == EPERM || errno == EACCES)
100 return 0;
101
102 log_warning("open(%s) failed: %m", fn);
103 r = -errno;
104 goto finish;
105 }
106
107 k = file_verify(fd, fn, arg_file_size_max, &st);
108 if (k <= 0) {
109 r = k;
110 goto finish;
111 }
112
113 if (on_btrfs)
114 btrfs_defrag(fd);
115
116 l = PAGE_ALIGN(st.st_size);
117 start = mmap(NULL, l, PROT_READ, MAP_SHARED, fd, 0);
118 if (start == MAP_FAILED) {
119 log_warning("mmap(%s) failed: %m", fn);
120 r = -errno;
121 goto finish;
122 }
123
124 pages = l / page_size();
125 vec = alloca(pages);
126 memset(vec, 0, pages);
127 if (mincore(start, l, vec) < 0) {
128 log_warning("mincore(%s) failed: %m", fn);
129 r = -errno;
130 goto finish;
131 }
132
133 fputs(fn, pack);
134 fputc('\n', pack);
135
136 /* Store the inode, so that we notice when the file is deleted */
137 inode = (uint64_t) st.st_ino;
138 fwrite(&inode, sizeof(inode), 1, pack);
139
140 mapped = false;
141 for (c = 0; c < pages; c++) {
142 bool new_mapped = !!(vec[c] & 1);
143
144 if (!mapped && new_mapped)
145 b = c;
146 else if (mapped && !new_mapped) {
147 fwrite(&b, sizeof(b), 1, pack);
148 fwrite(&c, sizeof(c), 1, pack);
149
150 log_debug("%s: page %u to %u", fn, b, c);
151 }
152
153 mapped = new_mapped;
154 }
155
156 /* We don't write any range data if we should read the entire file */
157 if (mapped && b > 0) {
158 fwrite(&b, sizeof(b), 1, pack);
159 fwrite(&c, sizeof(c), 1, pack);
160
161 log_debug("%s: page %u to %u", fn, b, c);
162 }
163
164 /* End marker */
165 b = 0;
166 fwrite(&b, sizeof(b), 1, pack);
167 fwrite(&b, sizeof(b), 1, pack);
168
169 finish:
170 if (start != MAP_FAILED)
171 munmap(start, l);
172
173 if (fd >= 0)
174 close_nointr_nofail(fd);
175
176 return r;
177 }
178
179 static unsigned long fd_first_block(int fd) {
180 struct {
181 struct fiemap fiemap;
182 struct fiemap_extent extent;
183 } data;
184
185 zero(data);
186 data.fiemap.fm_length = ~0ULL;
187 data.fiemap.fm_extent_count = 1;
188
189 if (ioctl(fd, FS_IOC_FIEMAP, &data) < 0)
190 return 0;
191
192 if (data.fiemap.fm_mapped_extents <= 0)
193 return 0;
194
195 if (data.fiemap.fm_extents[0].fe_flags & FIEMAP_EXTENT_UNKNOWN)
196 return 0;
197
198 return (unsigned long) data.fiemap.fm_extents[0].fe_physical;
199 }
200
/* One file to be packed: its path and the block number used for ordering. */
struct item {
        const char *path;
        unsigned long block;
};

/* qsort() comparison callback for arrays of struct item.
 *
 * Orders ascending by block; items with the same block are ordered by
 * strcmp() of their paths. */
static int qsort_compare(const void *a, const void *b) {
        const struct item *lhs = a, *rhs = b;

        if (lhs->block != rhs->block)
                return lhs->block < rhs->block ? -1 : 1;

        return strcmp(lhs->path, rhs->path);
}
219
220 static int collect(const char *root) {
221 enum {
222 FD_FANOTIFY, /* Get the actual fs events */
223 FD_SIGNAL,
224 FD_INOTIFY, /* We get notifications to quit early via this fd */
225 _FD_MAX
226 };
227 struct pollfd pollfd[_FD_MAX];
228 int fanotify_fd = -1, signal_fd = -1, inotify_fd = -1, r = 0;
229 pid_t my_pid;
230 Hashmap *files = NULL;
231 Iterator i;
232 char *p, *q;
233 sigset_t mask;
234 FILE *pack = NULL;
235 char *pack_fn_new = NULL, *pack_fn = NULL;
236 bool on_ssd, on_btrfs;
237 struct statfs sfs;
238 usec_t not_after;
239 uint64_t previous_block_readahead;
240 bool previous_block_readahead_set = false;
241
242 assert(root);
243
244 if (asprintf(&pack_fn, "%s/.readahead", root) < 0) {
245 log_error("Out of memory.");
246 r = -ENOMEM;
247 goto finish;
248 }
249
250 /* If there's no pack file yet we lower the kernel readahead
251 * so that mincore() is accurate. If there is a pack file
252 * already we assume it is accurate enough so that kernel
253 * readahead is never triggered. */
254 previous_block_readahead_set =
255 access(pack_fn, F_OK) < 0 &&
256 block_get_readahead(root, &previous_block_readahead) >= 0 &&
257 block_set_readahead(root, 8*1024) >= 0;
258
259 if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(), IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) < 0)
260 log_warning("Failed to set IDLE IO priority class: %m");
261
262 assert_se(sigemptyset(&mask) == 0);
263 sigset_add_many(&mask, SIGINT, SIGTERM, -1);
264 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
265
266 if ((signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC)) < 0) {
267 log_error("signalfd(): %m");
268 r = -errno;
269 goto finish;
270 }
271
272 if (!(files = hashmap_new(string_hash_func, string_compare_func))) {
273 log_error("Failed to allocate set.");
274 r = -ENOMEM;
275 goto finish;
276 }
277
278 if ((fanotify_fd = fanotify_init(FAN_CLOEXEC|FAN_NONBLOCK, O_RDONLY|O_LARGEFILE|O_CLOEXEC|O_NOATIME)) < 0) {
279 log_error("Failed to create fanotify object: %m");
280 r = -errno;
281 goto finish;
282 }
283
284 if (fanotify_mark(fanotify_fd, FAN_MARK_ADD|FAN_MARK_MOUNT, FAN_OPEN, AT_FDCWD, root) < 0) {
285 log_error("Failed to mark %s: %m", root);
286 r = -errno;
287 goto finish;
288 }
289
290 if ((inotify_fd = open_inotify()) < 0) {
291 r = inotify_fd;
292 goto finish;
293 }
294
295 not_after = now(CLOCK_MONOTONIC) + arg_timeout;
296
297 my_pid = getpid();
298
299 zero(pollfd);
300 pollfd[FD_FANOTIFY].fd = fanotify_fd;
301 pollfd[FD_FANOTIFY].events = POLLIN;
302 pollfd[FD_SIGNAL].fd = signal_fd;
303 pollfd[FD_SIGNAL].events = POLLIN;
304 pollfd[FD_INOTIFY].fd = inotify_fd;
305 pollfd[FD_INOTIFY].events = POLLIN;
306
307 sd_notify(0,
308 "READY=1\n"
309 "STATUS=Collecting readahead data");
310
311 log_debug("Collecting...");
312
313 if (access("/run/systemd/readahead/cancel", F_OK) >= 0) {
314 log_debug("Collection canceled");
315 r = -ECANCELED;
316 goto finish;
317 }
318
319 if (access("/run/systemd/readahead/done", F_OK) >= 0) {
320 log_debug("Got termination request");
321 goto done;
322 }
323
324 for (;;) {
325 union {
326 struct fanotify_event_metadata metadata;
327 char buffer[4096];
328 } data;
329 ssize_t n;
330 struct fanotify_event_metadata *m;
331 usec_t t;
332 int h;
333
334 if (hashmap_size(files) > arg_files_max) {
335 log_debug("Reached maximum number of read ahead files, ending collection.");
336 break;
337 }
338
339 t = now(CLOCK_MONOTONIC);
340 if (t >= not_after) {
341 log_debug("Reached maximum collection time, ending collection.");
342 break;
343 }
344
345 if ((h = poll(pollfd, _FD_MAX, (int) ((not_after - t) / USEC_PER_MSEC))) < 0) {
346
347 if (errno == EINTR)
348 continue;
349
350 log_error("poll(): %m");
351 r = -errno;
352 goto finish;
353 }
354
355 if (h == 0) {
356 log_debug("Reached maximum collection time, ending collection.");
357 break;
358 }
359
360 if (pollfd[FD_SIGNAL].revents) {
361 log_debug("Got signal.");
362 break;
363 }
364
365 if (pollfd[FD_INOTIFY].revents) {
366 uint8_t inotify_buffer[sizeof(struct inotify_event) + FILENAME_MAX];
367 struct inotify_event *e;
368
369 if ((n = read(inotify_fd, &inotify_buffer, sizeof(inotify_buffer))) < 0) {
370 if (errno == EINTR || errno == EAGAIN)
371 continue;
372
373 log_error("Failed to read inotify event: %m");
374 r = -errno;
375 goto finish;
376 }
377
378 e = (struct inotify_event*) inotify_buffer;
379 while (n > 0) {
380 size_t step;
381
382 if ((e->mask & IN_CREATE) && streq(e->name, "cancel")) {
383 log_debug("Collection canceled");
384 r = -ECANCELED;
385 goto finish;
386 }
387
388 if ((e->mask & IN_CREATE) && streq(e->name, "done")) {
389 log_debug("Got termination request");
390 goto done;
391 }
392
393 step = sizeof(struct inotify_event) + e->len;
394 assert(step <= (size_t) n);
395
396 e = (struct inotify_event*) ((uint8_t*) e + step);
397 n -= step;
398 }
399 }
400
401 if ((n = read(fanotify_fd, &data, sizeof(data))) < 0) {
402
403 if (errno == EINTR || errno == EAGAIN)
404 continue;
405
406 /* fanotify sometimes returns EACCES on read()
407 * where it shouldn't. For now let's just
408 * ignore it here (which is safe), but
409 * eventually this should be
410 * dropped when the kernel is fixed.
411 *
412 * https://bugzilla.redhat.com/show_bug.cgi?id=707577 */
413 if (errno == EACCES)
414 continue;
415
416 log_error("Failed to read event: %m");
417 r = -errno;
418 goto finish;
419 }
420
421 for (m = &data.metadata; FAN_EVENT_OK(m, n); m = FAN_EVENT_NEXT(m, n)) {
422 char fn[PATH_MAX];
423 int k;
424
425 if (m->fd < 0)
426 goto next_iteration;
427
428 if (m->pid == my_pid)
429 goto next_iteration;
430
431 __sync_synchronize();
432 if (m->pid == shared->replay)
433 goto next_iteration;
434
435 snprintf(fn, sizeof(fn), "/proc/self/fd/%i", m->fd);
436 char_array_0(fn);
437
438 if ((k = readlink_malloc(fn, &p)) >= 0) {
439 if (startswith(p, "/tmp") ||
440 endswith(p, " (deleted)") ||
441 hashmap_get(files, p))
442 /* Not interesting, or
443 * already read */
444 free(p);
445 else {
446 unsigned long ul;
447
448 ul = fd_first_block(m->fd);
449
450 if ((k = hashmap_put(files, p, SECTOR_TO_PTR(ul))) < 0) {
451 log_warning("set_put() failed: %s", strerror(-k));
452 free(p);
453 }
454 }
455
456 } else
457 log_warning("readlink(%s) failed: %s", fn, strerror(-k));
458
459 next_iteration:
460 if (m->fd)
461 close_nointr_nofail(m->fd);
462 }
463 }
464
465 done:
466 if (fanotify_fd >= 0) {
467 close_nointr_nofail(fanotify_fd);
468 fanotify_fd = -1;
469 }
470
471 log_debug("Writing Pack File...");
472
473 on_ssd = fs_on_ssd(root) > 0;
474 log_debug("On SSD: %s", yes_no(on_ssd));
475
476 on_btrfs = statfs(root, &sfs) >= 0 && (long) sfs.f_type == (long) BTRFS_SUPER_MAGIC;
477 log_debug("On btrfs: %s", yes_no(on_btrfs));
478
479 if (asprintf(&pack_fn_new, "%s/.readahead.new", root) < 0) {
480 log_error("Out of memory.");
481 r = -ENOMEM;
482 goto finish;
483 }
484
485 pack = fopen(pack_fn_new, "we");
486 if (!pack) {
487 log_error("Failed to open pack file: %m");
488 r = -errno;
489 goto finish;
490 }
491
492 fputs(CANONICAL_HOST READAHEAD_PACK_FILE_VERSION, pack);
493 putc(on_ssd ? 'S' : 'R', pack);
494
495 if (on_ssd || on_btrfs) {
496
497 /* On SSD or on btrfs, just write things out in the
498 * order the files were accessed. */
499
500 HASHMAP_FOREACH_KEY(q, p, files, i)
501 pack_file(pack, p, on_btrfs);
502 } else {
503 struct item *ordered, *j;
504 unsigned k, n;
505
506 /* On rotating media, order things by the block
507 * numbers */
508
509 log_debug("Ordering...");
510
511 n = hashmap_size(files);
512 if (!(ordered = new(struct item, n))) {
513 log_error("Out of memory.");
514 r = -ENOMEM;
515 goto finish;
516 }
517
518 j = ordered;
519 HASHMAP_FOREACH_KEY(q, p, files, i) {
520 j->path = p;
521 j->block = PTR_TO_SECTOR(q);
522 j++;
523 }
524
525 assert(ordered + n == j);
526
527 qsort(ordered, n, sizeof(struct item), qsort_compare);
528
529 for (k = 0; k < n; k++)
530 pack_file(pack, ordered[k].path, on_btrfs);
531
532 free(ordered);
533 }
534
535 log_debug("Finalizing...");
536
537 fflush(pack);
538
539 if (ferror(pack)) {
540 log_error("Failed to write pack file.");
541 r = -EIO;
542 goto finish;
543 }
544
545 if (rename(pack_fn_new, pack_fn) < 0) {
546 log_error("Failed to rename readahead file: %m");
547 r = -errno;
548 goto finish;
549 }
550
551 fclose(pack);
552 pack = NULL;
553
554 log_debug("Done.");
555
556 finish:
557 if (fanotify_fd >= 0)
558 close_nointr_nofail(fanotify_fd);
559
560 if (signal_fd >= 0)
561 close_nointr_nofail(signal_fd);
562
563 if (inotify_fd >= 0)
564 close_nointr_nofail(inotify_fd);
565
566 if (pack) {
567 fclose(pack);
568 unlink(pack_fn_new);
569 }
570 free(pack_fn_new);
571 free(pack_fn);
572
573 while ((p = hashmap_steal_first_key(files)))
574 free(p);
575
576 hashmap_free(files);
577
578 if (previous_block_readahead_set) {
579 uint64_t bytes;
580
581 /* Restore the original kernel readahead setting if we
582 * changed it, and nobody has overwritten it since
583 * yet. */
584 if (block_get_readahead(root, &bytes) >= 0 && bytes == 8*1024)
585 block_set_readahead(root, previous_block_readahead);
586 }
587
588 return r;
589 }
590
591 int main_collect(const char *root) {
592
593 if (!root)
594 root = "/";
595
596 /* Skip this step on read-only media. Note that we check the
597 * underlying block device here, not he read-only flag of the
598 * file system on top, since that one is most likely mounted
599 * read-only anyway at boot, even if the underlying block
600 * device is theoretically writable. */
601 if (fs_on_read_only(root) > 0) {
602 log_info("Disabling readahead collector due to read-only media.");
603 return EXIT_SUCCESS;
604 }
605
606 if (!enough_ram()) {
607 log_info("Disabling readahead collector due to low memory.");
608 return EXIT_SUCCESS;
609 }
610
611 shared = shared_get();
612 if (!shared)
613 return EXIT_FAILURE;
614
615 shared->collect = getpid();
616 __sync_synchronize();
617
618 if (collect(root) < 0)
619 return EXIT_FAILURE;
620
621 return EXIT_SUCCESS;
622 }