]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/machine-image.c
Merge pull request #1542 from keszybz/journal-audit-optional
[thirdparty/systemd.git] / src / shared / machine-image.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <fcntl.h>
23 #include <linux/fs.h>
24 #include <sys/statfs.h>
25
26 #include "btrfs-util.h"
27 #include "copy.h"
28 #include "mkdir.h"
29 #include "path-util.h"
30 #include "rm-rf.h"
31 #include "strv.h"
32 #include "utf8.h"
33
34 #include "machine-image.h"
35
/* Directories scanned for images, stored as consecutive NUL-terminated
 * strings. image_find() and image_discover() walk them in this order, so
 * earlier entries take precedence over later ones. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0"          /* legacy location */
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
41
42 Image *image_unref(Image *i) {
43 if (!i)
44 return NULL;
45
46 free(i->name);
47 free(i->path);
48 free(i);
49 return NULL;
50 }
51
52 static char **image_settings_path(Image *image) {
53 _cleanup_strv_free_ char **l = NULL;
54 char **ret;
55 const char *fn, *s;
56 unsigned i = 0;
57
58 assert(image);
59
60 l = new0(char*, 4);
61 if (!l)
62 return NULL;
63
64 fn = strjoina(image->name, ".nspawn");
65
66 FOREACH_STRING(s, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") {
67 l[i] = strappend(s, fn);
68 if (!l[i])
69 return NULL;
70
71 i++;
72 }
73
74 l[i] = file_in_same_dir(image->path, fn);
75 if (!l[i])
76 return NULL;
77
78 ret = l;
79 l = NULL;
80
81 return ret;
82 }
83
84 static int image_new(
85 ImageType t,
86 const char *pretty,
87 const char *path,
88 const char *filename,
89 bool read_only,
90 usec_t crtime,
91 usec_t mtime,
92 Image **ret) {
93
94 _cleanup_(image_unrefp) Image *i = NULL;
95
96 assert(t >= 0);
97 assert(t < _IMAGE_TYPE_MAX);
98 assert(pretty);
99 assert(filename);
100 assert(ret);
101
102 i = new0(Image, 1);
103 if (!i)
104 return -ENOMEM;
105
106 i->type = t;
107 i->read_only = read_only;
108 i->crtime = crtime;
109 i->mtime = mtime;
110 i->usage = i->usage_exclusive = (uint64_t) -1;
111 i->limit = i->limit_exclusive = (uint64_t) -1;
112
113 i->name = strdup(pretty);
114 if (!i->name)
115 return -ENOMEM;
116
117 if (path)
118 i->path = strjoin(path, "/", filename, NULL);
119 else
120 i->path = strdup(filename);
121
122 if (!i->path)
123 return -ENOMEM;
124
125 path_kill_slashes(i->path);
126
127 *ret = i;
128 i = NULL;
129
130 return 0;
131 }
132
133 static int image_make(
134 const char *pretty,
135 int dfd,
136 const char *path,
137 const char *filename,
138 Image **ret) {
139
140 struct stat st;
141 bool read_only;
142 int r;
143
144 assert(filename);
145
146 /* We explicitly *do* follow symlinks here, since we want to
147 * allow symlinking trees into /var/lib/machines/, and treat
148 * them normally. */
149
150 if (fstatat(dfd, filename, &st, 0) < 0)
151 return -errno;
152
153 read_only =
154 (path && path_startswith(path, "/usr")) ||
155 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
156
157 if (S_ISDIR(st.st_mode)) {
158 _cleanup_close_ int fd = -1;
159 unsigned file_attr = 0;
160
161 if (!ret)
162 return 1;
163
164 if (!pretty)
165 pretty = filename;
166
167 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
168 if (fd < 0)
169 return -errno;
170
171 /* btrfs subvolumes have inode 256 */
172 if (st.st_ino == 256) {
173
174 r = btrfs_is_filesystem(fd);
175 if (r < 0)
176 return r;
177 if (r) {
178 BtrfsSubvolInfo info;
179 BtrfsQuotaInfo quota;
180
181 /* It's a btrfs subvolume */
182
183 r = btrfs_subvol_get_info_fd(fd, &info);
184 if (r < 0)
185 return r;
186
187 r = image_new(IMAGE_SUBVOLUME,
188 pretty,
189 path,
190 filename,
191 info.read_only || read_only,
192 info.otime,
193 0,
194 ret);
195 if (r < 0)
196 return r;
197
198 r = btrfs_subvol_get_quota_fd(fd, &quota);
199 if (r >= 0) {
200 (*ret)->usage = quota.referenced;
201 (*ret)->usage_exclusive = quota.exclusive;
202
203 (*ret)->limit = quota.referenced_max;
204 (*ret)->limit_exclusive = quota.exclusive_max;
205 }
206
207 return 1;
208 }
209 }
210
211 /* If the IMMUTABLE bit is set, we consider the
212 * directory read-only. Since the ioctl is not
213 * supported everywhere we ignore failures. */
214 (void) read_attr_fd(fd, &file_attr);
215
216 /* It's just a normal directory. */
217 r = image_new(IMAGE_DIRECTORY,
218 pretty,
219 path,
220 filename,
221 read_only || (file_attr & FS_IMMUTABLE_FL),
222 0,
223 0,
224 ret);
225 if (r < 0)
226 return r;
227
228 return 1;
229
230 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
231 usec_t crtime = 0;
232
233 /* It's a RAW disk image */
234
235 if (!ret)
236 return 1;
237
238 fd_getcrtime_at(dfd, filename, &crtime, 0);
239
240 if (!pretty)
241 pretty = strndupa(filename, strlen(filename) - 4);
242
243 r = image_new(IMAGE_RAW,
244 pretty,
245 path,
246 filename,
247 !(st.st_mode & 0222) || read_only,
248 crtime,
249 timespec_load(&st.st_mtim),
250 ret);
251 if (r < 0)
252 return r;
253
254 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
255 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
256
257 return 1;
258 }
259
260 return 0;
261 }
262
263 int image_find(const char *name, Image **ret) {
264 const char *path;
265 int r;
266
267 assert(name);
268
269 /* There are no images with invalid names */
270 if (!image_name_is_valid(name))
271 return 0;
272
273 NULSTR_FOREACH(path, image_search_path) {
274 _cleanup_closedir_ DIR *d = NULL;
275
276 d = opendir(path);
277 if (!d) {
278 if (errno == ENOENT)
279 continue;
280
281 return -errno;
282 }
283
284 r = image_make(NULL, dirfd(d), path, name, ret);
285 if (r == 0 || r == -ENOENT) {
286 _cleanup_free_ char *raw = NULL;
287
288 raw = strappend(name, ".raw");
289 if (!raw)
290 return -ENOMEM;
291
292 r = image_make(NULL, dirfd(d), path, raw, ret);
293 if (r == 0 || r == -ENOENT)
294 continue;
295 }
296 if (r < 0)
297 return r;
298
299 return 1;
300 }
301
302 if (streq(name, ".host"))
303 return image_make(".host", AT_FDCWD, NULL, "/", ret);
304
305 return 0;
306 };
307
308 int image_discover(Hashmap *h) {
309 const char *path;
310 int r;
311
312 assert(h);
313
314 NULSTR_FOREACH(path, image_search_path) {
315 _cleanup_closedir_ DIR *d = NULL;
316 struct dirent *de;
317
318 d = opendir(path);
319 if (!d) {
320 if (errno == ENOENT)
321 continue;
322
323 return -errno;
324 }
325
326 FOREACH_DIRENT_ALL(de, d, return -errno) {
327 _cleanup_(image_unrefp) Image *image = NULL;
328
329 if (!image_name_is_valid(de->d_name))
330 continue;
331
332 if (hashmap_contains(h, de->d_name))
333 continue;
334
335 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
336 if (r == 0 || r == -ENOENT)
337 continue;
338 if (r < 0)
339 return r;
340
341 r = hashmap_put(h, image->name, image);
342 if (r < 0)
343 return r;
344
345 image = NULL;
346 }
347 }
348
349 if (!hashmap_contains(h, ".host")) {
350 _cleanup_(image_unrefp) Image *image = NULL;
351
352 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
353 if (r < 0)
354 return r;
355
356 r = hashmap_put(h, image->name, image);
357 if (r < 0)
358 return r;
359
360 image = NULL;
361
362 }
363
364 return 0;
365 }
366
367 void image_hashmap_free(Hashmap *map) {
368 Image *i;
369
370 while ((i = hashmap_steal_first(map)))
371 image_unref(i);
372
373 hashmap_free(map);
374 }
375
376 int image_remove(Image *i) {
377 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
378 _cleanup_strv_free_ char **settings = NULL;
379 char **j;
380 int r;
381
382 assert(i);
383
384 if (path_equal(i->path, "/") ||
385 path_startswith(i->path, "/usr"))
386 return -EROFS;
387
388 settings = image_settings_path(i);
389 if (!settings)
390 return -ENOMEM;
391
392 /* Make sure we don't interfere with a running nspawn */
393 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
394 if (r < 0)
395 return r;
396
397 switch (i->type) {
398
399 case IMAGE_SUBVOLUME:
400 r = btrfs_subvol_remove(i->path, true);
401 if (r < 0)
402 return r;
403 break;
404
405 case IMAGE_DIRECTORY:
406 /* Allow deletion of read-only directories */
407 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
408 r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
409 if (r < 0)
410 return r;
411
412 break;
413
414 case IMAGE_RAW:
415 if (unlink(i->path) < 0)
416 return -errno;
417 break;
418
419 default:
420 return -EOPNOTSUPP;
421 }
422
423 STRV_FOREACH(j, settings) {
424 if (unlink(*j) < 0 && errno != ENOENT)
425 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
426 }
427
428 return 0;
429 }
430
431 static int rename_settings_file(const char *path, const char *new_name) {
432 _cleanup_free_ char *rs = NULL;
433 const char *fn;
434
435 fn = strjoina(new_name, ".nspawn");
436
437 rs = file_in_same_dir(path, fn);
438 if (!rs)
439 return -ENOMEM;
440
441 return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
442 }
443
444 int image_rename(Image *i, const char *new_name) {
445 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
446 _cleanup_free_ char *new_path = NULL, *nn = NULL;
447 _cleanup_strv_free_ char **settings = NULL;
448 unsigned file_attr = 0;
449 char **j;
450 int r;
451
452 assert(i);
453
454 if (!image_name_is_valid(new_name))
455 return -EINVAL;
456
457 if (path_equal(i->path, "/") ||
458 path_startswith(i->path, "/usr"))
459 return -EROFS;
460
461 settings = image_settings_path(i);
462 if (!settings)
463 return -ENOMEM;
464
465 /* Make sure we don't interfere with a running nspawn */
466 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
467 if (r < 0)
468 return r;
469
470 /* Make sure nobody takes the new name, between the time we
471 * checked it is currently unused in all search paths, and the
472 * time we take possesion of it */
473 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
474 if (r < 0)
475 return r;
476
477 r = image_find(new_name, NULL);
478 if (r < 0)
479 return r;
480 if (r > 0)
481 return -EEXIST;
482
483 switch (i->type) {
484
485 case IMAGE_DIRECTORY:
486 /* Turn of the immutable bit while we rename the image, so that we can rename it */
487 (void) read_attr_path(i->path, &file_attr);
488
489 if (file_attr & FS_IMMUTABLE_FL)
490 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
491
492 /* fall through */
493
494 case IMAGE_SUBVOLUME:
495 new_path = file_in_same_dir(i->path, new_name);
496 break;
497
498 case IMAGE_RAW: {
499 const char *fn;
500
501 fn = strjoina(new_name, ".raw");
502 new_path = file_in_same_dir(i->path, fn);
503 break;
504 }
505
506 default:
507 return -EOPNOTSUPP;
508 }
509
510 if (!new_path)
511 return -ENOMEM;
512
513 nn = strdup(new_name);
514 if (!nn)
515 return -ENOMEM;
516
517 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
518 if (r < 0)
519 return r;
520
521 /* Restore the immutable bit, if it was set before */
522 if (file_attr & FS_IMMUTABLE_FL)
523 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
524
525 free(i->path);
526 i->path = new_path;
527 new_path = NULL;
528
529 free(i->name);
530 i->name = nn;
531 nn = NULL;
532
533 STRV_FOREACH(j, settings) {
534 r = rename_settings_file(*j, new_name);
535 if (r < 0 && r != -ENOENT)
536 log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
537 }
538
539 return 0;
540 }
541
542 static int clone_settings_file(const char *path, const char *new_name) {
543 _cleanup_free_ char *rs = NULL;
544 const char *fn;
545
546 fn = strjoina(new_name, ".nspawn");
547
548 rs = file_in_same_dir(path, fn);
549 if (!rs)
550 return -ENOMEM;
551
552 return copy_file_atomic(path, rs, 0664, false, 0);
553 }
554
555 int image_clone(Image *i, const char *new_name, bool read_only) {
556 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
557 _cleanup_strv_free_ char **settings = NULL;
558 const char *new_path;
559 char **j;
560 int r;
561
562 assert(i);
563
564 if (!image_name_is_valid(new_name))
565 return -EINVAL;
566
567 settings = image_settings_path(i);
568 if (!settings)
569 return -ENOMEM;
570
571 /* Make sure nobody takes the new name, between the time we
572 * checked it is currently unused in all search paths, and the
573 * time we take possesion of it */
574 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
575 if (r < 0)
576 return r;
577
578 r = image_find(new_name, NULL);
579 if (r < 0)
580 return r;
581 if (r > 0)
582 return -EEXIST;
583
584 switch (i->type) {
585
586 case IMAGE_SUBVOLUME:
587 case IMAGE_DIRECTORY:
588 new_path = strjoina("/var/lib/machines/", new_name);
589
590 r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE);
591 break;
592
593 case IMAGE_RAW:
594 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
595
596 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
597 break;
598
599 default:
600 return -EOPNOTSUPP;
601 }
602
603 if (r < 0)
604 return r;
605
606 STRV_FOREACH(j, settings) {
607 r = clone_settings_file(*j, new_name);
608 if (r < 0 && r != -ENOENT)
609 log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
610 }
611
612 return 0;
613 }
614
615 int image_read_only(Image *i, bool b) {
616 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
617 int r;
618 assert(i);
619
620 if (path_equal(i->path, "/") ||
621 path_startswith(i->path, "/usr"))
622 return -EROFS;
623
624 /* Make sure we don't interfere with a running nspawn */
625 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
626 if (r < 0)
627 return r;
628
629 switch (i->type) {
630
631 case IMAGE_SUBVOLUME:
632 r = btrfs_subvol_set_read_only(i->path, b);
633 if (r < 0)
634 return r;
635
636 break;
637
638 case IMAGE_DIRECTORY:
639 /* For simple directory trees we cannot use the access
640 mode of the top-level directory, since it has an
641 effect on the container itself. However, we can
642 use the "immutable" flag, to at least make the
643 top-level directory read-only. It's not as good as
644 a read-only subvolume, but at least something, and
645 we can read the value back.*/
646
647 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
648 if (r < 0)
649 return r;
650
651 break;
652
653 case IMAGE_RAW: {
654 struct stat st;
655
656 if (stat(i->path, &st) < 0)
657 return -errno;
658
659 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
660 return -errno;
661
662 /* If the images is now read-only, it's a good time to
663 * defrag it, given that no write patterns will
664 * fragment it again. */
665 if (b)
666 (void) btrfs_defrag(i->path);
667 break;
668 }
669
670 default:
671 return -EOPNOTSUPP;
672 }
673
674 return 0;
675 }
676
677 int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
678 _cleanup_free_ char *p = NULL;
679 LockFile t = LOCK_FILE_INIT;
680 struct stat st;
681 int r;
682
683 assert(path);
684 assert(global);
685 assert(local);
686
687 /* Locks an image path. This actually creates two locks: one
688 * "local" one, next to the image path itself, which might be
689 * shared via NFS. And another "global" one, in /run, that
690 * uses the device/inode number. This has the benefit that we
691 * can even lock a tree that is a mount point, correctly. */
692
693 if (path_equal(path, "/"))
694 return -EBUSY;
695
696 if (!path_is_absolute(path))
697 return -EINVAL;
698
699 if (stat(path, &st) >= 0) {
700 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
701 return -ENOMEM;
702 }
703
704 r = make_lock_file_for(path, operation, &t);
705 if (r < 0)
706 return r;
707
708 if (p) {
709 mkdir_p("/run/systemd/nspawn/locks", 0700);
710
711 r = make_lock_file(p, operation, global);
712 if (r < 0) {
713 release_lock_file(&t);
714 return r;
715 }
716 }
717
718 *local = t;
719 return 0;
720 }
721
722 int image_set_limit(Image *i, uint64_t referenced_max) {
723 assert(i);
724
725 if (path_equal(i->path, "/") ||
726 path_startswith(i->path, "/usr"))
727 return -EROFS;
728
729 if (i->type != IMAGE_SUBVOLUME)
730 return -EOPNOTSUPP;
731
732 return btrfs_quota_limit(i->path, referenced_max);
733 }
734
735 int image_name_lock(const char *name, int operation, LockFile *ret) {
736 const char *p;
737
738 assert(name);
739 assert(ret);
740
741 /* Locks an image name, regardless of the precise path used. */
742
743 if (!image_name_is_valid(name))
744 return -EINVAL;
745
746 if (streq(name, ".host"))
747 return -EBUSY;
748
749 mkdir_p("/run/systemd/nspawn/locks", 0700);
750 p = strjoina("/run/systemd/nspawn/locks/name-", name);
751
752 return make_lock_file(p, operation, ret);
753 }
754
/* Checks whether a string is acceptable as an image name: it must pass
 * filename_is_valid() and utf8_is_valid(), must not be flagged by
 * string_has_cc(), and must not start with ".#", the prefix of the
 * temporary files used when atomically creating new files. */
bool image_name_is_valid(const char *s) {
        return filename_is_valid(s) &&
                !string_has_cc(s, NULL) &&
                utf8_is_valid(s) &&
                !startswith(s, ".#");
}
771
772 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
773 [IMAGE_DIRECTORY] = "directory",
774 [IMAGE_SUBVOLUME] = "subvolume",
775 [IMAGE_RAW] = "raw",
776 };
777
778 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);