]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/machine-image.c
Merge pull request #1213 from evverx/systemd-notify-log
[thirdparty/systemd.git] / src / shared / machine-image.c
CommitLineData
cd61c3bf
LP
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22#include <sys/statfs.h>
f2068bcc 23#include <linux/fs.h>
ebd93cb6 24#include <fcntl.h>
cd61c3bf 25
cd61c3bf
LP
26#include "utf8.h"
27#include "btrfs-util.h"
ebeccf9e 28#include "path-util.h"
ebd93cb6 29#include "copy.h"
30535c16 30#include "mkdir.h"
c6878637 31#include "rm-rf.h"
003dffde 32#include "machine-image.h"
cd61c3bf 33
/* NUL-separated list of directories that are searched for images, in
 * order of precedence. The list is terminated by the empty string that
 * results from the final explicit "\0" plus the implicit terminator. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0" /* legacy */
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
c2ce6a3d 39
cd61c3bf
LP
40Image *image_unref(Image *i) {
41 if (!i)
42 return NULL;
43
44 free(i->name);
45 free(i->path);
46 free(i);
47 return NULL;
48}
49
c2ce6a3d 50static int image_new(
cd61c3bf 51 ImageType t,
5fc7f358 52 const char *pretty,
cd61c3bf 53 const char *path,
5fc7f358 54 const char *filename,
cd61c3bf 55 bool read_only,
10f9c755 56 usec_t crtime,
cd61c3bf 57 usec_t mtime,
c2ce6a3d 58 Image **ret) {
cd61c3bf
LP
59
60 _cleanup_(image_unrefp) Image *i = NULL;
cd61c3bf 61
cd61c3bf
LP
62 assert(t >= 0);
63 assert(t < _IMAGE_TYPE_MAX);
5fc7f358
LP
64 assert(pretty);
65 assert(filename);
c2ce6a3d 66 assert(ret);
cd61c3bf 67
c2ce6a3d 68 i = new0(Image, 1);
cd61c3bf
LP
69 if (!i)
70 return -ENOMEM;
71
72 i->type = t;
73 i->read_only = read_only;
10f9c755 74 i->crtime = crtime;
cd61c3bf 75 i->mtime = mtime;
c19de711 76 i->usage = i->usage_exclusive = (uint64_t) -1;
b6b18498 77 i->limit = i->limit_exclusive = (uint64_t) -1;
cd61c3bf 78
5fc7f358 79 i->name = strdup(pretty);
cd61c3bf
LP
80 if (!i->name)
81 return -ENOMEM;
82
5fc7f358
LP
83 if (path)
84 i->path = strjoin(path, "/", filename, NULL);
85 else
86 i->path = strdup(filename);
ebeccf9e 87
5fc7f358
LP
88 if (!i->path)
89 return -ENOMEM;
90
91 path_kill_slashes(i->path);
cd61c3bf 92
c2ce6a3d 93 *ret = i;
cd61c3bf 94 i = NULL;
c2ce6a3d 95
cd61c3bf
LP
96 return 0;
97}
98
5fc7f358
LP
99static int image_make(
100 const char *pretty,
101 int dfd,
102 const char *path,
103 const char *filename,
104 Image **ret) {
105
c2ce6a3d 106 struct stat st;
5fc7f358 107 bool read_only;
cd61c3bf
LP
108 int r;
109
5fc7f358 110 assert(filename);
cd61c3bf 111
c2ce6a3d 112 /* We explicitly *do* follow symlinks here, since we want to
5f129649 113 * allow symlinking trees into /var/lib/machines/, and treat
c2ce6a3d 114 * them normally. */
cd61c3bf 115
5fc7f358 116 if (fstatat(dfd, filename, &st, 0) < 0)
c2ce6a3d 117 return -errno;
cd61c3bf 118
5fc7f358
LP
119 read_only =
120 (path && path_startswith(path, "/usr")) ||
08ff5529 121 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
86e339c8 122
c2ce6a3d 123 if (S_ISDIR(st.st_mode)) {
01b72568
LP
124 _cleanup_close_ int fd = -1;
125 unsigned file_attr = 0;
cd61c3bf 126
c2ce6a3d
LP
127 if (!ret)
128 return 1;
cd61c3bf 129
5fc7f358
LP
130 if (!pretty)
131 pretty = filename;
132
01b72568
LP
133 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
134 if (fd < 0)
135 return -errno;
136
c2ce6a3d
LP
137 /* btrfs subvolumes have inode 256 */
138 if (st.st_ino == 256) {
cd61c3bf 139
21222ea5
LP
140 r = btrfs_is_filesystem(fd);
141 if (r < 0)
142 return r;
143 if (r) {
10f9c755 144 BtrfsSubvolInfo info;
b6b18498 145 BtrfsQuotaInfo quota;
cd61c3bf 146
c2ce6a3d 147 /* It's a btrfs subvolume */
cd61c3bf 148
10f9c755
LP
149 r = btrfs_subvol_get_info_fd(fd, &info);
150 if (r < 0)
151 return r;
c2ce6a3d
LP
152
153 r = image_new(IMAGE_SUBVOLUME,
5fc7f358 154 pretty,
c2ce6a3d 155 path,
5fc7f358
LP
156 filename,
157 info.read_only || read_only,
10f9c755 158 info.otime,
c2ce6a3d 159 0,
c2ce6a3d
LP
160 ret);
161 if (r < 0)
162 return r;
163
b6b18498
LP
164 r = btrfs_subvol_get_quota_fd(fd, &quota);
165 if (r >= 0) {
cb81cd80 166 (*ret)->usage = quota.referenced;
c19de711 167 (*ret)->usage_exclusive = quota.exclusive;
b6b18498 168
cb81cd80 169 (*ret)->limit = quota.referenced_max;
b6b18498
LP
170 (*ret)->limit_exclusive = quota.exclusive_max;
171 }
172
c2ce6a3d 173 return 1;
cd61c3bf 174 }
c2ce6a3d 175 }
cd61c3bf 176
01b72568
LP
177 /* If the IMMUTABLE bit is set, we consider the
178 * directory read-only. Since the ioctl is not
179 * supported everywhere we ignore failures. */
180 (void) read_attr_fd(fd, &file_attr);
cd61c3bf 181
01b72568 182 /* It's just a normal directory. */
c2ce6a3d 183 r = image_new(IMAGE_DIRECTORY,
5fc7f358 184 pretty,
c2ce6a3d 185 path,
5fc7f358 186 filename,
01b72568 187 read_only || (file_attr & FS_IMMUTABLE_FL),
c2ce6a3d
LP
188 0,
189 0,
190 ret);
191 if (r < 0)
192 return r;
cd61c3bf 193
c2ce6a3d 194 return 1;
cd61c3bf 195
aceac2f0 196 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
10f9c755 197 usec_t crtime = 0;
cd61c3bf 198
aceac2f0 199 /* It's a RAW disk image */
cd61c3bf 200
c2ce6a3d
LP
201 if (!ret)
202 return 1;
cd61c3bf 203
5fc7f358 204 fd_getcrtime_at(dfd, filename, &crtime, 0);
10f9c755 205
5fc7f358
LP
206 if (!pretty)
207 pretty = strndupa(filename, strlen(filename) - 4);
10f9c755 208
aceac2f0 209 r = image_new(IMAGE_RAW,
5fc7f358 210 pretty,
c2ce6a3d 211 path,
5fc7f358
LP
212 filename,
213 !(st.st_mode & 0222) || read_only,
10f9c755 214 crtime,
c2ce6a3d 215 timespec_load(&st.st_mtim),
c2ce6a3d
LP
216 ret);
217 if (r < 0)
218 return r;
cd61c3bf 219
c19de711 220 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
b6b18498
LP
221 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
222
c2ce6a3d
LP
223 return 1;
224 }
cd61c3bf 225
c2ce6a3d
LP
226 return 0;
227}
cd61c3bf 228
c2ce6a3d
LP
229int image_find(const char *name, Image **ret) {
230 const char *path;
231 int r;
cd61c3bf 232
c2ce6a3d 233 assert(name);
cd61c3bf 234
c2ce6a3d
LP
235 /* There are no images with invalid names */
236 if (!image_name_is_valid(name))
237 return 0;
cd61c3bf 238
c2ce6a3d
LP
239 NULSTR_FOREACH(path, image_search_path) {
240 _cleanup_closedir_ DIR *d = NULL;
cd61c3bf 241
c2ce6a3d
LP
242 d = opendir(path);
243 if (!d) {
244 if (errno == ENOENT)
245 continue;
cd61c3bf 246
c2ce6a3d
LP
247 return -errno;
248 }
cd61c3bf 249
5fc7f358
LP
250 r = image_make(NULL, dirfd(d), path, name, ret);
251 if (r == 0 || r == -ENOENT) {
aceac2f0 252 _cleanup_free_ char *raw = NULL;
5fc7f358 253
aceac2f0
LP
254 raw = strappend(name, ".raw");
255 if (!raw)
5fc7f358
LP
256 return -ENOMEM;
257
aceac2f0 258 r = image_make(NULL, dirfd(d), path, raw, ret);
5fc7f358
LP
259 if (r == 0 || r == -ENOENT)
260 continue;
261 }
c2ce6a3d
LP
262 if (r < 0)
263 return r;
cd61c3bf 264
c2ce6a3d
LP
265 return 1;
266 }
267
5fc7f358 268 if (streq(name, ".host"))
27c88c4e 269 return image_make(".host", AT_FDCWD, NULL, "/", ret);
5fc7f358 270
c2ce6a3d
LP
271 return 0;
272};
273
274int image_discover(Hashmap *h) {
275 const char *path;
276 int r;
277
278 assert(h);
279
280 NULSTR_FOREACH(path, image_search_path) {
281 _cleanup_closedir_ DIR *d = NULL;
282 struct dirent *de;
283
284 d = opendir(path);
285 if (!d) {
286 if (errno == ENOENT)
a67a4c8c 287 continue;
c2ce6a3d
LP
288
289 return -errno;
290 }
291
292 FOREACH_DIRENT_ALL(de, d, return -errno) {
293 _cleanup_(image_unrefp) Image *image = NULL;
294
295 if (!image_name_is_valid(de->d_name))
296 continue;
297
298 if (hashmap_contains(h, de->d_name))
299 continue;
300
5fc7f358 301 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
c2ce6a3d
LP
302 if (r == 0 || r == -ENOENT)
303 continue;
304 if (r < 0)
305 return r;
306
307 r = hashmap_put(h, image->name, image);
308 if (r < 0)
309 return r;
310
311 image = NULL;
cd61c3bf
LP
312 }
313 }
314
5fc7f358
LP
315 if (!hashmap_contains(h, ".host")) {
316 _cleanup_(image_unrefp) Image *image = NULL;
317
318 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
319 if (r < 0)
320 return r;
321
322 r = hashmap_put(h, image->name, image);
323 if (r < 0)
324 return r;
325
326 image = NULL;
327
328 }
329
cd61c3bf
LP
330 return 0;
331}
332
333void image_hashmap_free(Hashmap *map) {
334 Image *i;
335
336 while ((i = hashmap_steal_first(map)))
337 image_unref(i);
338
339 hashmap_free(map);
340}
341
08682124 342int image_remove(Image *i) {
30535c16
LP
343 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
344 int r;
345
08682124
LP
346 assert(i);
347
348 if (path_equal(i->path, "/") ||
349 path_startswith(i->path, "/usr"))
350 return -EROFS;
351
30535c16
LP
352 /* Make sure we don't interfere with a running nspawn */
353 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
354 if (r < 0)
355 return r;
356
ebd93cb6
LP
357 switch (i->type) {
358
359 case IMAGE_SUBVOLUME:
d9e2daaf 360 return btrfs_subvol_remove(i->path, true);
ebd93cb6
LP
361
362 case IMAGE_DIRECTORY:
01b72568
LP
363 /* Allow deletion of read-only directories */
364 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
41d1ed05 365 return rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
01b72568 366
aceac2f0 367 case IMAGE_RAW:
41d1ed05
LP
368 if (unlink(i->path) < 0)
369 return -errno;
370
371 return 0;
ebd93cb6
LP
372
373 default:
15411c0c 374 return -EOPNOTSUPP;
ebd93cb6
LP
375 }
376}
377
378int image_rename(Image *i, const char *new_name) {
30535c16 379 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
ebd93cb6 380 _cleanup_free_ char *new_path = NULL, *nn = NULL;
01b72568 381 unsigned file_attr = 0;
ebd93cb6
LP
382 int r;
383
384 assert(i);
385
386 if (!image_name_is_valid(new_name))
387 return -EINVAL;
388
389 if (path_equal(i->path, "/") ||
390 path_startswith(i->path, "/usr"))
391 return -EROFS;
392
30535c16
LP
393 /* Make sure we don't interfere with a running nspawn */
394 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
395 if (r < 0)
396 return r;
397
398 /* Make sure nobody takes the new name, between the time we
399 * checked it is currently unused in all search paths, and the
400 * time we take possesion of it */
401 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
402 if (r < 0)
403 return r;
404
ebd93cb6
LP
405 r = image_find(new_name, NULL);
406 if (r < 0)
407 return r;
408 if (r > 0)
409 return -EEXIST;
410
411 switch (i->type) {
412
ebd93cb6 413 case IMAGE_DIRECTORY:
01b72568
LP
414 /* Turn of the immutable bit while we rename the image, so that we can rename it */
415 (void) read_attr_path(i->path, &file_attr);
416
417 if (file_attr & FS_IMMUTABLE_FL)
418 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
419
420 /* fall through */
421
422 case IMAGE_SUBVOLUME:
ebd93cb6
LP
423 new_path = file_in_same_dir(i->path, new_name);
424 break;
425
aceac2f0 426 case IMAGE_RAW: {
ebd93cb6
LP
427 const char *fn;
428
63c372cb 429 fn = strjoina(new_name, ".raw");
ebd93cb6
LP
430 new_path = file_in_same_dir(i->path, fn);
431 break;
432 }
433
434 default:
15411c0c 435 return -EOPNOTSUPP;
ebd93cb6
LP
436 }
437
438 if (!new_path)
439 return -ENOMEM;
440
441 nn = strdup(new_name);
442 if (!nn)
443 return -ENOMEM;
444
f85ef957
AC
445 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
446 if (r < 0)
447 return r;
ebd93cb6 448
01b72568
LP
449 /* Restore the immutable bit, if it was set before */
450 if (file_attr & FS_IMMUTABLE_FL)
451 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
452
ebd93cb6
LP
453 free(i->path);
454 i->path = new_path;
455 new_path = NULL;
456
457 free(i->name);
458 i->name = nn;
459 nn = NULL;
460
461 return 0;
462}
463
464int image_clone(Image *i, const char *new_name, bool read_only) {
30535c16 465 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
ebd93cb6
LP
466 const char *new_path;
467 int r;
468
469 assert(i);
470
471 if (!image_name_is_valid(new_name))
472 return -EINVAL;
473
30535c16
LP
474 /* Make sure nobody takes the new name, between the time we
475 * checked it is currently unused in all search paths, and the
476 * time we take possesion of it */
477 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
478 if (r < 0)
479 return r;
480
ebd93cb6
LP
481 r = image_find(new_name, NULL);
482 if (r < 0)
483 return r;
484 if (r > 0)
485 return -EEXIST;
486
487 switch (i->type) {
488
489 case IMAGE_SUBVOLUME:
490 case IMAGE_DIRECTORY:
63c372cb 491 new_path = strjoina("/var/lib/machines/", new_name);
ebd93cb6 492
f70a17f8 493 r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE);
ebd93cb6
LP
494 break;
495
aceac2f0 496 case IMAGE_RAW:
63c372cb 497 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
ebd93cb6 498
f2068bcc 499 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
ebd93cb6
LP
500 break;
501
502 default:
15411c0c 503 return -EOPNOTSUPP;
ebd93cb6
LP
504 }
505
506 if (r < 0)
507 return r;
508
509 return 0;
510}
511
512int image_read_only(Image *i, bool b) {
30535c16 513 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
ebd93cb6
LP
514 int r;
515 assert(i);
516
517 if (path_equal(i->path, "/") ||
518 path_startswith(i->path, "/usr"))
519 return -EROFS;
520
30535c16
LP
521 /* Make sure we don't interfere with a running nspawn */
522 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
523 if (r < 0)
524 return r;
525
ebd93cb6
LP
526 switch (i->type) {
527
528 case IMAGE_SUBVOLUME:
529 r = btrfs_subvol_set_read_only(i->path, b);
530 if (r < 0)
531 return r;
01b72568
LP
532
533 break;
534
535 case IMAGE_DIRECTORY:
536 /* For simple directory trees we cannot use the access
537 mode of the top-level directory, since it has an
538 effect on the container itself. However, we can
539 use the "immutable" flag, to at least make the
540 top-level directory read-only. It's not as good as
541 a read-only subvolume, but at least something, and
542 we can read the value back.*/
543
544 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
545 if (r < 0)
546 return r;
547
ebd93cb6
LP
548 break;
549
aceac2f0 550 case IMAGE_RAW: {
ebd93cb6
LP
551 struct stat st;
552
553 if (stat(i->path, &st) < 0)
554 return -errno;
555
556 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
557 return -errno;
f2068bcc
LP
558
559 /* If the images is now read-only, it's a good time to
560 * defrag it, given that no write patterns will
561 * fragment it again. */
562 if (b)
563 (void) btrfs_defrag(i->path);
ebd93cb6
LP
564 break;
565 }
566
ebd93cb6 567 default:
15411c0c 568 return -EOPNOTSUPP;
ebd93cb6
LP
569 }
570
571 return 0;
08682124
LP
572}
573
30535c16
LP
574int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
575 _cleanup_free_ char *p = NULL;
576 LockFile t = LOCK_FILE_INIT;
577 struct stat st;
578 int r;
579
580 assert(path);
581 assert(global);
582 assert(local);
583
584 /* Locks an image path. This actually creates two locks: one
585 * "local" one, next to the image path itself, which might be
586 * shared via NFS. And another "global" one, in /run, that
587 * uses the device/inode number. This has the benefit that we
588 * can even lock a tree that is a mount point, correctly. */
589
590 if (path_equal(path, "/"))
591 return -EBUSY;
592
593 if (!path_is_absolute(path))
594 return -EINVAL;
595
596 if (stat(path, &st) >= 0) {
597 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
598 return -ENOMEM;
599 }
600
601 r = make_lock_file_for(path, operation, &t);
602 if (r < 0)
603 return r;
604
605 if (p) {
7e7cddb2 606 mkdir_p("/run/systemd/nspawn/locks", 0700);
30535c16
LP
607
608 r = make_lock_file(p, operation, global);
609 if (r < 0) {
610 release_lock_file(&t);
611 return r;
612 }
613 }
614
615 *local = t;
616 return 0;
617}
618
cb81cd80 619int image_set_limit(Image *i, uint64_t referenced_max) {
d6ce17c7
LP
620 assert(i);
621
622 if (path_equal(i->path, "/") ||
623 path_startswith(i->path, "/usr"))
624 return -EROFS;
625
626 if (i->type != IMAGE_SUBVOLUME)
15411c0c 627 return -EOPNOTSUPP;
d6ce17c7 628
cb81cd80 629 return btrfs_quota_limit(i->path, referenced_max);
d6ce17c7
LP
630}
631
30535c16
LP
632int image_name_lock(const char *name, int operation, LockFile *ret) {
633 const char *p;
634
635 assert(name);
636 assert(ret);
637
638 /* Locks an image name, regardless of the precise path used. */
639
640 if (!image_name_is_valid(name))
641 return -EINVAL;
642
643 if (streq(name, ".host"))
644 return -EBUSY;
645
7e7cddb2 646 mkdir_p("/run/systemd/nspawn/locks", 0700);
63c372cb 647 p = strjoina("/run/systemd/nspawn/locks/name-", name);
30535c16
LP
648
649 return make_lock_file(p, operation, ret);
650}
651
/* Checks whether s is acceptable as name of an image: it must be a
 * valid file name, must not contain control characters, must be valid
 * UTF-8, and must not start with ".#". */
bool image_name_is_valid(const char *s) {
        if (!filename_is_valid(s))
                return false;

        if (string_has_cc(s, NULL))
                return false;

        if (!utf8_is_valid(s))
                return false;

        /* Temporary files for atomically creating new files */
        if (startswith(s, ".#"))
                return false;

        return true;
}
668
cd61c3bf
LP
669static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
670 [IMAGE_DIRECTORY] = "directory",
671 [IMAGE_SUBVOLUME] = "subvolume",
aceac2f0 672 [IMAGE_RAW] = "raw",
cd61c3bf
LP
673};
674
675DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);