]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/machine-image.c
util: rework rm_rf() logic
[thirdparty/systemd.git] / src / shared / machine-image.c
CommitLineData
cd61c3bf
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/statfs.h>
f2068bcc 23#include <linux/fs.h>
ebd93cb6 24#include <fcntl.h>
cd61c3bf 25
cd61c3bf
LP
26#include "utf8.h"
27#include "btrfs-util.h"
ebeccf9e 28#include "path-util.h"
ebd93cb6 29#include "copy.h"
30535c16 30#include "mkdir.h"
c6878637 31#include "rm-rf.h"
003dffde 32#include "machine-image.h"
cd61c3bf 33
/* Directories that are scanned for machine images. The list is a single
 * NUL-separated string (every entry carries an explicit "\0", the literal's
 * implicit terminator ends the list) and is iterated with NULSTR_FOREACH().
 * Entries listed first are looked at first. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0"
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
c2ce6a3d 39
cd61c3bf
LP
40Image *image_unref(Image *i) {
41 if (!i)
42 return NULL;
43
44 free(i->name);
45 free(i->path);
46 free(i);
47 return NULL;
48}
49
c2ce6a3d 50static int image_new(
cd61c3bf 51 ImageType t,
5fc7f358 52 const char *pretty,
cd61c3bf 53 const char *path,
5fc7f358 54 const char *filename,
cd61c3bf 55 bool read_only,
10f9c755 56 usec_t crtime,
cd61c3bf 57 usec_t mtime,
c2ce6a3d 58 Image **ret) {
cd61c3bf
LP
59
60 _cleanup_(image_unrefp) Image *i = NULL;
cd61c3bf 61
cd61c3bf
LP
62 assert(t >= 0);
63 assert(t < _IMAGE_TYPE_MAX);
5fc7f358
LP
64 assert(pretty);
65 assert(filename);
c2ce6a3d 66 assert(ret);
cd61c3bf 67
c2ce6a3d 68 i = new0(Image, 1);
cd61c3bf
LP
69 if (!i)
70 return -ENOMEM;
71
72 i->type = t;
73 i->read_only = read_only;
10f9c755 74 i->crtime = crtime;
cd61c3bf 75 i->mtime = mtime;
c19de711 76 i->usage = i->usage_exclusive = (uint64_t) -1;
b6b18498 77 i->limit = i->limit_exclusive = (uint64_t) -1;
cd61c3bf 78
5fc7f358 79 i->name = strdup(pretty);
cd61c3bf
LP
80 if (!i->name)
81 return -ENOMEM;
82
5fc7f358
LP
83 if (path)
84 i->path = strjoin(path, "/", filename, NULL);
85 else
86 i->path = strdup(filename);
ebeccf9e 87
5fc7f358
LP
88 if (!i->path)
89 return -ENOMEM;
90
91 path_kill_slashes(i->path);
cd61c3bf 92
c2ce6a3d 93 *ret = i;
cd61c3bf 94 i = NULL;
c2ce6a3d 95
cd61c3bf
LP
96 return 0;
97}
98
5fc7f358
LP
99static int image_make(
100 const char *pretty,
101 int dfd,
102 const char *path,
103 const char *filename,
104 Image **ret) {
105
c2ce6a3d 106 struct stat st;
5fc7f358 107 bool read_only;
cd61c3bf
LP
108 int r;
109
5fc7f358 110 assert(filename);
cd61c3bf 111
c2ce6a3d 112 /* We explicitly *do* follow symlinks here, since we want to
5f129649 113 * allow symlinking trees into /var/lib/machines/, and treat
c2ce6a3d 114 * them normally. */
cd61c3bf 115
5fc7f358 116 if (fstatat(dfd, filename, &st, 0) < 0)
c2ce6a3d 117 return -errno;
cd61c3bf 118
5fc7f358
LP
119 read_only =
120 (path && path_startswith(path, "/usr")) ||
08ff5529 121 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
86e339c8 122
c2ce6a3d 123 if (S_ISDIR(st.st_mode)) {
01b72568
LP
124 _cleanup_close_ int fd = -1;
125 unsigned file_attr = 0;
cd61c3bf 126
c2ce6a3d
LP
127 if (!ret)
128 return 1;
cd61c3bf 129
5fc7f358
LP
130 if (!pretty)
131 pretty = filename;
132
01b72568
LP
133 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
134 if (fd < 0)
135 return -errno;
136
c2ce6a3d
LP
137 /* btrfs subvolumes have inode 256 */
138 if (st.st_ino == 256) {
c2ce6a3d 139 struct statfs sfs;
cd61c3bf 140
c2ce6a3d
LP
141 if (fstatfs(fd, &sfs) < 0)
142 return -errno;
cd61c3bf 143
c2ce6a3d 144 if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) {
10f9c755 145 BtrfsSubvolInfo info;
b6b18498 146 BtrfsQuotaInfo quota;
cd61c3bf 147
c2ce6a3d 148 /* It's a btrfs subvolume */
cd61c3bf 149
10f9c755
LP
150 r = btrfs_subvol_get_info_fd(fd, &info);
151 if (r < 0)
152 return r;
c2ce6a3d
LP
153
154 r = image_new(IMAGE_SUBVOLUME,
5fc7f358 155 pretty,
c2ce6a3d 156 path,
5fc7f358
LP
157 filename,
158 info.read_only || read_only,
10f9c755 159 info.otime,
c2ce6a3d 160 0,
c2ce6a3d
LP
161 ret);
162 if (r < 0)
163 return r;
164
b6b18498
LP
165 r = btrfs_subvol_get_quota_fd(fd, &quota);
166 if (r >= 0) {
cb81cd80 167 (*ret)->usage = quota.referenced;
c19de711 168 (*ret)->usage_exclusive = quota.exclusive;
b6b18498 169
cb81cd80 170 (*ret)->limit = quota.referenced_max;
b6b18498
LP
171 (*ret)->limit_exclusive = quota.exclusive_max;
172 }
173
c2ce6a3d 174 return 1;
cd61c3bf 175 }
c2ce6a3d 176 }
cd61c3bf 177
01b72568
LP
178 /* If the IMMUTABLE bit is set, we consider the
179 * directory read-only. Since the ioctl is not
180 * supported everywhere we ignore failures. */
181 (void) read_attr_fd(fd, &file_attr);
cd61c3bf 182
01b72568 183 /* It's just a normal directory. */
c2ce6a3d 184 r = image_new(IMAGE_DIRECTORY,
5fc7f358 185 pretty,
c2ce6a3d 186 path,
5fc7f358 187 filename,
01b72568 188 read_only || (file_attr & FS_IMMUTABLE_FL),
c2ce6a3d
LP
189 0,
190 0,
191 ret);
192 if (r < 0)
193 return r;
cd61c3bf 194
c2ce6a3d 195 return 1;
cd61c3bf 196
aceac2f0 197 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
10f9c755 198 usec_t crtime = 0;
cd61c3bf 199
aceac2f0 200 /* It's a RAW disk image */
cd61c3bf 201
c2ce6a3d
LP
202 if (!ret)
203 return 1;
cd61c3bf 204
5fc7f358 205 fd_getcrtime_at(dfd, filename, &crtime, 0);
10f9c755 206
5fc7f358
LP
207 if (!pretty)
208 pretty = strndupa(filename, strlen(filename) - 4);
10f9c755 209
aceac2f0 210 r = image_new(IMAGE_RAW,
5fc7f358 211 pretty,
c2ce6a3d 212 path,
5fc7f358
LP
213 filename,
214 !(st.st_mode & 0222) || read_only,
10f9c755 215 crtime,
c2ce6a3d 216 timespec_load(&st.st_mtim),
c2ce6a3d
LP
217 ret);
218 if (r < 0)
219 return r;
cd61c3bf 220
c19de711 221 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
b6b18498
LP
222 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
223
c2ce6a3d
LP
224 return 1;
225 }
cd61c3bf 226
c2ce6a3d
LP
227 return 0;
228}
cd61c3bf 229
c2ce6a3d
LP
230int image_find(const char *name, Image **ret) {
231 const char *path;
232 int r;
cd61c3bf 233
c2ce6a3d 234 assert(name);
cd61c3bf 235
c2ce6a3d
LP
236 /* There are no images with invalid names */
237 if (!image_name_is_valid(name))
238 return 0;
cd61c3bf 239
c2ce6a3d
LP
240 NULSTR_FOREACH(path, image_search_path) {
241 _cleanup_closedir_ DIR *d = NULL;
cd61c3bf 242
c2ce6a3d
LP
243 d = opendir(path);
244 if (!d) {
245 if (errno == ENOENT)
246 continue;
cd61c3bf 247
c2ce6a3d
LP
248 return -errno;
249 }
cd61c3bf 250
5fc7f358
LP
251 r = image_make(NULL, dirfd(d), path, name, ret);
252 if (r == 0 || r == -ENOENT) {
aceac2f0 253 _cleanup_free_ char *raw = NULL;
5fc7f358 254
aceac2f0
LP
255 raw = strappend(name, ".raw");
256 if (!raw)
5fc7f358
LP
257 return -ENOMEM;
258
aceac2f0 259 r = image_make(NULL, dirfd(d), path, raw, ret);
5fc7f358
LP
260 if (r == 0 || r == -ENOENT)
261 continue;
262 }
c2ce6a3d
LP
263 if (r < 0)
264 return r;
cd61c3bf 265
c2ce6a3d
LP
266 return 1;
267 }
268
5fc7f358 269 if (streq(name, ".host"))
27c88c4e 270 return image_make(".host", AT_FDCWD, NULL, "/", ret);
5fc7f358 271
c2ce6a3d
LP
272 return 0;
273};
274
275int image_discover(Hashmap *h) {
276 const char *path;
277 int r;
278
279 assert(h);
280
281 NULSTR_FOREACH(path, image_search_path) {
282 _cleanup_closedir_ DIR *d = NULL;
283 struct dirent *de;
284
285 d = opendir(path);
286 if (!d) {
287 if (errno == ENOENT)
a67a4c8c 288 continue;
c2ce6a3d
LP
289
290 return -errno;
291 }
292
293 FOREACH_DIRENT_ALL(de, d, return -errno) {
294 _cleanup_(image_unrefp) Image *image = NULL;
295
296 if (!image_name_is_valid(de->d_name))
297 continue;
298
299 if (hashmap_contains(h, de->d_name))
300 continue;
301
5fc7f358 302 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
c2ce6a3d
LP
303 if (r == 0 || r == -ENOENT)
304 continue;
305 if (r < 0)
306 return r;
307
308 r = hashmap_put(h, image->name, image);
309 if (r < 0)
310 return r;
311
312 image = NULL;
cd61c3bf
LP
313 }
314 }
315
5fc7f358
LP
316 if (!hashmap_contains(h, ".host")) {
317 _cleanup_(image_unrefp) Image *image = NULL;
318
319 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
320 if (r < 0)
321 return r;
322
323 r = hashmap_put(h, image->name, image);
324 if (r < 0)
325 return r;
326
327 image = NULL;
328
329 }
330
cd61c3bf
LP
331 return 0;
332}
333
334void image_hashmap_free(Hashmap *map) {
335 Image *i;
336
337 while ((i = hashmap_steal_first(map)))
338 image_unref(i);
339
340 hashmap_free(map);
341}
342
08682124 343int image_remove(Image *i) {
30535c16
LP
344 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
345 int r;
346
08682124
LP
347 assert(i);
348
349 if (path_equal(i->path, "/") ||
350 path_startswith(i->path, "/usr"))
351 return -EROFS;
352
30535c16
LP
353 /* Make sure we don't interfere with a running nspawn */
354 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
355 if (r < 0)
356 return r;
357
ebd93cb6
LP
358 switch (i->type) {
359
360 case IMAGE_SUBVOLUME:
08682124 361 return btrfs_subvol_remove(i->path);
ebd93cb6
LP
362
363 case IMAGE_DIRECTORY:
01b72568
LP
364 /* Allow deletion of read-only directories */
365 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
366
367 /* fall through */
368
aceac2f0 369 case IMAGE_RAW:
c6878637 370 return rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL);
ebd93cb6
LP
371
372 default:
15411c0c 373 return -EOPNOTSUPP;
ebd93cb6
LP
374 }
375}
376
377int image_rename(Image *i, const char *new_name) {
30535c16 378 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
ebd93cb6 379 _cleanup_free_ char *new_path = NULL, *nn = NULL;
01b72568 380 unsigned file_attr = 0;
ebd93cb6
LP
381 int r;
382
383 assert(i);
384
385 if (!image_name_is_valid(new_name))
386 return -EINVAL;
387
388 if (path_equal(i->path, "/") ||
389 path_startswith(i->path, "/usr"))
390 return -EROFS;
391
30535c16
LP
392 /* Make sure we don't interfere with a running nspawn */
393 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
394 if (r < 0)
395 return r;
396
397 /* Make sure nobody takes the new name, between the time we
398 * checked it is currently unused in all search paths, and the
399 * time we take possesion of it */
400 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
401 if (r < 0)
402 return r;
403
ebd93cb6
LP
404 r = image_find(new_name, NULL);
405 if (r < 0)
406 return r;
407 if (r > 0)
408 return -EEXIST;
409
410 switch (i->type) {
411
ebd93cb6 412 case IMAGE_DIRECTORY:
01b72568
LP
413 /* Turn of the immutable bit while we rename the image, so that we can rename it */
414 (void) read_attr_path(i->path, &file_attr);
415
416 if (file_attr & FS_IMMUTABLE_FL)
417 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
418
419 /* fall through */
420
421 case IMAGE_SUBVOLUME:
ebd93cb6
LP
422 new_path = file_in_same_dir(i->path, new_name);
423 break;
424
aceac2f0 425 case IMAGE_RAW: {
ebd93cb6
LP
426 const char *fn;
427
63c372cb 428 fn = strjoina(new_name, ".raw");
ebd93cb6
LP
429 new_path = file_in_same_dir(i->path, fn);
430 break;
431 }
432
433 default:
15411c0c 434 return -EOPNOTSUPP;
ebd93cb6
LP
435 }
436
437 if (!new_path)
438 return -ENOMEM;
439
440 nn = strdup(new_name);
441 if (!nn)
442 return -ENOMEM;
443
f85ef957
AC
444 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
445 if (r < 0)
446 return r;
ebd93cb6 447
01b72568
LP
448 /* Restore the immutable bit, if it was set before */
449 if (file_attr & FS_IMMUTABLE_FL)
450 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
451
ebd93cb6
LP
452 free(i->path);
453 i->path = new_path;
454 new_path = NULL;
455
456 free(i->name);
457 i->name = nn;
458 nn = NULL;
459
460 return 0;
461}
462
463int image_clone(Image *i, const char *new_name, bool read_only) {
30535c16 464 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
ebd93cb6
LP
465 const char *new_path;
466 int r;
467
468 assert(i);
469
470 if (!image_name_is_valid(new_name))
471 return -EINVAL;
472
30535c16
LP
473 /* Make sure nobody takes the new name, between the time we
474 * checked it is currently unused in all search paths, and the
475 * time we take possesion of it */
476 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
477 if (r < 0)
478 return r;
479
ebd93cb6
LP
480 r = image_find(new_name, NULL);
481 if (r < 0)
482 return r;
483 if (r > 0)
484 return -EEXIST;
485
486 switch (i->type) {
487
488 case IMAGE_SUBVOLUME:
489 case IMAGE_DIRECTORY:
63c372cb 490 new_path = strjoina("/var/lib/machines/", new_name);
ebd93cb6
LP
491
492 r = btrfs_subvol_snapshot(i->path, new_path, read_only, true);
493 break;
494
aceac2f0 495 case IMAGE_RAW:
63c372cb 496 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
ebd93cb6 497
f2068bcc 498 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
ebd93cb6
LP
499 break;
500
501 default:
15411c0c 502 return -EOPNOTSUPP;
ebd93cb6
LP
503 }
504
505 if (r < 0)
506 return r;
507
508 return 0;
509}
510
511int image_read_only(Image *i, bool b) {
30535c16 512 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
ebd93cb6
LP
513 int r;
514 assert(i);
515
516 if (path_equal(i->path, "/") ||
517 path_startswith(i->path, "/usr"))
518 return -EROFS;
519
30535c16
LP
520 /* Make sure we don't interfere with a running nspawn */
521 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
522 if (r < 0)
523 return r;
524
ebd93cb6
LP
525 switch (i->type) {
526
527 case IMAGE_SUBVOLUME:
528 r = btrfs_subvol_set_read_only(i->path, b);
529 if (r < 0)
530 return r;
01b72568
LP
531
532 break;
533
534 case IMAGE_DIRECTORY:
535 /* For simple directory trees we cannot use the access
536 mode of the top-level directory, since it has an
537 effect on the container itself. However, we can
538 use the "immutable" flag, to at least make the
539 top-level directory read-only. It's not as good as
540 a read-only subvolume, but at least something, and
541 we can read the value back.*/
542
543 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
544 if (r < 0)
545 return r;
546
ebd93cb6
LP
547 break;
548
aceac2f0 549 case IMAGE_RAW: {
ebd93cb6
LP
550 struct stat st;
551
552 if (stat(i->path, &st) < 0)
553 return -errno;
554
555 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
556 return -errno;
f2068bcc
LP
557
558 /* If the images is now read-only, it's a good time to
559 * defrag it, given that no write patterns will
560 * fragment it again. */
561 if (b)
562 (void) btrfs_defrag(i->path);
ebd93cb6
LP
563 break;
564 }
565
ebd93cb6 566 default:
15411c0c 567 return -EOPNOTSUPP;
ebd93cb6
LP
568 }
569
570 return 0;
08682124
LP
571}
572
30535c16
LP
573int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
574 _cleanup_free_ char *p = NULL;
575 LockFile t = LOCK_FILE_INIT;
576 struct stat st;
577 int r;
578
579 assert(path);
580 assert(global);
581 assert(local);
582
583 /* Locks an image path. This actually creates two locks: one
584 * "local" one, next to the image path itself, which might be
585 * shared via NFS. And another "global" one, in /run, that
586 * uses the device/inode number. This has the benefit that we
587 * can even lock a tree that is a mount point, correctly. */
588
589 if (path_equal(path, "/"))
590 return -EBUSY;
591
592 if (!path_is_absolute(path))
593 return -EINVAL;
594
595 if (stat(path, &st) >= 0) {
596 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
597 return -ENOMEM;
598 }
599
600 r = make_lock_file_for(path, operation, &t);
601 if (r < 0)
602 return r;
603
604 if (p) {
605 mkdir_p("/run/systemd/nspawn/locks", 0600);
606
607 r = make_lock_file(p, operation, global);
608 if (r < 0) {
609 release_lock_file(&t);
610 return r;
611 }
612 }
613
614 *local = t;
615 return 0;
616}
617
cb81cd80 618int image_set_limit(Image *i, uint64_t referenced_max) {
d6ce17c7
LP
619 assert(i);
620
621 if (path_equal(i->path, "/") ||
622 path_startswith(i->path, "/usr"))
623 return -EROFS;
624
625 if (i->type != IMAGE_SUBVOLUME)
15411c0c 626 return -EOPNOTSUPP;
d6ce17c7 627
cb81cd80 628 return btrfs_quota_limit(i->path, referenced_max);
d6ce17c7
LP
629}
630
30535c16
LP
631int image_name_lock(const char *name, int operation, LockFile *ret) {
632 const char *p;
633
634 assert(name);
635 assert(ret);
636
637 /* Locks an image name, regardless of the precise path used. */
638
639 if (!image_name_is_valid(name))
640 return -EINVAL;
641
642 if (streq(name, ".host"))
643 return -EBUSY;
644
645 mkdir_p("/run/systemd/nspawn/locks", 0600);
63c372cb 646 p = strjoina("/run/systemd/nspawn/locks/name-", name);
30535c16
LP
647
648 return make_lock_file(p, operation, ret);
649}
650
/* Checks whether "s" may be used as an image name. A valid name passes
 * filename_is_valid(), contains nothing string_has_cc() objects to, is valid
 * UTF-8, and does not start with ".#" (that prefix is used by temporary
 * files for atomically creating new files). */
bool image_name_is_valid(const char *s) {
        return filename_is_valid(s) &&
               !string_has_cc(s, NULL) &&
               utf8_is_valid(s) &&
               !startswith(s, ".#");
}
667
cd61c3bf
LP
668static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
669 [IMAGE_DIRECTORY] = "directory",
670 [IMAGE_SUBVOLUME] = "subvolume",
aceac2f0 671 [IMAGE_RAW] = "raw",
cd61c3bf
LP
672};
673
674DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);