]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/machine-image.c
util: rework rm_rf() logic
[thirdparty/systemd.git] / src / shared / machine-image.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/statfs.h>
23 #include <linux/fs.h>
24 #include <fcntl.h>
25
26 #include "utf8.h"
27 #include "btrfs-util.h"
28 #include "path-util.h"
29 #include "copy.h"
30 #include "mkdir.h"
31 #include "rm-rf.h"
32 #include "machine-image.h"
33
/* Directories that are scanned for machine images, stored as a list of
 * NUL-terminated strings that is walked with NULSTR_FOREACH(). The lookup
 * functions in this file visit the entries in the order given here, so an
 * image found in an earlier directory takes precedence over a later one. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0"
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
39
40 Image *image_unref(Image *i) {
41 if (!i)
42 return NULL;
43
44 free(i->name);
45 free(i->path);
46 free(i);
47 return NULL;
48 }
49
50 static int image_new(
51 ImageType t,
52 const char *pretty,
53 const char *path,
54 const char *filename,
55 bool read_only,
56 usec_t crtime,
57 usec_t mtime,
58 Image **ret) {
59
60 _cleanup_(image_unrefp) Image *i = NULL;
61
62 assert(t >= 0);
63 assert(t < _IMAGE_TYPE_MAX);
64 assert(pretty);
65 assert(filename);
66 assert(ret);
67
68 i = new0(Image, 1);
69 if (!i)
70 return -ENOMEM;
71
72 i->type = t;
73 i->read_only = read_only;
74 i->crtime = crtime;
75 i->mtime = mtime;
76 i->usage = i->usage_exclusive = (uint64_t) -1;
77 i->limit = i->limit_exclusive = (uint64_t) -1;
78
79 i->name = strdup(pretty);
80 if (!i->name)
81 return -ENOMEM;
82
83 if (path)
84 i->path = strjoin(path, "/", filename, NULL);
85 else
86 i->path = strdup(filename);
87
88 if (!i->path)
89 return -ENOMEM;
90
91 path_kill_slashes(i->path);
92
93 *ret = i;
94 i = NULL;
95
96 return 0;
97 }
98
99 static int image_make(
100 const char *pretty,
101 int dfd,
102 const char *path,
103 const char *filename,
104 Image **ret) {
105
106 struct stat st;
107 bool read_only;
108 int r;
109
110 assert(filename);
111
112 /* We explicitly *do* follow symlinks here, since we want to
113 * allow symlinking trees into /var/lib/machines/, and treat
114 * them normally. */
115
116 if (fstatat(dfd, filename, &st, 0) < 0)
117 return -errno;
118
119 read_only =
120 (path && path_startswith(path, "/usr")) ||
121 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
122
123 if (S_ISDIR(st.st_mode)) {
124 _cleanup_close_ int fd = -1;
125 unsigned file_attr = 0;
126
127 if (!ret)
128 return 1;
129
130 if (!pretty)
131 pretty = filename;
132
133 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
134 if (fd < 0)
135 return -errno;
136
137 /* btrfs subvolumes have inode 256 */
138 if (st.st_ino == 256) {
139 struct statfs sfs;
140
141 if (fstatfs(fd, &sfs) < 0)
142 return -errno;
143
144 if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) {
145 BtrfsSubvolInfo info;
146 BtrfsQuotaInfo quota;
147
148 /* It's a btrfs subvolume */
149
150 r = btrfs_subvol_get_info_fd(fd, &info);
151 if (r < 0)
152 return r;
153
154 r = image_new(IMAGE_SUBVOLUME,
155 pretty,
156 path,
157 filename,
158 info.read_only || read_only,
159 info.otime,
160 0,
161 ret);
162 if (r < 0)
163 return r;
164
165 r = btrfs_subvol_get_quota_fd(fd, &quota);
166 if (r >= 0) {
167 (*ret)->usage = quota.referenced;
168 (*ret)->usage_exclusive = quota.exclusive;
169
170 (*ret)->limit = quota.referenced_max;
171 (*ret)->limit_exclusive = quota.exclusive_max;
172 }
173
174 return 1;
175 }
176 }
177
178 /* If the IMMUTABLE bit is set, we consider the
179 * directory read-only. Since the ioctl is not
180 * supported everywhere we ignore failures. */
181 (void) read_attr_fd(fd, &file_attr);
182
183 /* It's just a normal directory. */
184 r = image_new(IMAGE_DIRECTORY,
185 pretty,
186 path,
187 filename,
188 read_only || (file_attr & FS_IMMUTABLE_FL),
189 0,
190 0,
191 ret);
192 if (r < 0)
193 return r;
194
195 return 1;
196
197 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
198 usec_t crtime = 0;
199
200 /* It's a RAW disk image */
201
202 if (!ret)
203 return 1;
204
205 fd_getcrtime_at(dfd, filename, &crtime, 0);
206
207 if (!pretty)
208 pretty = strndupa(filename, strlen(filename) - 4);
209
210 r = image_new(IMAGE_RAW,
211 pretty,
212 path,
213 filename,
214 !(st.st_mode & 0222) || read_only,
215 crtime,
216 timespec_load(&st.st_mtim),
217 ret);
218 if (r < 0)
219 return r;
220
221 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
222 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
223
224 return 1;
225 }
226
227 return 0;
228 }
229
230 int image_find(const char *name, Image **ret) {
231 const char *path;
232 int r;
233
234 assert(name);
235
236 /* There are no images with invalid names */
237 if (!image_name_is_valid(name))
238 return 0;
239
240 NULSTR_FOREACH(path, image_search_path) {
241 _cleanup_closedir_ DIR *d = NULL;
242
243 d = opendir(path);
244 if (!d) {
245 if (errno == ENOENT)
246 continue;
247
248 return -errno;
249 }
250
251 r = image_make(NULL, dirfd(d), path, name, ret);
252 if (r == 0 || r == -ENOENT) {
253 _cleanup_free_ char *raw = NULL;
254
255 raw = strappend(name, ".raw");
256 if (!raw)
257 return -ENOMEM;
258
259 r = image_make(NULL, dirfd(d), path, raw, ret);
260 if (r == 0 || r == -ENOENT)
261 continue;
262 }
263 if (r < 0)
264 return r;
265
266 return 1;
267 }
268
269 if (streq(name, ".host"))
270 return image_make(".host", AT_FDCWD, NULL, "/", ret);
271
272 return 0;
273 };
274
275 int image_discover(Hashmap *h) {
276 const char *path;
277 int r;
278
279 assert(h);
280
281 NULSTR_FOREACH(path, image_search_path) {
282 _cleanup_closedir_ DIR *d = NULL;
283 struct dirent *de;
284
285 d = opendir(path);
286 if (!d) {
287 if (errno == ENOENT)
288 continue;
289
290 return -errno;
291 }
292
293 FOREACH_DIRENT_ALL(de, d, return -errno) {
294 _cleanup_(image_unrefp) Image *image = NULL;
295
296 if (!image_name_is_valid(de->d_name))
297 continue;
298
299 if (hashmap_contains(h, de->d_name))
300 continue;
301
302 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
303 if (r == 0 || r == -ENOENT)
304 continue;
305 if (r < 0)
306 return r;
307
308 r = hashmap_put(h, image->name, image);
309 if (r < 0)
310 return r;
311
312 image = NULL;
313 }
314 }
315
316 if (!hashmap_contains(h, ".host")) {
317 _cleanup_(image_unrefp) Image *image = NULL;
318
319 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
320 if (r < 0)
321 return r;
322
323 r = hashmap_put(h, image->name, image);
324 if (r < 0)
325 return r;
326
327 image = NULL;
328
329 }
330
331 return 0;
332 }
333
334 void image_hashmap_free(Hashmap *map) {
335 Image *i;
336
337 while ((i = hashmap_steal_first(map)))
338 image_unref(i);
339
340 hashmap_free(map);
341 }
342
343 int image_remove(Image *i) {
344 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
345 int r;
346
347 assert(i);
348
349 if (path_equal(i->path, "/") ||
350 path_startswith(i->path, "/usr"))
351 return -EROFS;
352
353 /* Make sure we don't interfere with a running nspawn */
354 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
355 if (r < 0)
356 return r;
357
358 switch (i->type) {
359
360 case IMAGE_SUBVOLUME:
361 return btrfs_subvol_remove(i->path);
362
363 case IMAGE_DIRECTORY:
364 /* Allow deletion of read-only directories */
365 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
366
367 /* fall through */
368
369 case IMAGE_RAW:
370 return rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL);
371
372 default:
373 return -EOPNOTSUPP;
374 }
375 }
376
377 int image_rename(Image *i, const char *new_name) {
378 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
379 _cleanup_free_ char *new_path = NULL, *nn = NULL;
380 unsigned file_attr = 0;
381 int r;
382
383 assert(i);
384
385 if (!image_name_is_valid(new_name))
386 return -EINVAL;
387
388 if (path_equal(i->path, "/") ||
389 path_startswith(i->path, "/usr"))
390 return -EROFS;
391
392 /* Make sure we don't interfere with a running nspawn */
393 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
394 if (r < 0)
395 return r;
396
397 /* Make sure nobody takes the new name, between the time we
398 * checked it is currently unused in all search paths, and the
399 * time we take possesion of it */
400 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
401 if (r < 0)
402 return r;
403
404 r = image_find(new_name, NULL);
405 if (r < 0)
406 return r;
407 if (r > 0)
408 return -EEXIST;
409
410 switch (i->type) {
411
412 case IMAGE_DIRECTORY:
413 /* Turn of the immutable bit while we rename the image, so that we can rename it */
414 (void) read_attr_path(i->path, &file_attr);
415
416 if (file_attr & FS_IMMUTABLE_FL)
417 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
418
419 /* fall through */
420
421 case IMAGE_SUBVOLUME:
422 new_path = file_in_same_dir(i->path, new_name);
423 break;
424
425 case IMAGE_RAW: {
426 const char *fn;
427
428 fn = strjoina(new_name, ".raw");
429 new_path = file_in_same_dir(i->path, fn);
430 break;
431 }
432
433 default:
434 return -EOPNOTSUPP;
435 }
436
437 if (!new_path)
438 return -ENOMEM;
439
440 nn = strdup(new_name);
441 if (!nn)
442 return -ENOMEM;
443
444 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
445 if (r < 0)
446 return r;
447
448 /* Restore the immutable bit, if it was set before */
449 if (file_attr & FS_IMMUTABLE_FL)
450 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
451
452 free(i->path);
453 i->path = new_path;
454 new_path = NULL;
455
456 free(i->name);
457 i->name = nn;
458 nn = NULL;
459
460 return 0;
461 }
462
463 int image_clone(Image *i, const char *new_name, bool read_only) {
464 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
465 const char *new_path;
466 int r;
467
468 assert(i);
469
470 if (!image_name_is_valid(new_name))
471 return -EINVAL;
472
473 /* Make sure nobody takes the new name, between the time we
474 * checked it is currently unused in all search paths, and the
475 * time we take possesion of it */
476 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
477 if (r < 0)
478 return r;
479
480 r = image_find(new_name, NULL);
481 if (r < 0)
482 return r;
483 if (r > 0)
484 return -EEXIST;
485
486 switch (i->type) {
487
488 case IMAGE_SUBVOLUME:
489 case IMAGE_DIRECTORY:
490 new_path = strjoina("/var/lib/machines/", new_name);
491
492 r = btrfs_subvol_snapshot(i->path, new_path, read_only, true);
493 break;
494
495 case IMAGE_RAW:
496 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
497
498 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
499 break;
500
501 default:
502 return -EOPNOTSUPP;
503 }
504
505 if (r < 0)
506 return r;
507
508 return 0;
509 }
510
511 int image_read_only(Image *i, bool b) {
512 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
513 int r;
514 assert(i);
515
516 if (path_equal(i->path, "/") ||
517 path_startswith(i->path, "/usr"))
518 return -EROFS;
519
520 /* Make sure we don't interfere with a running nspawn */
521 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
522 if (r < 0)
523 return r;
524
525 switch (i->type) {
526
527 case IMAGE_SUBVOLUME:
528 r = btrfs_subvol_set_read_only(i->path, b);
529 if (r < 0)
530 return r;
531
532 break;
533
534 case IMAGE_DIRECTORY:
535 /* For simple directory trees we cannot use the access
536 mode of the top-level directory, since it has an
537 effect on the container itself. However, we can
538 use the "immutable" flag, to at least make the
539 top-level directory read-only. It's not as good as
540 a read-only subvolume, but at least something, and
541 we can read the value back.*/
542
543 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
544 if (r < 0)
545 return r;
546
547 break;
548
549 case IMAGE_RAW: {
550 struct stat st;
551
552 if (stat(i->path, &st) < 0)
553 return -errno;
554
555 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
556 return -errno;
557
558 /* If the images is now read-only, it's a good time to
559 * defrag it, given that no write patterns will
560 * fragment it again. */
561 if (b)
562 (void) btrfs_defrag(i->path);
563 break;
564 }
565
566 default:
567 return -EOPNOTSUPP;
568 }
569
570 return 0;
571 }
572
573 int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
574 _cleanup_free_ char *p = NULL;
575 LockFile t = LOCK_FILE_INIT;
576 struct stat st;
577 int r;
578
579 assert(path);
580 assert(global);
581 assert(local);
582
583 /* Locks an image path. This actually creates two locks: one
584 * "local" one, next to the image path itself, which might be
585 * shared via NFS. And another "global" one, in /run, that
586 * uses the device/inode number. This has the benefit that we
587 * can even lock a tree that is a mount point, correctly. */
588
589 if (path_equal(path, "/"))
590 return -EBUSY;
591
592 if (!path_is_absolute(path))
593 return -EINVAL;
594
595 if (stat(path, &st) >= 0) {
596 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
597 return -ENOMEM;
598 }
599
600 r = make_lock_file_for(path, operation, &t);
601 if (r < 0)
602 return r;
603
604 if (p) {
605 mkdir_p("/run/systemd/nspawn/locks", 0600);
606
607 r = make_lock_file(p, operation, global);
608 if (r < 0) {
609 release_lock_file(&t);
610 return r;
611 }
612 }
613
614 *local = t;
615 return 0;
616 }
617
618 int image_set_limit(Image *i, uint64_t referenced_max) {
619 assert(i);
620
621 if (path_equal(i->path, "/") ||
622 path_startswith(i->path, "/usr"))
623 return -EROFS;
624
625 if (i->type != IMAGE_SUBVOLUME)
626 return -EOPNOTSUPP;
627
628 return btrfs_quota_limit(i->path, referenced_max);
629 }
630
631 int image_name_lock(const char *name, int operation, LockFile *ret) {
632 const char *p;
633
634 assert(name);
635 assert(ret);
636
637 /* Locks an image name, regardless of the precise path used. */
638
639 if (!image_name_is_valid(name))
640 return -EINVAL;
641
642 if (streq(name, ".host"))
643 return -EBUSY;
644
645 mkdir_p("/run/systemd/nspawn/locks", 0600);
646 p = strjoina("/run/systemd/nspawn/locks/name-", name);
647
648 return make_lock_file(p, operation, ret);
649 }
650
/* Checks whether "s" may be used as an image name. A name is accepted if it
 * passes filename_is_valid(), string_has_cc() finds nothing in it, it passes
 * utf8_is_valid(), and it does not start with ".#" (the prefix of temporary
 * files used for atomically creating new files). */
bool image_name_is_valid(const char *s) {
        return filename_is_valid(s) &&
               !string_has_cc(s, NULL) &&
               utf8_is_valid(s) &&
               !startswith(s, ".#");
}
667
668 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
669 [IMAGE_DIRECTORY] = "directory",
670 [IMAGE_SUBVOLUME] = "subvolume",
671 [IMAGE_RAW] = "raw",
672 };
673
674 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);