]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/machine-image.c
import: rename "gpt" disk image type to "raw"
[thirdparty/systemd.git] / src / shared / machine-image.c
CommitLineData
cd61c3bf
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/statfs.h>
f2068bcc 23#include <linux/fs.h>
ebd93cb6 24#include <fcntl.h>
cd61c3bf
LP
25
26#include "strv.h"
27#include "utf8.h"
28#include "btrfs-util.h"
ebeccf9e 29#include "path-util.h"
ebd93cb6 30#include "copy.h"
30535c16 31#include "mkdir.h"
003dffde 32#include "machine-image.h"
cd61c3bf 33
/* Directories that are scanned for images, stored as a sequence of
 * NUL-terminated strings. Entries are visited in the order listed. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0"
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
c2ce6a3d 39
cd61c3bf
LP
40Image *image_unref(Image *i) {
41 if (!i)
42 return NULL;
43
44 free(i->name);
45 free(i->path);
46 free(i);
47 return NULL;
48}
49
c2ce6a3d 50static int image_new(
cd61c3bf 51 ImageType t,
5fc7f358 52 const char *pretty,
cd61c3bf 53 const char *path,
5fc7f358 54 const char *filename,
cd61c3bf 55 bool read_only,
10f9c755 56 usec_t crtime,
cd61c3bf 57 usec_t mtime,
c2ce6a3d 58 Image **ret) {
cd61c3bf
LP
59
60 _cleanup_(image_unrefp) Image *i = NULL;
cd61c3bf 61
cd61c3bf
LP
62 assert(t >= 0);
63 assert(t < _IMAGE_TYPE_MAX);
5fc7f358
LP
64 assert(pretty);
65 assert(filename);
c2ce6a3d 66 assert(ret);
cd61c3bf 67
c2ce6a3d 68 i = new0(Image, 1);
cd61c3bf
LP
69 if (!i)
70 return -ENOMEM;
71
72 i->type = t;
73 i->read_only = read_only;
10f9c755 74 i->crtime = crtime;
cd61c3bf 75 i->mtime = mtime;
b6b18498
LP
76 i->size = i->size_exclusive = (uint64_t) -1;
77 i->limit = i->limit_exclusive = (uint64_t) -1;
cd61c3bf 78
5fc7f358 79 i->name = strdup(pretty);
cd61c3bf
LP
80 if (!i->name)
81 return -ENOMEM;
82
5fc7f358
LP
83 if (path)
84 i->path = strjoin(path, "/", filename, NULL);
85 else
86 i->path = strdup(filename);
ebeccf9e 87
5fc7f358
LP
88 if (!i->path)
89 return -ENOMEM;
90
91 path_kill_slashes(i->path);
cd61c3bf 92
c2ce6a3d 93 *ret = i;
cd61c3bf 94 i = NULL;
c2ce6a3d 95
cd61c3bf
LP
96 return 0;
97}
98
5fc7f358
LP
99static int image_make(
100 const char *pretty,
101 int dfd,
102 const char *path,
103 const char *filename,
104 Image **ret) {
105
c2ce6a3d 106 struct stat st;
5fc7f358 107 bool read_only;
cd61c3bf
LP
108 int r;
109
5fc7f358 110 assert(filename);
cd61c3bf 111
c2ce6a3d
LP
112 /* We explicitly *do* follow symlinks here, since we want to
113 * allow symlinking trees into /var/lib/container/, and treat
114 * them normally. */
cd61c3bf 115
5fc7f358 116 if (fstatat(dfd, filename, &st, 0) < 0)
c2ce6a3d 117 return -errno;
cd61c3bf 118
5fc7f358
LP
119 read_only =
120 (path && path_startswith(path, "/usr")) ||
08ff5529 121 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
86e339c8 122
c2ce6a3d 123 if (S_ISDIR(st.st_mode)) {
01b72568
LP
124 _cleanup_close_ int fd = -1;
125 unsigned file_attr = 0;
cd61c3bf 126
c2ce6a3d
LP
127 if (!ret)
128 return 1;
cd61c3bf 129
5fc7f358
LP
130 if (!pretty)
131 pretty = filename;
132
01b72568
LP
133 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
134 if (fd < 0)
135 return -errno;
136
c2ce6a3d
LP
137 /* btrfs subvolumes have inode 256 */
138 if (st.st_ino == 256) {
c2ce6a3d 139 struct statfs sfs;
cd61c3bf 140
c2ce6a3d
LP
141 if (fstatfs(fd, &sfs) < 0)
142 return -errno;
cd61c3bf 143
c2ce6a3d 144 if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) {
10f9c755 145 BtrfsSubvolInfo info;
b6b18498 146 BtrfsQuotaInfo quota;
cd61c3bf 147
c2ce6a3d 148 /* It's a btrfs subvolume */
cd61c3bf 149
10f9c755
LP
150 r = btrfs_subvol_get_info_fd(fd, &info);
151 if (r < 0)
152 return r;
c2ce6a3d
LP
153
154 r = image_new(IMAGE_SUBVOLUME,
5fc7f358 155 pretty,
c2ce6a3d 156 path,
5fc7f358
LP
157 filename,
158 info.read_only || read_only,
10f9c755 159 info.otime,
c2ce6a3d 160 0,
c2ce6a3d
LP
161 ret);
162 if (r < 0)
163 return r;
164
b6b18498
LP
165 r = btrfs_subvol_get_quota_fd(fd, &quota);
166 if (r >= 0) {
167 (*ret)->size = quota.referred;
168 (*ret)->size_exclusive = quota.exclusive;
169
170 (*ret)->limit = quota.referred_max;
171 (*ret)->limit_exclusive = quota.exclusive_max;
172 }
173
c2ce6a3d 174 return 1;
cd61c3bf 175 }
c2ce6a3d 176 }
cd61c3bf 177
01b72568
LP
178 /* If the IMMUTABLE bit is set, we consider the
179 * directory read-only. Since the ioctl is not
180 * supported everywhere we ignore failures. */
181 (void) read_attr_fd(fd, &file_attr);
cd61c3bf 182
01b72568 183 /* It's just a normal directory. */
c2ce6a3d 184 r = image_new(IMAGE_DIRECTORY,
5fc7f358 185 pretty,
c2ce6a3d 186 path,
5fc7f358 187 filename,
01b72568 188 read_only || (file_attr & FS_IMMUTABLE_FL),
c2ce6a3d
LP
189 0,
190 0,
191 ret);
192 if (r < 0)
193 return r;
cd61c3bf 194
c2ce6a3d 195 return 1;
cd61c3bf 196
aceac2f0 197 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
10f9c755 198 usec_t crtime = 0;
cd61c3bf 199
aceac2f0 200 /* It's a RAW disk image */
cd61c3bf 201
c2ce6a3d
LP
202 if (!ret)
203 return 1;
cd61c3bf 204
5fc7f358 205 fd_getcrtime_at(dfd, filename, &crtime, 0);
10f9c755 206
5fc7f358
LP
207 if (!pretty)
208 pretty = strndupa(filename, strlen(filename) - 4);
10f9c755 209
aceac2f0 210 r = image_new(IMAGE_RAW,
5fc7f358 211 pretty,
c2ce6a3d 212 path,
5fc7f358
LP
213 filename,
214 !(st.st_mode & 0222) || read_only,
10f9c755 215 crtime,
c2ce6a3d 216 timespec_load(&st.st_mtim),
c2ce6a3d
LP
217 ret);
218 if (r < 0)
219 return r;
cd61c3bf 220
b6b18498
LP
221 (*ret)->size = (*ret)->size_exclusive = st.st_blocks * 512;
222 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
223
c2ce6a3d
LP
224 return 1;
225 }
cd61c3bf 226
c2ce6a3d
LP
227 return 0;
228}
cd61c3bf 229
c2ce6a3d
LP
230int image_find(const char *name, Image **ret) {
231 const char *path;
232 int r;
cd61c3bf 233
c2ce6a3d 234 assert(name);
cd61c3bf 235
c2ce6a3d
LP
236 /* There are no images with invalid names */
237 if (!image_name_is_valid(name))
238 return 0;
cd61c3bf 239
c2ce6a3d
LP
240 NULSTR_FOREACH(path, image_search_path) {
241 _cleanup_closedir_ DIR *d = NULL;
cd61c3bf 242
c2ce6a3d
LP
243 d = opendir(path);
244 if (!d) {
245 if (errno == ENOENT)
246 continue;
cd61c3bf 247
c2ce6a3d
LP
248 return -errno;
249 }
cd61c3bf 250
5fc7f358
LP
251 r = image_make(NULL, dirfd(d), path, name, ret);
252 if (r == 0 || r == -ENOENT) {
aceac2f0 253 _cleanup_free_ char *raw = NULL;
5fc7f358 254
aceac2f0
LP
255 raw = strappend(name, ".raw");
256 if (!raw)
5fc7f358
LP
257 return -ENOMEM;
258
aceac2f0 259 r = image_make(NULL, dirfd(d), path, raw, ret);
5fc7f358
LP
260 if (r == 0 || r == -ENOENT)
261 continue;
262 }
c2ce6a3d
LP
263 if (r < 0)
264 return r;
cd61c3bf 265
c2ce6a3d
LP
266 return 1;
267 }
268
5fc7f358 269 if (streq(name, ".host"))
27c88c4e 270 return image_make(".host", AT_FDCWD, NULL, "/", ret);
5fc7f358 271
c2ce6a3d
LP
272 return 0;
273};
274
275int image_discover(Hashmap *h) {
276 const char *path;
277 int r;
278
279 assert(h);
280
281 NULSTR_FOREACH(path, image_search_path) {
282 _cleanup_closedir_ DIR *d = NULL;
283 struct dirent *de;
284
285 d = opendir(path);
286 if (!d) {
287 if (errno == ENOENT)
a67a4c8c 288 continue;
c2ce6a3d
LP
289
290 return -errno;
291 }
292
293 FOREACH_DIRENT_ALL(de, d, return -errno) {
294 _cleanup_(image_unrefp) Image *image = NULL;
295
296 if (!image_name_is_valid(de->d_name))
297 continue;
298
299 if (hashmap_contains(h, de->d_name))
300 continue;
301
5fc7f358 302 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
c2ce6a3d
LP
303 if (r == 0 || r == -ENOENT)
304 continue;
305 if (r < 0)
306 return r;
307
308 r = hashmap_put(h, image->name, image);
309 if (r < 0)
310 return r;
311
312 image = NULL;
cd61c3bf
LP
313 }
314 }
315
5fc7f358
LP
316 if (!hashmap_contains(h, ".host")) {
317 _cleanup_(image_unrefp) Image *image = NULL;
318
319 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
320 if (r < 0)
321 return r;
322
323 r = hashmap_put(h, image->name, image);
324 if (r < 0)
325 return r;
326
327 image = NULL;
328
329 }
330
cd61c3bf
LP
331 return 0;
332}
333
334void image_hashmap_free(Hashmap *map) {
335 Image *i;
336
337 while ((i = hashmap_steal_first(map)))
338 image_unref(i);
339
340 hashmap_free(map);
341}
342
08682124 343int image_remove(Image *i) {
30535c16
LP
344 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
345 int r;
346
08682124
LP
347 assert(i);
348
349 if (path_equal(i->path, "/") ||
350 path_startswith(i->path, "/usr"))
351 return -EROFS;
352
30535c16
LP
353 /* Make sure we don't interfere with a running nspawn */
354 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
355 if (r < 0)
356 return r;
357
ebd93cb6
LP
358 switch (i->type) {
359
360 case IMAGE_SUBVOLUME:
08682124 361 return btrfs_subvol_remove(i->path);
ebd93cb6
LP
362
363 case IMAGE_DIRECTORY:
01b72568
LP
364 /* Allow deletion of read-only directories */
365 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
366
367 /* fall through */
368
aceac2f0 369 case IMAGE_RAW:
08682124 370 return rm_rf_dangerous(i->path, false, true, false);
ebd93cb6
LP
371
372 default:
373 return -ENOTSUP;
374 }
375}
376
377int image_rename(Image *i, const char *new_name) {
30535c16 378 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
ebd93cb6 379 _cleanup_free_ char *new_path = NULL, *nn = NULL;
01b72568 380 unsigned file_attr = 0;
ebd93cb6
LP
381 int r;
382
383 assert(i);
384
385 if (!image_name_is_valid(new_name))
386 return -EINVAL;
387
388 if (path_equal(i->path, "/") ||
389 path_startswith(i->path, "/usr"))
390 return -EROFS;
391
30535c16
LP
392 /* Make sure we don't interfere with a running nspawn */
393 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
394 if (r < 0)
395 return r;
396
397 /* Make sure nobody takes the new name, between the time we
398 * checked it is currently unused in all search paths, and the
399 * time we take possesion of it */
400 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
401 if (r < 0)
402 return r;
403
ebd93cb6
LP
404 r = image_find(new_name, NULL);
405 if (r < 0)
406 return r;
407 if (r > 0)
408 return -EEXIST;
409
410 switch (i->type) {
411
ebd93cb6 412 case IMAGE_DIRECTORY:
01b72568
LP
413 /* Turn of the immutable bit while we rename the image, so that we can rename it */
414 (void) read_attr_path(i->path, &file_attr);
415
416 if (file_attr & FS_IMMUTABLE_FL)
417 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
418
419 /* fall through */
420
421 case IMAGE_SUBVOLUME:
ebd93cb6
LP
422 new_path = file_in_same_dir(i->path, new_name);
423 break;
424
aceac2f0 425 case IMAGE_RAW: {
ebd93cb6
LP
426 const char *fn;
427
aceac2f0 428 fn = strappenda(new_name, ".raw");
ebd93cb6
LP
429 new_path = file_in_same_dir(i->path, fn);
430 break;
431 }
432
433 default:
434 return -ENOTSUP;
435 }
436
437 if (!new_path)
438 return -ENOMEM;
439
440 nn = strdup(new_name);
441 if (!nn)
442 return -ENOMEM;
443
444 if (renameat2(AT_FDCWD, i->path, AT_FDCWD, new_path, RENAME_NOREPLACE) < 0)
445 return -errno;
446
01b72568
LP
447 /* Restore the immutable bit, if it was set before */
448 if (file_attr & FS_IMMUTABLE_FL)
449 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
450
ebd93cb6
LP
451 free(i->path);
452 i->path = new_path;
453 new_path = NULL;
454
455 free(i->name);
456 i->name = nn;
457 nn = NULL;
458
459 return 0;
460}
461
462int image_clone(Image *i, const char *new_name, bool read_only) {
30535c16 463 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
ebd93cb6
LP
464 const char *new_path;
465 int r;
466
467 assert(i);
468
469 if (!image_name_is_valid(new_name))
470 return -EINVAL;
471
30535c16
LP
472 /* Make sure nobody takes the new name, between the time we
473 * checked it is currently unused in all search paths, and the
474 * time we take possesion of it */
475 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
476 if (r < 0)
477 return r;
478
ebd93cb6
LP
479 r = image_find(new_name, NULL);
480 if (r < 0)
481 return r;
482 if (r > 0)
483 return -EEXIST;
484
485 switch (i->type) {
486
487 case IMAGE_SUBVOLUME:
488 case IMAGE_DIRECTORY:
489 new_path = strappenda("/var/lib/container/", new_name);
490
491 r = btrfs_subvol_snapshot(i->path, new_path, read_only, true);
492 break;
493
aceac2f0
LP
494 case IMAGE_RAW:
495 new_path = strappenda("/var/lib/container/", new_name, ".raw");
ebd93cb6 496
f2068bcc 497 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
ebd93cb6
LP
498 break;
499
500 default:
501 return -ENOTSUP;
502 }
503
504 if (r < 0)
505 return r;
506
507 return 0;
508}
509
510int image_read_only(Image *i, bool b) {
30535c16 511 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
ebd93cb6
LP
512 int r;
513 assert(i);
514
515 if (path_equal(i->path, "/") ||
516 path_startswith(i->path, "/usr"))
517 return -EROFS;
518
30535c16
LP
519 /* Make sure we don't interfere with a running nspawn */
520 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
521 if (r < 0)
522 return r;
523
ebd93cb6
LP
524 switch (i->type) {
525
526 case IMAGE_SUBVOLUME:
527 r = btrfs_subvol_set_read_only(i->path, b);
528 if (r < 0)
529 return r;
01b72568
LP
530
531 break;
532
533 case IMAGE_DIRECTORY:
534 /* For simple directory trees we cannot use the access
535 mode of the top-level directory, since it has an
536 effect on the container itself. However, we can
537 use the "immutable" flag, to at least make the
538 top-level directory read-only. It's not as good as
539 a read-only subvolume, but at least something, and
540 we can read the value back.*/
541
542 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
543 if (r < 0)
544 return r;
545
ebd93cb6
LP
546 break;
547
aceac2f0 548 case IMAGE_RAW: {
ebd93cb6
LP
549 struct stat st;
550
551 if (stat(i->path, &st) < 0)
552 return -errno;
553
554 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
555 return -errno;
f2068bcc
LP
556
557 /* If the images is now read-only, it's a good time to
558 * defrag it, given that no write patterns will
559 * fragment it again. */
560 if (b)
561 (void) btrfs_defrag(i->path);
ebd93cb6
LP
562 break;
563 }
564
ebd93cb6
LP
565 default:
566 return -ENOTSUP;
567 }
568
569 return 0;
08682124
LP
570}
571
30535c16
LP
572int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
573 _cleanup_free_ char *p = NULL;
574 LockFile t = LOCK_FILE_INIT;
575 struct stat st;
576 int r;
577
578 assert(path);
579 assert(global);
580 assert(local);
581
582 /* Locks an image path. This actually creates two locks: one
583 * "local" one, next to the image path itself, which might be
584 * shared via NFS. And another "global" one, in /run, that
585 * uses the device/inode number. This has the benefit that we
586 * can even lock a tree that is a mount point, correctly. */
587
588 if (path_equal(path, "/"))
589 return -EBUSY;
590
591 if (!path_is_absolute(path))
592 return -EINVAL;
593
594 if (stat(path, &st) >= 0) {
595 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
596 return -ENOMEM;
597 }
598
599 r = make_lock_file_for(path, operation, &t);
600 if (r < 0)
601 return r;
602
603 if (p) {
604 mkdir_p("/run/systemd/nspawn/locks", 0600);
605
606 r = make_lock_file(p, operation, global);
607 if (r < 0) {
608 release_lock_file(&t);
609 return r;
610 }
611 }
612
613 *local = t;
614 return 0;
615}
616
617int image_name_lock(const char *name, int operation, LockFile *ret) {
618 const char *p;
619
620 assert(name);
621 assert(ret);
622
623 /* Locks an image name, regardless of the precise path used. */
624
625 if (!image_name_is_valid(name))
626 return -EINVAL;
627
628 if (streq(name, ".host"))
629 return -EBUSY;
630
631 mkdir_p("/run/systemd/nspawn/locks", 0600);
632 p = strappenda("/run/systemd/nspawn/locks/name-", name);
633
634 return make_lock_file(p, operation, ret);
635}
636
/* Checks whether "s" may be used as an image name: it has to pass
 * filename_is_valid(), must not be flagged by string_has_cc(), has to
 * pass utf8_is_valid(), and must not start with ".#" (that prefix is
 * used by temporary files for atomically creating new files). */
bool image_name_is_valid(const char *s) {
        return filename_is_valid(s) &&
               !string_has_cc(s, NULL) &&
               utf8_is_valid(s) &&
               !startswith(s, ".#");
}
653
cd61c3bf
LP
654static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
655 [IMAGE_DIRECTORY] = "directory",
656 [IMAGE_SUBVOLUME] = "subvolume",
aceac2f0 657 [IMAGE_RAW] = "raw",
cd61c3bf
LP
658};
659
660DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);