]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/machine-image.c
conf-files: beef up conf-files.[ch] a bit
[thirdparty/systemd.git] / src / shared / machine-image.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
cd61c3bf
LP
2/***
3 This file is part of systemd.
4
5 Copyright 2013 Lennart Poettering
cd61c3bf
LP
6***/
7
a8fbdf54
TA
8#include <dirent.h>
9#include <errno.h>
ebd93cb6 10#include <fcntl.h>
a8fbdf54
TA
11#include <stdio.h>
12#include <stdlib.h>
13#include <string.h>
e306723e 14#include <sys/file.h>
a8fbdf54
TA
15#include <sys/stat.h>
16#include <unistd.h>
8e0b6570 17#include <linux/fs.h>
546dbec5 18
b5efdb8a 19#include "alloc-util.h"
cd61c3bf 20#include "btrfs-util.h"
c8b3094d 21#include "chattr-util.h"
ebd93cb6 22#include "copy.h"
a0956174 23#include "dirent-util.h"
c7664c07 24#include "dissect-image.h"
b6e953f2 25#include "env-util.h"
3ffd4af2 26#include "fd-util.h"
c7664c07 27#include "fileio.h"
f4f15635 28#include "fs-util.h"
a8fbdf54 29#include "hashmap.h"
c7664c07
LP
30#include "hostname-util.h"
31#include "id128-util.h"
a8fbdf54
TA
32#include "lockfile-util.h"
33#include "log.h"
c7664c07 34#include "loop-util.h"
3ffd4af2 35#include "machine-image.h"
546dbec5 36#include "macro.h"
30535c16 37#include "mkdir.h"
d58ad743 38#include "os-util.h"
8e0b6570 39#include "path-util.h"
c6878637 40#include "rm-rf.h"
8b43440b 41#include "string-table.h"
07630cea 42#include "string-util.h"
8e0b6570 43#include "strv.h"
a8fbdf54 44#include "time-util.h"
8e0b6570 45#include "utf8.h"
a8fbdf54 46#include "util.h"
89a5a90c 47#include "xattr-util.h"
cd61c3bf 48
/* Directories that are scanned for images, stored as one NUL-separated string. The implicit terminator
 * of the literal follows the last entry's explicit NUL, so the list ends in an empty string. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0" /* legacy */
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
c2ce6a3d 54
cd61c3bf
LP
55Image *image_unref(Image *i) {
56 if (!i)
57 return NULL;
58
59 free(i->name);
60 free(i->path);
c7664c07
LP
61
62 free(i->hostname);
63 strv_free(i->machine_info);
64 strv_free(i->os_release);
65
6b430fdb 66 return mfree(i);
cd61c3bf
LP
67}
68
8e0b6570
LP
69static char **image_settings_path(Image *image) {
70 _cleanup_strv_free_ char **l = NULL;
8e0b6570
LP
71 const char *fn, *s;
72 unsigned i = 0;
73
74 assert(image);
75
76 l = new0(char*, 4);
77 if (!l)
78 return NULL;
79
80 fn = strjoina(image->name, ".nspawn");
81
82 FOREACH_STRING(s, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") {
83 l[i] = strappend(s, fn);
84 if (!l[i])
85 return NULL;
86
87 i++;
88 }
89
90 l[i] = file_in_same_dir(image->path, fn);
91 if (!l[i])
92 return NULL;
93
ae2a15bc 94 return TAKE_PTR(l);
8e0b6570
LP
95}
96
bafbac4e
LP
97static char *image_roothash_path(Image *image) {
98 const char *fn;
99
100 assert(image);
101
102 fn = strjoina(image->name, ".roothash");
103
104 return file_in_same_dir(image->path, fn);
105}
106
c2ce6a3d 107static int image_new(
cd61c3bf 108 ImageType t,
5fc7f358 109 const char *pretty,
cd61c3bf 110 const char *path,
5fc7f358 111 const char *filename,
cd61c3bf 112 bool read_only,
10f9c755 113 usec_t crtime,
cd61c3bf 114 usec_t mtime,
c2ce6a3d 115 Image **ret) {
cd61c3bf
LP
116
117 _cleanup_(image_unrefp) Image *i = NULL;
cd61c3bf 118
cd61c3bf
LP
119 assert(t >= 0);
120 assert(t < _IMAGE_TYPE_MAX);
5fc7f358
LP
121 assert(pretty);
122 assert(filename);
c2ce6a3d 123 assert(ret);
cd61c3bf 124
c2ce6a3d 125 i = new0(Image, 1);
cd61c3bf
LP
126 if (!i)
127 return -ENOMEM;
128
129 i->type = t;
130 i->read_only = read_only;
10f9c755 131 i->crtime = crtime;
cd61c3bf 132 i->mtime = mtime;
c19de711 133 i->usage = i->usage_exclusive = (uint64_t) -1;
b6b18498 134 i->limit = i->limit_exclusive = (uint64_t) -1;
cd61c3bf 135
5fc7f358 136 i->name = strdup(pretty);
cd61c3bf
LP
137 if (!i->name)
138 return -ENOMEM;
139
5fc7f358 140 if (path)
605405c6 141 i->path = strjoin(path, "/", filename);
5fc7f358
LP
142 else
143 i->path = strdup(filename);
ebeccf9e 144
5fc7f358
LP
145 if (!i->path)
146 return -ENOMEM;
147
148 path_kill_slashes(i->path);
cd61c3bf 149
1cc6c93a 150 *ret = TAKE_PTR(i);
c2ce6a3d 151
cd61c3bf
LP
152 return 0;
153}
154
5fc7f358
LP
155static int image_make(
156 const char *pretty,
157 int dfd,
158 const char *path,
159 const char *filename,
160 Image **ret) {
161
c2ce6a3d 162 struct stat st;
5fc7f358 163 bool read_only;
cd61c3bf
LP
164 int r;
165
5fc7f358 166 assert(filename);
cd61c3bf 167
eb38edce
LP
168 /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
169 * devices into /var/lib/machines/, and treat them normally. */
cd61c3bf 170
5fc7f358 171 if (fstatat(dfd, filename, &st, 0) < 0)
c2ce6a3d 172 return -errno;
cd61c3bf 173
5fc7f358
LP
174 read_only =
175 (path && path_startswith(path, "/usr")) ||
08ff5529 176 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
86e339c8 177
c2ce6a3d 178 if (S_ISDIR(st.st_mode)) {
01b72568
LP
179 _cleanup_close_ int fd = -1;
180 unsigned file_attr = 0;
cd61c3bf 181
c2ce6a3d
LP
182 if (!ret)
183 return 1;
cd61c3bf 184
5fc7f358
LP
185 if (!pretty)
186 pretty = filename;
187
01b72568
LP
188 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
189 if (fd < 0)
190 return -errno;
191
c2ce6a3d
LP
192 /* btrfs subvolumes have inode 256 */
193 if (st.st_ino == 256) {
cd61c3bf 194
21222ea5
LP
195 r = btrfs_is_filesystem(fd);
196 if (r < 0)
197 return r;
198 if (r) {
10f9c755 199 BtrfsSubvolInfo info;
cd61c3bf 200
c2ce6a3d 201 /* It's a btrfs subvolume */
cd61c3bf 202
5bcd08db 203 r = btrfs_subvol_get_info_fd(fd, 0, &info);
10f9c755
LP
204 if (r < 0)
205 return r;
c2ce6a3d
LP
206
207 r = image_new(IMAGE_SUBVOLUME,
5fc7f358 208 pretty,
c2ce6a3d 209 path,
5fc7f358
LP
210 filename,
211 info.read_only || read_only,
10f9c755 212 info.otime,
c2ce6a3d 213 0,
c2ce6a3d
LP
214 ret);
215 if (r < 0)
216 return r;
217
5bcd08db
LP
218 if (btrfs_quota_scan_ongoing(fd) == 0) {
219 BtrfsQuotaInfo quota;
b6b18498 220
5bcd08db
LP
221 r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
222 if (r >= 0) {
223 (*ret)->usage = quota.referenced;
224 (*ret)->usage_exclusive = quota.exclusive;
225
226 (*ret)->limit = quota.referenced_max;
227 (*ret)->limit_exclusive = quota.exclusive_max;
228 }
b6b18498
LP
229 }
230
c2ce6a3d 231 return 1;
cd61c3bf 232 }
c2ce6a3d 233 }
cd61c3bf 234
01b72568
LP
235 /* If the IMMUTABLE bit is set, we consider the
236 * directory read-only. Since the ioctl is not
237 * supported everywhere we ignore failures. */
238 (void) read_attr_fd(fd, &file_attr);
cd61c3bf 239
01b72568 240 /* It's just a normal directory. */
c2ce6a3d 241 r = image_new(IMAGE_DIRECTORY,
5fc7f358 242 pretty,
c2ce6a3d 243 path,
5fc7f358 244 filename,
01b72568 245 read_only || (file_attr & FS_IMMUTABLE_FL),
c2ce6a3d
LP
246 0,
247 0,
248 ret);
249 if (r < 0)
250 return r;
cd61c3bf 251
c2ce6a3d 252 return 1;
cd61c3bf 253
aceac2f0 254 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
10f9c755 255 usec_t crtime = 0;
cd61c3bf 256
aceac2f0 257 /* It's a RAW disk image */
cd61c3bf 258
c2ce6a3d
LP
259 if (!ret)
260 return 1;
cd61c3bf 261
5fc7f358 262 fd_getcrtime_at(dfd, filename, &crtime, 0);
10f9c755 263
5fc7f358
LP
264 if (!pretty)
265 pretty = strndupa(filename, strlen(filename) - 4);
10f9c755 266
aceac2f0 267 r = image_new(IMAGE_RAW,
5fc7f358 268 pretty,
c2ce6a3d 269 path,
5fc7f358
LP
270 filename,
271 !(st.st_mode & 0222) || read_only,
10f9c755 272 crtime,
c2ce6a3d 273 timespec_load(&st.st_mtim),
c2ce6a3d
LP
274 ret);
275 if (r < 0)
276 return r;
cd61c3bf 277
c19de711 278 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
b6b18498
LP
279 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
280
c2ce6a3d 281 return 1;
eb38edce
LP
282
283 } else if (S_ISBLK(st.st_mode)) {
284 _cleanup_close_ int block_fd = -1;
285 uint64_t size = UINT64_MAX;
286
287 /* A block device */
288
289 if (!ret)
290 return 1;
291
292 if (!pretty)
293 pretty = filename;
294
295 block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
296 if (block_fd < 0)
297 log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path, filename);
298 else {
299 if (fstat(block_fd, &st) < 0)
300 return -errno;
301 if (!S_ISBLK(st.st_mode)) /* Verify that what we opened is actually what we think it is */
302 return -ENOTTY;
303
304 if (!read_only) {
305 int state = 0;
306
307 if (ioctl(block_fd, BLKROGET, &state) < 0)
308 log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path, filename);
309 else if (state)
310 read_only = true;
311 }
312
313 if (ioctl(block_fd, BLKGETSIZE64, &size) < 0)
314 log_debug_errno(errno, "Failed to issue BLKFLSBUF on device %s/%s, ignoring: %m", path, filename);
315
316 block_fd = safe_close(block_fd);
317 }
318
319 r = image_new(IMAGE_BLOCK,
320 pretty,
321 path,
322 filename,
323 !(st.st_mode & 0222) || read_only,
324 0,
325 0,
326 ret);
327 if (r < 0)
328 return r;
329
330 if (size != 0 && size != UINT64_MAX)
331 (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
332
333 return 1;
c2ce6a3d 334 }
cd61c3bf 335
c2ce6a3d
LP
336 return 0;
337}
cd61c3bf 338
c2ce6a3d
LP
339int image_find(const char *name, Image **ret) {
340 const char *path;
341 int r;
cd61c3bf 342
c2ce6a3d 343 assert(name);
cd61c3bf 344
c2ce6a3d
LP
345 /* There are no images with invalid names */
346 if (!image_name_is_valid(name))
347 return 0;
cd61c3bf 348
c2ce6a3d
LP
349 NULSTR_FOREACH(path, image_search_path) {
350 _cleanup_closedir_ DIR *d = NULL;
cd61c3bf 351
c2ce6a3d
LP
352 d = opendir(path);
353 if (!d) {
354 if (errno == ENOENT)
355 continue;
cd61c3bf 356
c2ce6a3d
LP
357 return -errno;
358 }
cd61c3bf 359
5fc7f358 360 r = image_make(NULL, dirfd(d), path, name, ret);
4c701096 361 if (IN_SET(r, 0, -ENOENT)) {
aceac2f0 362 _cleanup_free_ char *raw = NULL;
5fc7f358 363
aceac2f0
LP
364 raw = strappend(name, ".raw");
365 if (!raw)
5fc7f358
LP
366 return -ENOMEM;
367
aceac2f0 368 r = image_make(NULL, dirfd(d), path, raw, ret);
4c701096 369 if (IN_SET(r, 0, -ENOENT))
5fc7f358
LP
370 continue;
371 }
c2ce6a3d
LP
372 if (r < 0)
373 return r;
cd61c3bf 374
c2ce6a3d
LP
375 return 1;
376 }
377
5fc7f358 378 if (streq(name, ".host"))
27c88c4e 379 return image_make(".host", AT_FDCWD, NULL, "/", ret);
5fc7f358 380
c2ce6a3d
LP
381 return 0;
382};
383
384int image_discover(Hashmap *h) {
385 const char *path;
386 int r;
387
388 assert(h);
389
390 NULSTR_FOREACH(path, image_search_path) {
391 _cleanup_closedir_ DIR *d = NULL;
392 struct dirent *de;
393
394 d = opendir(path);
395 if (!d) {
396 if (errno == ENOENT)
a67a4c8c 397 continue;
c2ce6a3d
LP
398
399 return -errno;
400 }
401
402 FOREACH_DIRENT_ALL(de, d, return -errno) {
403 _cleanup_(image_unrefp) Image *image = NULL;
404
405 if (!image_name_is_valid(de->d_name))
406 continue;
407
408 if (hashmap_contains(h, de->d_name))
409 continue;
410
5fc7f358 411 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
4c701096 412 if (IN_SET(r, 0, -ENOENT))
c2ce6a3d
LP
413 continue;
414 if (r < 0)
415 return r;
416
417 r = hashmap_put(h, image->name, image);
418 if (r < 0)
419 return r;
420
421 image = NULL;
cd61c3bf
LP
422 }
423 }
424
5fc7f358
LP
425 if (!hashmap_contains(h, ".host")) {
426 _cleanup_(image_unrefp) Image *image = NULL;
427
428 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
429 if (r < 0)
430 return r;
431
432 r = hashmap_put(h, image->name, image);
433 if (r < 0)
434 return r;
435
436 image = NULL;
437
438 }
439
cd61c3bf
LP
440 return 0;
441}
442
08682124 443int image_remove(Image *i) {
8e766630 444 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
8e0b6570 445 _cleanup_strv_free_ char **settings = NULL;
bafbac4e 446 _cleanup_free_ char *roothash = NULL;
8e0b6570 447 char **j;
30535c16
LP
448 int r;
449
08682124
LP
450 assert(i);
451
d94c2b06 452 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
08682124
LP
453 return -EROFS;
454
8e0b6570
LP
455 settings = image_settings_path(i);
456 if (!settings)
457 return -ENOMEM;
458
bafbac4e
LP
459 roothash = image_roothash_path(i);
460 if (!roothash)
461 return -ENOMEM;
462
30535c16
LP
463 /* Make sure we don't interfere with a running nspawn */
464 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
465 if (r < 0)
466 return r;
467
ebd93cb6
LP
468 switch (i->type) {
469
470 case IMAGE_SUBVOLUME:
9fb0b9c7
LP
471
472 /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the
473 * big guns */
474 if (unlink(i->path) < 0) {
475 r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
476 if (r < 0)
477 return r;
478 }
479
8e0b6570 480 break;
ebd93cb6
LP
481
482 case IMAGE_DIRECTORY:
01b72568 483 /* Allow deletion of read-only directories */
a67d68b8 484 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
8e0b6570
LP
485 r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
486 if (r < 0)
487 return r;
488
489 break;
01b72568 490
eb38edce
LP
491 case IMAGE_BLOCK:
492
493 /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node
494 * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink
495 * the thing (it's most likely a symlink after all). */
496
497 if (path_startswith(i->path, "/dev"))
498 break;
499
4831981d 500 _fallthrough_;
aceac2f0 501 case IMAGE_RAW:
41d1ed05
LP
502 if (unlink(i->path) < 0)
503 return -errno;
8e0b6570 504 break;
ebd93cb6
LP
505
506 default:
15411c0c 507 return -EOPNOTSUPP;
ebd93cb6 508 }
8e0b6570
LP
509
510 STRV_FOREACH(j, settings) {
511 if (unlink(*j) < 0 && errno != ENOENT)
512 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
513 }
514
bafbac4e
LP
515 if (unlink(roothash) < 0 && errno != ENOENT)
516 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash);
517
8e0b6570
LP
518 return 0;
519}
520
bafbac4e 521static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
8e0b6570
LP
522 _cleanup_free_ char *rs = NULL;
523 const char *fn;
524
bafbac4e 525 fn = strjoina(new_name, suffix);
8e0b6570
LP
526
527 rs = file_in_same_dir(path, fn);
528 if (!rs)
529 return -ENOMEM;
530
531 return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
ebd93cb6
LP
532}
533
534int image_rename(Image *i, const char *new_name) {
8e766630 535 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
bafbac4e 536 _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
8e0b6570 537 _cleanup_strv_free_ char **settings = NULL;
01b72568 538 unsigned file_attr = 0;
8e0b6570 539 char **j;
ebd93cb6
LP
540 int r;
541
542 assert(i);
543
544 if (!image_name_is_valid(new_name))
545 return -EINVAL;
546
d94c2b06 547 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
ebd93cb6
LP
548 return -EROFS;
549
8e0b6570
LP
550 settings = image_settings_path(i);
551 if (!settings)
552 return -ENOMEM;
553
bafbac4e
LP
554 roothash = image_roothash_path(i);
555 if (!roothash)
556 return -ENOMEM;
557
30535c16
LP
558 /* Make sure we don't interfere with a running nspawn */
559 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
560 if (r < 0)
561 return r;
562
563 /* Make sure nobody takes the new name, between the time we
564 * checked it is currently unused in all search paths, and the
f8e2f4d6 565 * time we take possession of it */
30535c16
LP
566 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
567 if (r < 0)
568 return r;
569
ebd93cb6
LP
570 r = image_find(new_name, NULL);
571 if (r < 0)
572 return r;
573 if (r > 0)
574 return -EEXIST;
575
576 switch (i->type) {
577
ebd93cb6 578 case IMAGE_DIRECTORY:
01b72568
LP
579 /* Turn of the immutable bit while we rename the image, so that we can rename it */
580 (void) read_attr_path(i->path, &file_attr);
581
582 if (file_attr & FS_IMMUTABLE_FL)
a67d68b8 583 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
01b72568 584
4831981d 585 _fallthrough_;
01b72568 586 case IMAGE_SUBVOLUME:
ebd93cb6
LP
587 new_path = file_in_same_dir(i->path, new_name);
588 break;
589
eb38edce
LP
590 case IMAGE_BLOCK:
591
592 /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
593 if (path_startswith(i->path, "/dev"))
594 return -EROFS;
595
596 new_path = file_in_same_dir(i->path, new_name);
597 break;
598
aceac2f0 599 case IMAGE_RAW: {
ebd93cb6
LP
600 const char *fn;
601
63c372cb 602 fn = strjoina(new_name, ".raw");
ebd93cb6
LP
603 new_path = file_in_same_dir(i->path, fn);
604 break;
605 }
606
607 default:
15411c0c 608 return -EOPNOTSUPP;
ebd93cb6
LP
609 }
610
611 if (!new_path)
612 return -ENOMEM;
613
614 nn = strdup(new_name);
615 if (!nn)
616 return -ENOMEM;
617
f85ef957
AC
618 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
619 if (r < 0)
620 return r;
ebd93cb6 621
01b72568
LP
622 /* Restore the immutable bit, if it was set before */
623 if (file_attr & FS_IMMUTABLE_FL)
a67d68b8 624 (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL);
01b72568 625
f9ecfd3b
DL
626 free_and_replace(i->path, new_path);
627 free_and_replace(i->name, nn);
ebd93cb6 628
8e0b6570 629 STRV_FOREACH(j, settings) {
bafbac4e 630 r = rename_auxiliary_file(*j, new_name, ".nspawn");
8e0b6570
LP
631 if (r < 0 && r != -ENOENT)
632 log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
633 }
634
bafbac4e
LP
635 r = rename_auxiliary_file(roothash, new_name, ".roothash");
636 if (r < 0 && r != -ENOENT)
637 log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);
638
ebd93cb6
LP
639 return 0;
640}
641
bafbac4e 642static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
8e0b6570
LP
643 _cleanup_free_ char *rs = NULL;
644 const char *fn;
645
bafbac4e 646 fn = strjoina(new_name, suffix);
8e0b6570
LP
647
648 rs = file_in_same_dir(path, fn);
649 if (!rs)
650 return -ENOMEM;
651
1c876927 652 return copy_file_atomic(path, rs, 0664, 0, COPY_REFLINK);
8e0b6570
LP
653}
654
ebd93cb6 655int image_clone(Image *i, const char *new_name, bool read_only) {
8e766630 656 _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT;
8e0b6570 657 _cleanup_strv_free_ char **settings = NULL;
bafbac4e 658 _cleanup_free_ char *roothash = NULL;
ebd93cb6 659 const char *new_path;
8e0b6570 660 char **j;
ebd93cb6
LP
661 int r;
662
663 assert(i);
664
665 if (!image_name_is_valid(new_name))
666 return -EINVAL;
667
8e0b6570
LP
668 settings = image_settings_path(i);
669 if (!settings)
670 return -ENOMEM;
671
bafbac4e
LP
672 roothash = image_roothash_path(i);
673 if (!roothash)
674 return -ENOMEM;
675
30535c16
LP
676 /* Make sure nobody takes the new name, between the time we
677 * checked it is currently unused in all search paths, and the
f8e2f4d6 678 * time we take possession of it */
30535c16
LP
679 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
680 if (r < 0)
681 return r;
682
ebd93cb6
LP
683 r = image_find(new_name, NULL);
684 if (r < 0)
685 return r;
686 if (r > 0)
687 return -EEXIST;
688
689 switch (i->type) {
690
691 case IMAGE_SUBVOLUME:
692 case IMAGE_DIRECTORY:
9a50e3ca 693 /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
13e785f7 694 * directory. */
9a50e3ca 695
63c372cb 696 new_path = strjoina("/var/lib/machines/", new_name);
ebd93cb6 697
17cbb288
LP
698 r = btrfs_subvol_snapshot(i->path, new_path,
699 (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
700 BTRFS_SNAPSHOT_FALLBACK_COPY |
701 BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
702 BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
703 BTRFS_SNAPSHOT_RECURSIVE |
704 BTRFS_SNAPSHOT_QUOTA);
705 if (r >= 0)
9a50e3ca 706 /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
8120ee28 707 (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
5bcd08db 708
ebd93cb6
LP
709 break;
710
aceac2f0 711 case IMAGE_RAW:
63c372cb 712 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
ebd93cb6 713
1c876927 714 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK);
ebd93cb6
LP
715 break;
716
eb38edce 717 case IMAGE_BLOCK:
ebd93cb6 718 default:
15411c0c 719 return -EOPNOTSUPP;
ebd93cb6
LP
720 }
721
722 if (r < 0)
723 return r;
724
8e0b6570 725 STRV_FOREACH(j, settings) {
bafbac4e 726 r = clone_auxiliary_file(*j, new_name, ".nspawn");
8e0b6570
LP
727 if (r < 0 && r != -ENOENT)
728 log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
729 }
730
bafbac4e
LP
731 r = clone_auxiliary_file(roothash, new_name, ".roothash");
732 if (r < 0 && r != -ENOENT)
733 log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash);
734
ebd93cb6
LP
735 return 0;
736}
737
738int image_read_only(Image *i, bool b) {
8e766630 739 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
ebd93cb6 740 int r;
c7664c07 741
ebd93cb6
LP
742 assert(i);
743
d94c2b06 744 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
ebd93cb6
LP
745 return -EROFS;
746
30535c16
LP
747 /* Make sure we don't interfere with a running nspawn */
748 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
749 if (r < 0)
750 return r;
751
ebd93cb6
LP
752 switch (i->type) {
753
754 case IMAGE_SUBVOLUME:
5bcd08db
LP
755
756 /* Note that we set the flag only on the top-level
757 * subvolume of the image. */
758
ebd93cb6
LP
759 r = btrfs_subvol_set_read_only(i->path, b);
760 if (r < 0)
761 return r;
01b72568
LP
762
763 break;
764
765 case IMAGE_DIRECTORY:
766 /* For simple directory trees we cannot use the access
767 mode of the top-level directory, since it has an
768 effect on the container itself. However, we can
769 use the "immutable" flag, to at least make the
770 top-level directory read-only. It's not as good as
771 a read-only subvolume, but at least something, and
13e785f7 772 we can read the value back. */
01b72568 773
a67d68b8 774 r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL);
01b72568
LP
775 if (r < 0)
776 return r;
777
ebd93cb6
LP
778 break;
779
aceac2f0 780 case IMAGE_RAW: {
ebd93cb6
LP
781 struct stat st;
782
783 if (stat(i->path, &st) < 0)
784 return -errno;
785
786 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
787 return -errno;
f2068bcc
LP
788
789 /* If the images is now read-only, it's a good time to
790 * defrag it, given that no write patterns will
791 * fragment it again. */
792 if (b)
793 (void) btrfs_defrag(i->path);
ebd93cb6
LP
794 break;
795 }
796
eb38edce
LP
797 case IMAGE_BLOCK: {
798 _cleanup_close_ int fd = -1;
799 struct stat st;
800 int state = b;
801
802 fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
803 if (fd < 0)
804 return -errno;
805
806 if (fstat(fd, &st) < 0)
807 return -errno;
808 if (!S_ISBLK(st.st_mode))
809 return -ENOTTY;
810
811 if (ioctl(fd, BLKROSET, &state) < 0)
812 return -errno;
813
814 break;
815 }
816
ebd93cb6 817 default:
15411c0c 818 return -EOPNOTSUPP;
ebd93cb6
LP
819 }
820
821 return 0;
08682124
LP
822}
823
30535c16
LP
824int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
825 _cleanup_free_ char *p = NULL;
826 LockFile t = LOCK_FILE_INIT;
827 struct stat st;
828 int r;
829
830 assert(path);
831 assert(global);
832 assert(local);
833
834 /* Locks an image path. This actually creates two locks: one
835 * "local" one, next to the image path itself, which might be
836 * shared via NFS. And another "global" one, in /run, that
837 * uses the device/inode number. This has the benefit that we
838 * can even lock a tree that is a mount point, correctly. */
839
30535c16
LP
840 if (!path_is_absolute(path))
841 return -EINVAL;
842
b6e953f2
LP
843 if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
844 *local = *global = (LockFile) LOCK_FILE_INIT;
845 return 0;
846 }
847
848 if (path_equal(path, "/"))
849 return -EBUSY;
850
30535c16 851 if (stat(path, &st) >= 0) {
eb38edce
LP
852 if (S_ISBLK(st.st_mode))
853 r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
854 else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
855 r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
856 else
857 return -ENOTTY;
858
859 if (r < 0)
30535c16
LP
860 return -ENOMEM;
861 }
862
eb38edce
LP
863 /* For block devices we don't need the "local" lock, as the major/minor lock above should be sufficient, since
864 * block devices are device local anyway. */
865 if (!path_startswith(path, "/dev")) {
866 r = make_lock_file_for(path, operation, &t);
8be17c9b
LT
867 if (r < 0) {
868 if ((operation & LOCK_SH) && r == -EROFS)
771b7ead 869 log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path);
8be17c9b
LT
870 else
871 return r;
872 }
eb38edce 873 }
30535c16
LP
874
875 if (p) {
7e7cddb2 876 mkdir_p("/run/systemd/nspawn/locks", 0700);
30535c16
LP
877
878 r = make_lock_file(p, operation, global);
879 if (r < 0) {
880 release_lock_file(&t);
881 return r;
882 }
546dbec5
LP
883 } else
884 *global = (LockFile) LOCK_FILE_INIT;
30535c16
LP
885
886 *local = t;
887 return 0;
888}
889
cb81cd80 890int image_set_limit(Image *i, uint64_t referenced_max) {
d6ce17c7
LP
891 assert(i);
892
d94c2b06 893 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
d6ce17c7
LP
894 return -EROFS;
895
896 if (i->type != IMAGE_SUBVOLUME)
15411c0c 897 return -EOPNOTSUPP;
d6ce17c7 898
5bcd08db
LP
899 /* We set the quota both for the subvolume as well as for the
900 * subtree. The latter is mostly for historical reasons, since
901 * we didn't use to have a concept of subtree quota, and hence
902 * only modified the subvolume quota. */
903
904 (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
905 (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
906 return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
d6ce17c7
LP
907}
908
c7664c07 909int image_read_metadata(Image *i) {
8e766630 910 _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
c7664c07
LP
911 int r;
912
913 assert(i);
914
915 r = image_path_lock(i->path, LOCK_SH|LOCK_NB, &global_lock, &local_lock);
916 if (r < 0)
917 return r;
918
919 switch (i->type) {
920
921 case IMAGE_SUBVOLUME:
922 case IMAGE_DIRECTORY: {
923 _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
924 sd_id128_t machine_id = SD_ID128_NULL;
925 _cleanup_free_ char *hostname = NULL;
926 _cleanup_free_ char *path = NULL;
d58ad743 927 _cleanup_fclose_ FILE *f = NULL;
c7664c07 928
62570f6f 929 r = chase_symlinks("/etc/hostname", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path);
c7664c07
LP
930 if (r < 0 && r != -ENOENT)
931 log_debug_errno(r, "Failed to chase /etc/hostname in image %s: %m", i->name);
932 else if (r >= 0) {
933 r = read_etc_hostname(path, &hostname);
934 if (r < 0)
935 log_debug_errno(errno, "Failed to read /etc/hostname of image %s: %m", i->name);
936 }
937
938 path = mfree(path);
939
62570f6f 940 r = chase_symlinks("/etc/machine-id", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path);
c7664c07
LP
941 if (r < 0 && r != -ENOENT)
942 log_debug_errno(r, "Failed to chase /etc/machine-id in image %s: %m", i->name);
943 else if (r >= 0) {
944 _cleanup_close_ int fd = -1;
945
946 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
947 if (fd < 0)
948 log_debug_errno(errno, "Failed to open %s: %m", path);
949 else {
950 r = id128_read_fd(fd, ID128_PLAIN, &machine_id);
951 if (r < 0)
952 log_debug_errno(r, "Image %s contains invalid machine ID.", i->name);
953 }
954 }
955
956 path = mfree(path);
957
62570f6f 958 r = chase_symlinks("/etc/machine-info", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path);
c7664c07
LP
959 if (r < 0 && r != -ENOENT)
960 log_debug_errno(r, "Failed to chase /etc/machine-info in image %s: %m", i->name);
961 else if (r >= 0) {
962 r = load_env_file_pairs(NULL, path, NULL, &machine_info);
963 if (r < 0)
964 log_debug_errno(r, "Failed to parse machine-info data of %s: %m", i->name);
965 }
966
d58ad743
LP
967 r = load_os_release_pairs(i->path, &os_release);
968 if (r < 0)
969 log_debug_errno(r, "Failed to read os-release in image, ignoring: %m");
c7664c07
LP
970
971 free_and_replace(i->hostname, hostname);
972 i->machine_id = machine_id;
973 strv_free_and_replace(i->machine_info, machine_info);
974 strv_free_and_replace(i->os_release, os_release);
975
976 break;
977 }
978
979 case IMAGE_RAW:
980 case IMAGE_BLOCK: {
981 _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
982 _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
983
984 r = loop_device_make_by_path(i->path, O_RDONLY, &d);
985 if (r < 0)
986 return r;
987
988 r = dissect_image(d->fd, NULL, 0, DISSECT_IMAGE_REQUIRE_ROOT, &m);
989 if (r < 0)
990 return r;
991
992 r = dissected_image_acquire_metadata(m);
993 if (r < 0)
994 return r;
995
996 free_and_replace(i->hostname, m->hostname);
997 i->machine_id = m->machine_id;
998 strv_free_and_replace(i->machine_info, m->machine_info);
999 strv_free_and_replace(i->os_release, m->os_release);
1000
1001 break;
1002 }
1003
1004 default:
1005 return -EOPNOTSUPP;
1006 }
1007
1008 i->metadata_valid = true;
1009
1010 return 0;
1011}
1012
30535c16
LP
1013int image_name_lock(const char *name, int operation, LockFile *ret) {
1014 const char *p;
1015
1016 assert(name);
1017 assert(ret);
1018
1019 /* Locks an image name, regardless of the precise path used. */
1020
1021 if (!image_name_is_valid(name))
1022 return -EINVAL;
1023
b6e953f2
LP
1024 if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
1025 *ret = (LockFile) LOCK_FILE_INIT;
1026 return 0;
1027 }
1028
30535c16
LP
1029 if (streq(name, ".host"))
1030 return -EBUSY;
1031
7e7cddb2 1032 mkdir_p("/run/systemd/nspawn/locks", 0700);
63c372cb 1033 p = strjoina("/run/systemd/nspawn/locks/name-", name);
30535c16
LP
1034
1035 return make_lock_file(p, operation, ret);
1036}
1037
/* Checks whether s is acceptable as image name: it has to be a valid file name, must not contain
 * control characters, has to be valid UTF-8 and must not begin with ".#", since that prefix is used
 * for the temporary files created when atomically writing new files. */
bool image_name_is_valid(const char *s) {
        return filename_is_valid(s) &&
               !string_has_cc(s, NULL) &&
               utf8_is_valid(s) &&
               !startswith(s, ".#");
}
1054
cd61c3bf
LP
1055static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
1056 [IMAGE_DIRECTORY] = "directory",
1057 [IMAGE_SUBVOLUME] = "subvolume",
aceac2f0 1058 [IMAGE_RAW] = "raw",
eb38edce 1059 [IMAGE_BLOCK] = "block",
cd61c3bf
LP
1060};
1061
1062DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);