]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/machine-image.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / shared / machine-image.c
CommitLineData
/* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
20
a8fbdf54
TA
21#include <dirent.h>
22#include <errno.h>
ebd93cb6 23#include <fcntl.h>
a8fbdf54
TA
24#include <stdio.h>
25#include <stdlib.h>
26#include <string.h>
e306723e 27#include <sys/file.h>
a8fbdf54
TA
28#include <sys/stat.h>
29#include <unistd.h>
8e0b6570 30#include <linux/fs.h>
546dbec5 31
b5efdb8a 32#include "alloc-util.h"
cd61c3bf 33#include "btrfs-util.h"
c8b3094d 34#include "chattr-util.h"
ebd93cb6 35#include "copy.h"
a0956174 36#include "dirent-util.h"
b6e953f2 37#include "env-util.h"
3ffd4af2 38#include "fd-util.h"
f4f15635 39#include "fs-util.h"
a8fbdf54
TA
40#include "hashmap.h"
41#include "lockfile-util.h"
42#include "log.h"
3ffd4af2 43#include "machine-image.h"
546dbec5 44#include "macro.h"
30535c16 45#include "mkdir.h"
8e0b6570 46#include "path-util.h"
c6878637 47#include "rm-rf.h"
8b43440b 48#include "string-table.h"
07630cea 49#include "string-util.h"
8e0b6570 50#include "strv.h"
a8fbdf54 51#include "time-util.h"
8e0b6570 52#include "utf8.h"
a8fbdf54 53#include "util.h"
89a5a90c 54#include "xattr-util.h"
cd61c3bf 55
/* NUL-separated list of the directories that are searched for machine images. It is walked with
 * NULSTR_FOREACH() by image_find() and image_discover(). */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0"          /* legacy */
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
c2ce6a3d 61
cd61c3bf
LP
62Image *image_unref(Image *i) {
63 if (!i)
64 return NULL;
65
66 free(i->name);
67 free(i->path);
6b430fdb 68 return mfree(i);
cd61c3bf
LP
69}
70
8e0b6570
LP
71static char **image_settings_path(Image *image) {
72 _cleanup_strv_free_ char **l = NULL;
73 char **ret;
74 const char *fn, *s;
75 unsigned i = 0;
76
77 assert(image);
78
79 l = new0(char*, 4);
80 if (!l)
81 return NULL;
82
83 fn = strjoina(image->name, ".nspawn");
84
85 FOREACH_STRING(s, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") {
86 l[i] = strappend(s, fn);
87 if (!l[i])
88 return NULL;
89
90 i++;
91 }
92
93 l[i] = file_in_same_dir(image->path, fn);
94 if (!l[i])
95 return NULL;
96
97 ret = l;
98 l = NULL;
99
100 return ret;
101}
102
bafbac4e
LP
103static char *image_roothash_path(Image *image) {
104 const char *fn;
105
106 assert(image);
107
108 fn = strjoina(image->name, ".roothash");
109
110 return file_in_same_dir(image->path, fn);
111}
112
c2ce6a3d 113static int image_new(
cd61c3bf 114 ImageType t,
5fc7f358 115 const char *pretty,
cd61c3bf 116 const char *path,
5fc7f358 117 const char *filename,
cd61c3bf 118 bool read_only,
10f9c755 119 usec_t crtime,
cd61c3bf 120 usec_t mtime,
c2ce6a3d 121 Image **ret) {
cd61c3bf
LP
122
123 _cleanup_(image_unrefp) Image *i = NULL;
cd61c3bf 124
cd61c3bf
LP
125 assert(t >= 0);
126 assert(t < _IMAGE_TYPE_MAX);
5fc7f358
LP
127 assert(pretty);
128 assert(filename);
c2ce6a3d 129 assert(ret);
cd61c3bf 130
c2ce6a3d 131 i = new0(Image, 1);
cd61c3bf
LP
132 if (!i)
133 return -ENOMEM;
134
135 i->type = t;
136 i->read_only = read_only;
10f9c755 137 i->crtime = crtime;
cd61c3bf 138 i->mtime = mtime;
c19de711 139 i->usage = i->usage_exclusive = (uint64_t) -1;
b6b18498 140 i->limit = i->limit_exclusive = (uint64_t) -1;
cd61c3bf 141
5fc7f358 142 i->name = strdup(pretty);
cd61c3bf
LP
143 if (!i->name)
144 return -ENOMEM;
145
5fc7f358 146 if (path)
605405c6 147 i->path = strjoin(path, "/", filename);
5fc7f358
LP
148 else
149 i->path = strdup(filename);
ebeccf9e 150
5fc7f358
LP
151 if (!i->path)
152 return -ENOMEM;
153
154 path_kill_slashes(i->path);
cd61c3bf 155
c2ce6a3d 156 *ret = i;
cd61c3bf 157 i = NULL;
c2ce6a3d 158
cd61c3bf
LP
159 return 0;
160}
161
5fc7f358
LP
162static int image_make(
163 const char *pretty,
164 int dfd,
165 const char *path,
166 const char *filename,
167 Image **ret) {
168
c2ce6a3d 169 struct stat st;
5fc7f358 170 bool read_only;
cd61c3bf
LP
171 int r;
172
5fc7f358 173 assert(filename);
cd61c3bf 174
eb38edce
LP
175 /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
176 * devices into /var/lib/machines/, and treat them normally. */
cd61c3bf 177
5fc7f358 178 if (fstatat(dfd, filename, &st, 0) < 0)
c2ce6a3d 179 return -errno;
cd61c3bf 180
5fc7f358
LP
181 read_only =
182 (path && path_startswith(path, "/usr")) ||
08ff5529 183 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
86e339c8 184
c2ce6a3d 185 if (S_ISDIR(st.st_mode)) {
01b72568
LP
186 _cleanup_close_ int fd = -1;
187 unsigned file_attr = 0;
cd61c3bf 188
c2ce6a3d
LP
189 if (!ret)
190 return 1;
cd61c3bf 191
5fc7f358
LP
192 if (!pretty)
193 pretty = filename;
194
01b72568
LP
195 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
196 if (fd < 0)
197 return -errno;
198
c2ce6a3d
LP
199 /* btrfs subvolumes have inode 256 */
200 if (st.st_ino == 256) {
cd61c3bf 201
21222ea5
LP
202 r = btrfs_is_filesystem(fd);
203 if (r < 0)
204 return r;
205 if (r) {
10f9c755 206 BtrfsSubvolInfo info;
cd61c3bf 207
c2ce6a3d 208 /* It's a btrfs subvolume */
cd61c3bf 209
5bcd08db 210 r = btrfs_subvol_get_info_fd(fd, 0, &info);
10f9c755
LP
211 if (r < 0)
212 return r;
c2ce6a3d
LP
213
214 r = image_new(IMAGE_SUBVOLUME,
5fc7f358 215 pretty,
c2ce6a3d 216 path,
5fc7f358
LP
217 filename,
218 info.read_only || read_only,
10f9c755 219 info.otime,
c2ce6a3d 220 0,
c2ce6a3d
LP
221 ret);
222 if (r < 0)
223 return r;
224
5bcd08db
LP
225 if (btrfs_quota_scan_ongoing(fd) == 0) {
226 BtrfsQuotaInfo quota;
b6b18498 227
5bcd08db
LP
228 r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
229 if (r >= 0) {
230 (*ret)->usage = quota.referenced;
231 (*ret)->usage_exclusive = quota.exclusive;
232
233 (*ret)->limit = quota.referenced_max;
234 (*ret)->limit_exclusive = quota.exclusive_max;
235 }
b6b18498
LP
236 }
237
c2ce6a3d 238 return 1;
cd61c3bf 239 }
c2ce6a3d 240 }
cd61c3bf 241
01b72568
LP
242 /* If the IMMUTABLE bit is set, we consider the
243 * directory read-only. Since the ioctl is not
244 * supported everywhere we ignore failures. */
245 (void) read_attr_fd(fd, &file_attr);
cd61c3bf 246
01b72568 247 /* It's just a normal directory. */
c2ce6a3d 248 r = image_new(IMAGE_DIRECTORY,
5fc7f358 249 pretty,
c2ce6a3d 250 path,
5fc7f358 251 filename,
01b72568 252 read_only || (file_attr & FS_IMMUTABLE_FL),
c2ce6a3d
LP
253 0,
254 0,
255 ret);
256 if (r < 0)
257 return r;
cd61c3bf 258
c2ce6a3d 259 return 1;
cd61c3bf 260
aceac2f0 261 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
10f9c755 262 usec_t crtime = 0;
cd61c3bf 263
aceac2f0 264 /* It's a RAW disk image */
cd61c3bf 265
c2ce6a3d
LP
266 if (!ret)
267 return 1;
cd61c3bf 268
5fc7f358 269 fd_getcrtime_at(dfd, filename, &crtime, 0);
10f9c755 270
5fc7f358
LP
271 if (!pretty)
272 pretty = strndupa(filename, strlen(filename) - 4);
10f9c755 273
aceac2f0 274 r = image_new(IMAGE_RAW,
5fc7f358 275 pretty,
c2ce6a3d 276 path,
5fc7f358
LP
277 filename,
278 !(st.st_mode & 0222) || read_only,
10f9c755 279 crtime,
c2ce6a3d 280 timespec_load(&st.st_mtim),
c2ce6a3d
LP
281 ret);
282 if (r < 0)
283 return r;
cd61c3bf 284
c19de711 285 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
b6b18498
LP
286 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
287
c2ce6a3d 288 return 1;
eb38edce
LP
289
290 } else if (S_ISBLK(st.st_mode)) {
291 _cleanup_close_ int block_fd = -1;
292 uint64_t size = UINT64_MAX;
293
294 /* A block device */
295
296 if (!ret)
297 return 1;
298
299 if (!pretty)
300 pretty = filename;
301
302 block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
303 if (block_fd < 0)
304 log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path, filename);
305 else {
306 if (fstat(block_fd, &st) < 0)
307 return -errno;
308 if (!S_ISBLK(st.st_mode)) /* Verify that what we opened is actually what we think it is */
309 return -ENOTTY;
310
311 if (!read_only) {
312 int state = 0;
313
314 if (ioctl(block_fd, BLKROGET, &state) < 0)
315 log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path, filename);
316 else if (state)
317 read_only = true;
318 }
319
320 if (ioctl(block_fd, BLKGETSIZE64, &size) < 0)
321 log_debug_errno(errno, "Failed to issue BLKFLSBUF on device %s/%s, ignoring: %m", path, filename);
322
323 block_fd = safe_close(block_fd);
324 }
325
326 r = image_new(IMAGE_BLOCK,
327 pretty,
328 path,
329 filename,
330 !(st.st_mode & 0222) || read_only,
331 0,
332 0,
333 ret);
334 if (r < 0)
335 return r;
336
337 if (size != 0 && size != UINT64_MAX)
338 (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
339
340 return 1;
c2ce6a3d 341 }
cd61c3bf 342
c2ce6a3d
LP
343 return 0;
344}
cd61c3bf 345
c2ce6a3d
LP
346int image_find(const char *name, Image **ret) {
347 const char *path;
348 int r;
cd61c3bf 349
c2ce6a3d 350 assert(name);
cd61c3bf 351
c2ce6a3d
LP
352 /* There are no images with invalid names */
353 if (!image_name_is_valid(name))
354 return 0;
cd61c3bf 355
c2ce6a3d
LP
356 NULSTR_FOREACH(path, image_search_path) {
357 _cleanup_closedir_ DIR *d = NULL;
cd61c3bf 358
c2ce6a3d
LP
359 d = opendir(path);
360 if (!d) {
361 if (errno == ENOENT)
362 continue;
cd61c3bf 363
c2ce6a3d
LP
364 return -errno;
365 }
cd61c3bf 366
5fc7f358 367 r = image_make(NULL, dirfd(d), path, name, ret);
4c701096 368 if (IN_SET(r, 0, -ENOENT)) {
aceac2f0 369 _cleanup_free_ char *raw = NULL;
5fc7f358 370
aceac2f0
LP
371 raw = strappend(name, ".raw");
372 if (!raw)
5fc7f358
LP
373 return -ENOMEM;
374
aceac2f0 375 r = image_make(NULL, dirfd(d), path, raw, ret);
4c701096 376 if (IN_SET(r, 0, -ENOENT))
5fc7f358
LP
377 continue;
378 }
c2ce6a3d
LP
379 if (r < 0)
380 return r;
cd61c3bf 381
c2ce6a3d
LP
382 return 1;
383 }
384
5fc7f358 385 if (streq(name, ".host"))
27c88c4e 386 return image_make(".host", AT_FDCWD, NULL, "/", ret);
5fc7f358 387
c2ce6a3d
LP
388 return 0;
389};
390
391int image_discover(Hashmap *h) {
392 const char *path;
393 int r;
394
395 assert(h);
396
397 NULSTR_FOREACH(path, image_search_path) {
398 _cleanup_closedir_ DIR *d = NULL;
399 struct dirent *de;
400
401 d = opendir(path);
402 if (!d) {
403 if (errno == ENOENT)
a67a4c8c 404 continue;
c2ce6a3d
LP
405
406 return -errno;
407 }
408
409 FOREACH_DIRENT_ALL(de, d, return -errno) {
410 _cleanup_(image_unrefp) Image *image = NULL;
411
412 if (!image_name_is_valid(de->d_name))
413 continue;
414
415 if (hashmap_contains(h, de->d_name))
416 continue;
417
5fc7f358 418 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
4c701096 419 if (IN_SET(r, 0, -ENOENT))
c2ce6a3d
LP
420 continue;
421 if (r < 0)
422 return r;
423
424 r = hashmap_put(h, image->name, image);
425 if (r < 0)
426 return r;
427
428 image = NULL;
cd61c3bf
LP
429 }
430 }
431
5fc7f358
LP
432 if (!hashmap_contains(h, ".host")) {
433 _cleanup_(image_unrefp) Image *image = NULL;
434
435 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
436 if (r < 0)
437 return r;
438
439 r = hashmap_put(h, image->name, image);
440 if (r < 0)
441 return r;
442
443 image = NULL;
444
445 }
446
cd61c3bf
LP
447 return 0;
448}
449
450void image_hashmap_free(Hashmap *map) {
451 Image *i;
452
453 while ((i = hashmap_steal_first(map)))
454 image_unref(i);
455
456 hashmap_free(map);
457}
458
08682124 459int image_remove(Image *i) {
30535c16 460 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
8e0b6570 461 _cleanup_strv_free_ char **settings = NULL;
bafbac4e 462 _cleanup_free_ char *roothash = NULL;
8e0b6570 463 char **j;
30535c16
LP
464 int r;
465
08682124
LP
466 assert(i);
467
d94c2b06 468 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
08682124
LP
469 return -EROFS;
470
8e0b6570
LP
471 settings = image_settings_path(i);
472 if (!settings)
473 return -ENOMEM;
474
bafbac4e
LP
475 roothash = image_roothash_path(i);
476 if (!roothash)
477 return -ENOMEM;
478
30535c16
LP
479 /* Make sure we don't interfere with a running nspawn */
480 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
481 if (r < 0)
482 return r;
483
ebd93cb6
LP
484 switch (i->type) {
485
486 case IMAGE_SUBVOLUME:
9fb0b9c7
LP
487
488 /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the
489 * big guns */
490 if (unlink(i->path) < 0) {
491 r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
492 if (r < 0)
493 return r;
494 }
495
8e0b6570 496 break;
ebd93cb6
LP
497
498 case IMAGE_DIRECTORY:
01b72568 499 /* Allow deletion of read-only directories */
a67d68b8 500 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
8e0b6570
LP
501 r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
502 if (r < 0)
503 return r;
504
505 break;
01b72568 506
eb38edce
LP
507 case IMAGE_BLOCK:
508
509 /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node
510 * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink
511 * the thing (it's most likely a symlink after all). */
512
513 if (path_startswith(i->path, "/dev"))
514 break;
515
516 /* fallthrough */
517
aceac2f0 518 case IMAGE_RAW:
41d1ed05
LP
519 if (unlink(i->path) < 0)
520 return -errno;
8e0b6570 521 break;
ebd93cb6
LP
522
523 default:
15411c0c 524 return -EOPNOTSUPP;
ebd93cb6 525 }
8e0b6570
LP
526
527 STRV_FOREACH(j, settings) {
528 if (unlink(*j) < 0 && errno != ENOENT)
529 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
530 }
531
bafbac4e
LP
532 if (unlink(roothash) < 0 && errno != ENOENT)
533 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash);
534
8e0b6570
LP
535 return 0;
536}
537
bafbac4e 538static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
8e0b6570
LP
539 _cleanup_free_ char *rs = NULL;
540 const char *fn;
541
bafbac4e 542 fn = strjoina(new_name, suffix);
8e0b6570
LP
543
544 rs = file_in_same_dir(path, fn);
545 if (!rs)
546 return -ENOMEM;
547
548 return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
ebd93cb6
LP
549}
550
551int image_rename(Image *i, const char *new_name) {
30535c16 552 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
bafbac4e 553 _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
8e0b6570 554 _cleanup_strv_free_ char **settings = NULL;
01b72568 555 unsigned file_attr = 0;
8e0b6570 556 char **j;
ebd93cb6
LP
557 int r;
558
559 assert(i);
560
561 if (!image_name_is_valid(new_name))
562 return -EINVAL;
563
d94c2b06 564 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
ebd93cb6
LP
565 return -EROFS;
566
8e0b6570
LP
567 settings = image_settings_path(i);
568 if (!settings)
569 return -ENOMEM;
570
bafbac4e
LP
571 roothash = image_roothash_path(i);
572 if (!roothash)
573 return -ENOMEM;
574
30535c16
LP
575 /* Make sure we don't interfere with a running nspawn */
576 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
577 if (r < 0)
578 return r;
579
580 /* Make sure nobody takes the new name, between the time we
581 * checked it is currently unused in all search paths, and the
f8e2f4d6 582 * time we take possession of it */
30535c16
LP
583 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
584 if (r < 0)
585 return r;
586
ebd93cb6
LP
587 r = image_find(new_name, NULL);
588 if (r < 0)
589 return r;
590 if (r > 0)
591 return -EEXIST;
592
593 switch (i->type) {
594
ebd93cb6 595 case IMAGE_DIRECTORY:
01b72568
LP
596 /* Turn of the immutable bit while we rename the image, so that we can rename it */
597 (void) read_attr_path(i->path, &file_attr);
598
599 if (file_attr & FS_IMMUTABLE_FL)
a67d68b8 600 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
01b72568
LP
601
602 /* fall through */
603
604 case IMAGE_SUBVOLUME:
ebd93cb6
LP
605 new_path = file_in_same_dir(i->path, new_name);
606 break;
607
eb38edce
LP
608 case IMAGE_BLOCK:
609
610 /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
611 if (path_startswith(i->path, "/dev"))
612 return -EROFS;
613
614 new_path = file_in_same_dir(i->path, new_name);
615 break;
616
aceac2f0 617 case IMAGE_RAW: {
ebd93cb6
LP
618 const char *fn;
619
63c372cb 620 fn = strjoina(new_name, ".raw");
ebd93cb6
LP
621 new_path = file_in_same_dir(i->path, fn);
622 break;
623 }
624
625 default:
15411c0c 626 return -EOPNOTSUPP;
ebd93cb6
LP
627 }
628
629 if (!new_path)
630 return -ENOMEM;
631
632 nn = strdup(new_name);
633 if (!nn)
634 return -ENOMEM;
635
f85ef957
AC
636 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
637 if (r < 0)
638 return r;
ebd93cb6 639
01b72568
LP
640 /* Restore the immutable bit, if it was set before */
641 if (file_attr & FS_IMMUTABLE_FL)
a67d68b8 642 (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL);
01b72568 643
ebd93cb6
LP
644 free(i->path);
645 i->path = new_path;
646 new_path = NULL;
647
648 free(i->name);
649 i->name = nn;
650 nn = NULL;
651
8e0b6570 652 STRV_FOREACH(j, settings) {
bafbac4e 653 r = rename_auxiliary_file(*j, new_name, ".nspawn");
8e0b6570
LP
654 if (r < 0 && r != -ENOENT)
655 log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
656 }
657
bafbac4e
LP
658 r = rename_auxiliary_file(roothash, new_name, ".roothash");
659 if (r < 0 && r != -ENOENT)
660 log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);
661
ebd93cb6
LP
662 return 0;
663}
664
bafbac4e 665static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
8e0b6570
LP
666 _cleanup_free_ char *rs = NULL;
667 const char *fn;
668
bafbac4e 669 fn = strjoina(new_name, suffix);
8e0b6570
LP
670
671 rs = file_in_same_dir(path, fn);
672 if (!rs)
673 return -ENOMEM;
674
1c876927 675 return copy_file_atomic(path, rs, 0664, 0, COPY_REFLINK);
8e0b6570
LP
676}
677
ebd93cb6 678int image_clone(Image *i, const char *new_name, bool read_only) {
30535c16 679 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
8e0b6570 680 _cleanup_strv_free_ char **settings = NULL;
bafbac4e 681 _cleanup_free_ char *roothash = NULL;
ebd93cb6 682 const char *new_path;
8e0b6570 683 char **j;
ebd93cb6
LP
684 int r;
685
686 assert(i);
687
688 if (!image_name_is_valid(new_name))
689 return -EINVAL;
690
8e0b6570
LP
691 settings = image_settings_path(i);
692 if (!settings)
693 return -ENOMEM;
694
bafbac4e
LP
695 roothash = image_roothash_path(i);
696 if (!roothash)
697 return -ENOMEM;
698
30535c16
LP
699 /* Make sure nobody takes the new name, between the time we
700 * checked it is currently unused in all search paths, and the
f8e2f4d6 701 * time we take possession of it */
30535c16
LP
702 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
703 if (r < 0)
704 return r;
705
ebd93cb6
LP
706 r = image_find(new_name, NULL);
707 if (r < 0)
708 return r;
709 if (r > 0)
710 return -EEXIST;
711
712 switch (i->type) {
713
714 case IMAGE_SUBVOLUME:
715 case IMAGE_DIRECTORY:
9a50e3ca 716 /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
13e785f7 717 * directory. */
9a50e3ca 718
63c372cb 719 new_path = strjoina("/var/lib/machines/", new_name);
ebd93cb6 720
17cbb288
LP
721 r = btrfs_subvol_snapshot(i->path, new_path,
722 (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
723 BTRFS_SNAPSHOT_FALLBACK_COPY |
724 BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
725 BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
726 BTRFS_SNAPSHOT_RECURSIVE |
727 BTRFS_SNAPSHOT_QUOTA);
728 if (r >= 0)
9a50e3ca 729 /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
8120ee28 730 (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
5bcd08db 731
ebd93cb6
LP
732 break;
733
aceac2f0 734 case IMAGE_RAW:
63c372cb 735 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
ebd93cb6 736
1c876927 737 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK);
ebd93cb6
LP
738 break;
739
eb38edce 740 case IMAGE_BLOCK:
ebd93cb6 741 default:
15411c0c 742 return -EOPNOTSUPP;
ebd93cb6
LP
743 }
744
745 if (r < 0)
746 return r;
747
8e0b6570 748 STRV_FOREACH(j, settings) {
bafbac4e 749 r = clone_auxiliary_file(*j, new_name, ".nspawn");
8e0b6570
LP
750 if (r < 0 && r != -ENOENT)
751 log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
752 }
753
bafbac4e
LP
754 r = clone_auxiliary_file(roothash, new_name, ".roothash");
755 if (r < 0 && r != -ENOENT)
756 log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash);
757
ebd93cb6
LP
758 return 0;
759}
760
761int image_read_only(Image *i, bool b) {
30535c16 762 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
ebd93cb6
LP
763 int r;
764 assert(i);
765
d94c2b06 766 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
ebd93cb6
LP
767 return -EROFS;
768
30535c16
LP
769 /* Make sure we don't interfere with a running nspawn */
770 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
771 if (r < 0)
772 return r;
773
ebd93cb6
LP
774 switch (i->type) {
775
776 case IMAGE_SUBVOLUME:
5bcd08db
LP
777
778 /* Note that we set the flag only on the top-level
779 * subvolume of the image. */
780
ebd93cb6
LP
781 r = btrfs_subvol_set_read_only(i->path, b);
782 if (r < 0)
783 return r;
01b72568
LP
784
785 break;
786
787 case IMAGE_DIRECTORY:
788 /* For simple directory trees we cannot use the access
789 mode of the top-level directory, since it has an
790 effect on the container itself. However, we can
791 use the "immutable" flag, to at least make the
792 top-level directory read-only. It's not as good as
793 a read-only subvolume, but at least something, and
13e785f7 794 we can read the value back. */
01b72568 795
a67d68b8 796 r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL);
01b72568
LP
797 if (r < 0)
798 return r;
799
ebd93cb6
LP
800 break;
801
aceac2f0 802 case IMAGE_RAW: {
ebd93cb6
LP
803 struct stat st;
804
805 if (stat(i->path, &st) < 0)
806 return -errno;
807
808 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
809 return -errno;
f2068bcc
LP
810
811 /* If the images is now read-only, it's a good time to
812 * defrag it, given that no write patterns will
813 * fragment it again. */
814 if (b)
815 (void) btrfs_defrag(i->path);
ebd93cb6
LP
816 break;
817 }
818
eb38edce
LP
819 case IMAGE_BLOCK: {
820 _cleanup_close_ int fd = -1;
821 struct stat st;
822 int state = b;
823
824 fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
825 if (fd < 0)
826 return -errno;
827
828 if (fstat(fd, &st) < 0)
829 return -errno;
830 if (!S_ISBLK(st.st_mode))
831 return -ENOTTY;
832
833 if (ioctl(fd, BLKROSET, &state) < 0)
834 return -errno;
835
836 break;
837 }
838
ebd93cb6 839 default:
15411c0c 840 return -EOPNOTSUPP;
ebd93cb6
LP
841 }
842
843 return 0;
08682124
LP
844}
845
30535c16
LP
846int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
847 _cleanup_free_ char *p = NULL;
848 LockFile t = LOCK_FILE_INIT;
849 struct stat st;
850 int r;
851
852 assert(path);
853 assert(global);
854 assert(local);
855
856 /* Locks an image path. This actually creates two locks: one
857 * "local" one, next to the image path itself, which might be
858 * shared via NFS. And another "global" one, in /run, that
859 * uses the device/inode number. This has the benefit that we
860 * can even lock a tree that is a mount point, correctly. */
861
30535c16
LP
862 if (!path_is_absolute(path))
863 return -EINVAL;
864
b6e953f2
LP
865 if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
866 *local = *global = (LockFile) LOCK_FILE_INIT;
867 return 0;
868 }
869
870 if (path_equal(path, "/"))
871 return -EBUSY;
872
30535c16 873 if (stat(path, &st) >= 0) {
eb38edce
LP
874 if (S_ISBLK(st.st_mode))
875 r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
876 else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
877 r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
878 else
879 return -ENOTTY;
880
881 if (r < 0)
30535c16
LP
882 return -ENOMEM;
883 }
884
eb38edce
LP
885 /* For block devices we don't need the "local" lock, as the major/minor lock above should be sufficient, since
886 * block devices are device local anyway. */
887 if (!path_startswith(path, "/dev")) {
888 r = make_lock_file_for(path, operation, &t);
889 if (r < 0)
890 return r;
891 }
30535c16
LP
892
893 if (p) {
7e7cddb2 894 mkdir_p("/run/systemd/nspawn/locks", 0700);
30535c16
LP
895
896 r = make_lock_file(p, operation, global);
897 if (r < 0) {
898 release_lock_file(&t);
899 return r;
900 }
546dbec5
LP
901 } else
902 *global = (LockFile) LOCK_FILE_INIT;
30535c16
LP
903
904 *local = t;
905 return 0;
906}
907
cb81cd80 908int image_set_limit(Image *i, uint64_t referenced_max) {
d6ce17c7
LP
909 assert(i);
910
d94c2b06 911 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
d6ce17c7
LP
912 return -EROFS;
913
914 if (i->type != IMAGE_SUBVOLUME)
15411c0c 915 return -EOPNOTSUPP;
d6ce17c7 916
5bcd08db
LP
917 /* We set the quota both for the subvolume as well as for the
918 * subtree. The latter is mostly for historical reasons, since
919 * we didn't use to have a concept of subtree quota, and hence
920 * only modified the subvolume quota. */
921
922 (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
923 (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
924 return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
d6ce17c7
LP
925}
926
30535c16
LP
927int image_name_lock(const char *name, int operation, LockFile *ret) {
928 const char *p;
929
930 assert(name);
931 assert(ret);
932
933 /* Locks an image name, regardless of the precise path used. */
934
935 if (!image_name_is_valid(name))
936 return -EINVAL;
937
b6e953f2
LP
938 if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
939 *ret = (LockFile) LOCK_FILE_INIT;
940 return 0;
941 }
942
30535c16
LP
943 if (streq(name, ".host"))
944 return -EBUSY;
945
7e7cddb2 946 mkdir_p("/run/systemd/nspawn/locks", 0700);
63c372cb 947 p = strjoina("/run/systemd/nspawn/locks/name-", name);
30535c16
LP
948
949 return make_lock_file(p, operation, ret);
950}
951
/* Checks whether "s" may be used as an image name: it has to be a valid file name, must not contain control
 * characters, has to be valid UTF-8, and must not start with ".#". */
bool image_name_is_valid(const char *s) {
        return filename_is_valid(s) &&
                !string_has_cc(s, NULL) &&
                utf8_is_valid(s) &&
                /* Temporary files for atomically creating new files */
                !startswith(s, ".#");
}
968
cd61c3bf
LP
969static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
970 [IMAGE_DIRECTORY] = "directory",
971 [IMAGE_SUBVOLUME] = "subvolume",
aceac2f0 972 [IMAGE_RAW] = "raw",
eb38edce 973 [IMAGE_BLOCK] = "block",
cd61c3bf
LP
974};
975
976DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);