]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
cd61c3bf LP |
2 | /*** |
3 | This file is part of systemd. | |
4 | ||
5 | Copyright 2013 Lennart Poettering | |
6 | ||
7 | systemd is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU Lesser General Public License as published by | |
9 | the Free Software Foundation; either version 2.1 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | systemd is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | Lesser General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU Lesser General Public License | |
18 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
19 | ***/ | |
20 | ||
a8fbdf54 TA |
21 | #include <dirent.h> |
22 | #include <errno.h> | |
ebd93cb6 | 23 | #include <fcntl.h> |
a8fbdf54 TA |
24 | #include <stdio.h> |
25 | #include <stdlib.h> | |
26 | #include <string.h> | |
e306723e | 27 | #include <sys/file.h> |
a8fbdf54 TA |
28 | #include <sys/stat.h> |
29 | #include <unistd.h> | |
8e0b6570 | 30 | #include <linux/fs.h> |
546dbec5 | 31 | |
b5efdb8a | 32 | #include "alloc-util.h" |
cd61c3bf | 33 | #include "btrfs-util.h" |
c8b3094d | 34 | #include "chattr-util.h" |
ebd93cb6 | 35 | #include "copy.h" |
a0956174 | 36 | #include "dirent-util.h" |
b6e953f2 | 37 | #include "env-util.h" |
3ffd4af2 | 38 | #include "fd-util.h" |
f4f15635 | 39 | #include "fs-util.h" |
a8fbdf54 TA |
40 | #include "hashmap.h" |
41 | #include "lockfile-util.h" | |
42 | #include "log.h" | |
3ffd4af2 | 43 | #include "machine-image.h" |
546dbec5 | 44 | #include "macro.h" |
30535c16 | 45 | #include "mkdir.h" |
8e0b6570 | 46 | #include "path-util.h" |
c6878637 | 47 | #include "rm-rf.h" |
8b43440b | 48 | #include "string-table.h" |
07630cea | 49 | #include "string-util.h" |
8e0b6570 | 50 | #include "strv.h" |
a8fbdf54 | 51 | #include "time-util.h" |
8e0b6570 | 52 | #include "utf8.h" |
a8fbdf54 | 53 | #include "util.h" |
89a5a90c | 54 | #include "xattr-util.h" |
cd61c3bf | 55 | |
/* NUL-separated list of the directories that are scanned for machine images, in the order in which
 * image_find() and image_discover() walk them. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0"          /* legacy location */
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
c2ce6a3d | 61 | |
cd61c3bf LP |
62 | Image *image_unref(Image *i) { |
63 | if (!i) | |
64 | return NULL; | |
65 | ||
66 | free(i->name); | |
67 | free(i->path); | |
6b430fdb | 68 | return mfree(i); |
cd61c3bf LP |
69 | } |
70 | ||
8e0b6570 LP |
71 | static char **image_settings_path(Image *image) { |
72 | _cleanup_strv_free_ char **l = NULL; | |
73 | char **ret; | |
74 | const char *fn, *s; | |
75 | unsigned i = 0; | |
76 | ||
77 | assert(image); | |
78 | ||
79 | l = new0(char*, 4); | |
80 | if (!l) | |
81 | return NULL; | |
82 | ||
83 | fn = strjoina(image->name, ".nspawn"); | |
84 | ||
85 | FOREACH_STRING(s, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") { | |
86 | l[i] = strappend(s, fn); | |
87 | if (!l[i]) | |
88 | return NULL; | |
89 | ||
90 | i++; | |
91 | } | |
92 | ||
93 | l[i] = file_in_same_dir(image->path, fn); | |
94 | if (!l[i]) | |
95 | return NULL; | |
96 | ||
97 | ret = l; | |
98 | l = NULL; | |
99 | ||
100 | return ret; | |
101 | } | |
102 | ||
bafbac4e LP |
103 | static char *image_roothash_path(Image *image) { |
104 | const char *fn; | |
105 | ||
106 | assert(image); | |
107 | ||
108 | fn = strjoina(image->name, ".roothash"); | |
109 | ||
110 | return file_in_same_dir(image->path, fn); | |
111 | } | |
112 | ||
c2ce6a3d | 113 | static int image_new( |
cd61c3bf | 114 | ImageType t, |
5fc7f358 | 115 | const char *pretty, |
cd61c3bf | 116 | const char *path, |
5fc7f358 | 117 | const char *filename, |
cd61c3bf | 118 | bool read_only, |
10f9c755 | 119 | usec_t crtime, |
cd61c3bf | 120 | usec_t mtime, |
c2ce6a3d | 121 | Image **ret) { |
cd61c3bf LP |
122 | |
123 | _cleanup_(image_unrefp) Image *i = NULL; | |
cd61c3bf | 124 | |
cd61c3bf LP |
125 | assert(t >= 0); |
126 | assert(t < _IMAGE_TYPE_MAX); | |
5fc7f358 LP |
127 | assert(pretty); |
128 | assert(filename); | |
c2ce6a3d | 129 | assert(ret); |
cd61c3bf | 130 | |
c2ce6a3d | 131 | i = new0(Image, 1); |
cd61c3bf LP |
132 | if (!i) |
133 | return -ENOMEM; | |
134 | ||
135 | i->type = t; | |
136 | i->read_only = read_only; | |
10f9c755 | 137 | i->crtime = crtime; |
cd61c3bf | 138 | i->mtime = mtime; |
c19de711 | 139 | i->usage = i->usage_exclusive = (uint64_t) -1; |
b6b18498 | 140 | i->limit = i->limit_exclusive = (uint64_t) -1; |
cd61c3bf | 141 | |
5fc7f358 | 142 | i->name = strdup(pretty); |
cd61c3bf LP |
143 | if (!i->name) |
144 | return -ENOMEM; | |
145 | ||
5fc7f358 | 146 | if (path) |
605405c6 | 147 | i->path = strjoin(path, "/", filename); |
5fc7f358 LP |
148 | else |
149 | i->path = strdup(filename); | |
ebeccf9e | 150 | |
5fc7f358 LP |
151 | if (!i->path) |
152 | return -ENOMEM; | |
153 | ||
154 | path_kill_slashes(i->path); | |
cd61c3bf | 155 | |
c2ce6a3d | 156 | *ret = i; |
cd61c3bf | 157 | i = NULL; |
c2ce6a3d | 158 | |
cd61c3bf LP |
159 | return 0; |
160 | } | |
161 | ||
5fc7f358 LP |
162 | static int image_make( |
163 | const char *pretty, | |
164 | int dfd, | |
165 | const char *path, | |
166 | const char *filename, | |
167 | Image **ret) { | |
168 | ||
c2ce6a3d | 169 | struct stat st; |
5fc7f358 | 170 | bool read_only; |
cd61c3bf LP |
171 | int r; |
172 | ||
5fc7f358 | 173 | assert(filename); |
cd61c3bf | 174 | |
eb38edce LP |
175 | /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block |
176 | * devices into /var/lib/machines/, and treat them normally. */ | |
cd61c3bf | 177 | |
5fc7f358 | 178 | if (fstatat(dfd, filename, &st, 0) < 0) |
c2ce6a3d | 179 | return -errno; |
cd61c3bf | 180 | |
5fc7f358 LP |
181 | read_only = |
182 | (path && path_startswith(path, "/usr")) || | |
08ff5529 | 183 | (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS); |
86e339c8 | 184 | |
c2ce6a3d | 185 | if (S_ISDIR(st.st_mode)) { |
01b72568 LP |
186 | _cleanup_close_ int fd = -1; |
187 | unsigned file_attr = 0; | |
cd61c3bf | 188 | |
c2ce6a3d LP |
189 | if (!ret) |
190 | return 1; | |
cd61c3bf | 191 | |
5fc7f358 LP |
192 | if (!pretty) |
193 | pretty = filename; | |
194 | ||
01b72568 LP |
195 | fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY); |
196 | if (fd < 0) | |
197 | return -errno; | |
198 | ||
c2ce6a3d LP |
199 | /* btrfs subvolumes have inode 256 */ |
200 | if (st.st_ino == 256) { | |
cd61c3bf | 201 | |
21222ea5 LP |
202 | r = btrfs_is_filesystem(fd); |
203 | if (r < 0) | |
204 | return r; | |
205 | if (r) { | |
10f9c755 | 206 | BtrfsSubvolInfo info; |
cd61c3bf | 207 | |
c2ce6a3d | 208 | /* It's a btrfs subvolume */ |
cd61c3bf | 209 | |
5bcd08db | 210 | r = btrfs_subvol_get_info_fd(fd, 0, &info); |
10f9c755 LP |
211 | if (r < 0) |
212 | return r; | |
c2ce6a3d LP |
213 | |
214 | r = image_new(IMAGE_SUBVOLUME, | |
5fc7f358 | 215 | pretty, |
c2ce6a3d | 216 | path, |
5fc7f358 LP |
217 | filename, |
218 | info.read_only || read_only, | |
10f9c755 | 219 | info.otime, |
c2ce6a3d | 220 | 0, |
c2ce6a3d LP |
221 | ret); |
222 | if (r < 0) | |
223 | return r; | |
224 | ||
5bcd08db LP |
225 | if (btrfs_quota_scan_ongoing(fd) == 0) { |
226 | BtrfsQuotaInfo quota; | |
b6b18498 | 227 | |
5bcd08db LP |
228 | r = btrfs_subvol_get_subtree_quota_fd(fd, 0, "a); |
229 | if (r >= 0) { | |
230 | (*ret)->usage = quota.referenced; | |
231 | (*ret)->usage_exclusive = quota.exclusive; | |
232 | ||
233 | (*ret)->limit = quota.referenced_max; | |
234 | (*ret)->limit_exclusive = quota.exclusive_max; | |
235 | } | |
b6b18498 LP |
236 | } |
237 | ||
c2ce6a3d | 238 | return 1; |
cd61c3bf | 239 | } |
c2ce6a3d | 240 | } |
cd61c3bf | 241 | |
01b72568 LP |
242 | /* If the IMMUTABLE bit is set, we consider the |
243 | * directory read-only. Since the ioctl is not | |
244 | * supported everywhere we ignore failures. */ | |
245 | (void) read_attr_fd(fd, &file_attr); | |
cd61c3bf | 246 | |
01b72568 | 247 | /* It's just a normal directory. */ |
c2ce6a3d | 248 | r = image_new(IMAGE_DIRECTORY, |
5fc7f358 | 249 | pretty, |
c2ce6a3d | 250 | path, |
5fc7f358 | 251 | filename, |
01b72568 | 252 | read_only || (file_attr & FS_IMMUTABLE_FL), |
c2ce6a3d LP |
253 | 0, |
254 | 0, | |
255 | ret); | |
256 | if (r < 0) | |
257 | return r; | |
cd61c3bf | 258 | |
c2ce6a3d | 259 | return 1; |
cd61c3bf | 260 | |
aceac2f0 | 261 | } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) { |
10f9c755 | 262 | usec_t crtime = 0; |
cd61c3bf | 263 | |
aceac2f0 | 264 | /* It's a RAW disk image */ |
cd61c3bf | 265 | |
c2ce6a3d LP |
266 | if (!ret) |
267 | return 1; | |
cd61c3bf | 268 | |
5fc7f358 | 269 | fd_getcrtime_at(dfd, filename, &crtime, 0); |
10f9c755 | 270 | |
5fc7f358 LP |
271 | if (!pretty) |
272 | pretty = strndupa(filename, strlen(filename) - 4); | |
10f9c755 | 273 | |
aceac2f0 | 274 | r = image_new(IMAGE_RAW, |
5fc7f358 | 275 | pretty, |
c2ce6a3d | 276 | path, |
5fc7f358 LP |
277 | filename, |
278 | !(st.st_mode & 0222) || read_only, | |
10f9c755 | 279 | crtime, |
c2ce6a3d | 280 | timespec_load(&st.st_mtim), |
c2ce6a3d LP |
281 | ret); |
282 | if (r < 0) | |
283 | return r; | |
cd61c3bf | 284 | |
c19de711 | 285 | (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512; |
b6b18498 LP |
286 | (*ret)->limit = (*ret)->limit_exclusive = st.st_size; |
287 | ||
c2ce6a3d | 288 | return 1; |
eb38edce LP |
289 | |
290 | } else if (S_ISBLK(st.st_mode)) { | |
291 | _cleanup_close_ int block_fd = -1; | |
292 | uint64_t size = UINT64_MAX; | |
293 | ||
294 | /* A block device */ | |
295 | ||
296 | if (!ret) | |
297 | return 1; | |
298 | ||
299 | if (!pretty) | |
300 | pretty = filename; | |
301 | ||
302 | block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY); | |
303 | if (block_fd < 0) | |
304 | log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path, filename); | |
305 | else { | |
306 | if (fstat(block_fd, &st) < 0) | |
307 | return -errno; | |
308 | if (!S_ISBLK(st.st_mode)) /* Verify that what we opened is actually what we think it is */ | |
309 | return -ENOTTY; | |
310 | ||
311 | if (!read_only) { | |
312 | int state = 0; | |
313 | ||
314 | if (ioctl(block_fd, BLKROGET, &state) < 0) | |
315 | log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path, filename); | |
316 | else if (state) | |
317 | read_only = true; | |
318 | } | |
319 | ||
320 | if (ioctl(block_fd, BLKGETSIZE64, &size) < 0) | |
321 | log_debug_errno(errno, "Failed to issue BLKFLSBUF on device %s/%s, ignoring: %m", path, filename); | |
322 | ||
323 | block_fd = safe_close(block_fd); | |
324 | } | |
325 | ||
326 | r = image_new(IMAGE_BLOCK, | |
327 | pretty, | |
328 | path, | |
329 | filename, | |
330 | !(st.st_mode & 0222) || read_only, | |
331 | 0, | |
332 | 0, | |
333 | ret); | |
334 | if (r < 0) | |
335 | return r; | |
336 | ||
337 | if (size != 0 && size != UINT64_MAX) | |
338 | (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size; | |
339 | ||
340 | return 1; | |
c2ce6a3d | 341 | } |
cd61c3bf | 342 | |
c2ce6a3d LP |
343 | return 0; |
344 | } | |
cd61c3bf | 345 | |
c2ce6a3d LP |
346 | int image_find(const char *name, Image **ret) { |
347 | const char *path; | |
348 | int r; | |
cd61c3bf | 349 | |
c2ce6a3d | 350 | assert(name); |
cd61c3bf | 351 | |
c2ce6a3d LP |
352 | /* There are no images with invalid names */ |
353 | if (!image_name_is_valid(name)) | |
354 | return 0; | |
cd61c3bf | 355 | |
c2ce6a3d LP |
356 | NULSTR_FOREACH(path, image_search_path) { |
357 | _cleanup_closedir_ DIR *d = NULL; | |
cd61c3bf | 358 | |
c2ce6a3d LP |
359 | d = opendir(path); |
360 | if (!d) { | |
361 | if (errno == ENOENT) | |
362 | continue; | |
cd61c3bf | 363 | |
c2ce6a3d LP |
364 | return -errno; |
365 | } | |
cd61c3bf | 366 | |
5fc7f358 | 367 | r = image_make(NULL, dirfd(d), path, name, ret); |
4c701096 | 368 | if (IN_SET(r, 0, -ENOENT)) { |
aceac2f0 | 369 | _cleanup_free_ char *raw = NULL; |
5fc7f358 | 370 | |
aceac2f0 LP |
371 | raw = strappend(name, ".raw"); |
372 | if (!raw) | |
5fc7f358 LP |
373 | return -ENOMEM; |
374 | ||
aceac2f0 | 375 | r = image_make(NULL, dirfd(d), path, raw, ret); |
4c701096 | 376 | if (IN_SET(r, 0, -ENOENT)) |
5fc7f358 LP |
377 | continue; |
378 | } | |
c2ce6a3d LP |
379 | if (r < 0) |
380 | return r; | |
cd61c3bf | 381 | |
c2ce6a3d LP |
382 | return 1; |
383 | } | |
384 | ||
5fc7f358 | 385 | if (streq(name, ".host")) |
27c88c4e | 386 | return image_make(".host", AT_FDCWD, NULL, "/", ret); |
5fc7f358 | 387 | |
c2ce6a3d LP |
388 | return 0; |
389 | }; | |
390 | ||
391 | int image_discover(Hashmap *h) { | |
392 | const char *path; | |
393 | int r; | |
394 | ||
395 | assert(h); | |
396 | ||
397 | NULSTR_FOREACH(path, image_search_path) { | |
398 | _cleanup_closedir_ DIR *d = NULL; | |
399 | struct dirent *de; | |
400 | ||
401 | d = opendir(path); | |
402 | if (!d) { | |
403 | if (errno == ENOENT) | |
a67a4c8c | 404 | continue; |
c2ce6a3d LP |
405 | |
406 | return -errno; | |
407 | } | |
408 | ||
409 | FOREACH_DIRENT_ALL(de, d, return -errno) { | |
410 | _cleanup_(image_unrefp) Image *image = NULL; | |
411 | ||
412 | if (!image_name_is_valid(de->d_name)) | |
413 | continue; | |
414 | ||
415 | if (hashmap_contains(h, de->d_name)) | |
416 | continue; | |
417 | ||
5fc7f358 | 418 | r = image_make(NULL, dirfd(d), path, de->d_name, &image); |
4c701096 | 419 | if (IN_SET(r, 0, -ENOENT)) |
c2ce6a3d LP |
420 | continue; |
421 | if (r < 0) | |
422 | return r; | |
423 | ||
424 | r = hashmap_put(h, image->name, image); | |
425 | if (r < 0) | |
426 | return r; | |
427 | ||
428 | image = NULL; | |
cd61c3bf LP |
429 | } |
430 | } | |
431 | ||
5fc7f358 LP |
432 | if (!hashmap_contains(h, ".host")) { |
433 | _cleanup_(image_unrefp) Image *image = NULL; | |
434 | ||
435 | r = image_make(".host", AT_FDCWD, NULL, "/", &image); | |
436 | if (r < 0) | |
437 | return r; | |
438 | ||
439 | r = hashmap_put(h, image->name, image); | |
440 | if (r < 0) | |
441 | return r; | |
442 | ||
443 | image = NULL; | |
444 | ||
445 | } | |
446 | ||
cd61c3bf LP |
447 | return 0; |
448 | } | |
449 | ||
450 | void image_hashmap_free(Hashmap *map) { | |
451 | Image *i; | |
452 | ||
453 | while ((i = hashmap_steal_first(map))) | |
454 | image_unref(i); | |
455 | ||
456 | hashmap_free(map); | |
457 | } | |
458 | ||
08682124 | 459 | int image_remove(Image *i) { |
30535c16 | 460 | _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; |
8e0b6570 | 461 | _cleanup_strv_free_ char **settings = NULL; |
bafbac4e | 462 | _cleanup_free_ char *roothash = NULL; |
8e0b6570 | 463 | char **j; |
30535c16 LP |
464 | int r; |
465 | ||
08682124 LP |
466 | assert(i); |
467 | ||
d94c2b06 | 468 | if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i)) |
08682124 LP |
469 | return -EROFS; |
470 | ||
8e0b6570 LP |
471 | settings = image_settings_path(i); |
472 | if (!settings) | |
473 | return -ENOMEM; | |
474 | ||
bafbac4e LP |
475 | roothash = image_roothash_path(i); |
476 | if (!roothash) | |
477 | return -ENOMEM; | |
478 | ||
30535c16 LP |
479 | /* Make sure we don't interfere with a running nspawn */ |
480 | r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); | |
481 | if (r < 0) | |
482 | return r; | |
483 | ||
ebd93cb6 LP |
484 | switch (i->type) { |
485 | ||
486 | case IMAGE_SUBVOLUME: | |
9fb0b9c7 LP |
487 | |
488 | /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the | |
489 | * big guns */ | |
490 | if (unlink(i->path) < 0) { | |
491 | r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); | |
492 | if (r < 0) | |
493 | return r; | |
494 | } | |
495 | ||
8e0b6570 | 496 | break; |
ebd93cb6 LP |
497 | |
498 | case IMAGE_DIRECTORY: | |
01b72568 | 499 | /* Allow deletion of read-only directories */ |
a67d68b8 | 500 | (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL); |
8e0b6570 LP |
501 | r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME); |
502 | if (r < 0) | |
503 | return r; | |
504 | ||
505 | break; | |
01b72568 | 506 | |
eb38edce LP |
507 | case IMAGE_BLOCK: |
508 | ||
509 | /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node | |
510 | * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink | |
511 | * the thing (it's most likely a symlink after all). */ | |
512 | ||
513 | if (path_startswith(i->path, "/dev")) | |
514 | break; | |
515 | ||
516 | /* fallthrough */ | |
517 | ||
aceac2f0 | 518 | case IMAGE_RAW: |
41d1ed05 LP |
519 | if (unlink(i->path) < 0) |
520 | return -errno; | |
8e0b6570 | 521 | break; |
ebd93cb6 LP |
522 | |
523 | default: | |
15411c0c | 524 | return -EOPNOTSUPP; |
ebd93cb6 | 525 | } |
8e0b6570 LP |
526 | |
527 | STRV_FOREACH(j, settings) { | |
528 | if (unlink(*j) < 0 && errno != ENOENT) | |
529 | log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j); | |
530 | } | |
531 | ||
bafbac4e LP |
532 | if (unlink(roothash) < 0 && errno != ENOENT) |
533 | log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash); | |
534 | ||
8e0b6570 LP |
535 | return 0; |
536 | } | |
537 | ||
bafbac4e | 538 | static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) { |
8e0b6570 LP |
539 | _cleanup_free_ char *rs = NULL; |
540 | const char *fn; | |
541 | ||
bafbac4e | 542 | fn = strjoina(new_name, suffix); |
8e0b6570 LP |
543 | |
544 | rs = file_in_same_dir(path, fn); | |
545 | if (!rs) | |
546 | return -ENOMEM; | |
547 | ||
548 | return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs); | |
ebd93cb6 LP |
549 | } |
550 | ||
551 | int image_rename(Image *i, const char *new_name) { | |
30535c16 | 552 | _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT; |
bafbac4e | 553 | _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL; |
8e0b6570 | 554 | _cleanup_strv_free_ char **settings = NULL; |
01b72568 | 555 | unsigned file_attr = 0; |
8e0b6570 | 556 | char **j; |
ebd93cb6 LP |
557 | int r; |
558 | ||
559 | assert(i); | |
560 | ||
561 | if (!image_name_is_valid(new_name)) | |
562 | return -EINVAL; | |
563 | ||
d94c2b06 | 564 | if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i)) |
ebd93cb6 LP |
565 | return -EROFS; |
566 | ||
8e0b6570 LP |
567 | settings = image_settings_path(i); |
568 | if (!settings) | |
569 | return -ENOMEM; | |
570 | ||
bafbac4e LP |
571 | roothash = image_roothash_path(i); |
572 | if (!roothash) | |
573 | return -ENOMEM; | |
574 | ||
30535c16 LP |
575 | /* Make sure we don't interfere with a running nspawn */ |
576 | r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); | |
577 | if (r < 0) | |
578 | return r; | |
579 | ||
580 | /* Make sure nobody takes the new name, between the time we | |
581 | * checked it is currently unused in all search paths, and the | |
f8e2f4d6 | 582 | * time we take possession of it */ |
30535c16 LP |
583 | r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock); |
584 | if (r < 0) | |
585 | return r; | |
586 | ||
ebd93cb6 LP |
587 | r = image_find(new_name, NULL); |
588 | if (r < 0) | |
589 | return r; | |
590 | if (r > 0) | |
591 | return -EEXIST; | |
592 | ||
593 | switch (i->type) { | |
594 | ||
ebd93cb6 | 595 | case IMAGE_DIRECTORY: |
01b72568 LP |
596 | /* Turn of the immutable bit while we rename the image, so that we can rename it */ |
597 | (void) read_attr_path(i->path, &file_attr); | |
598 | ||
599 | if (file_attr & FS_IMMUTABLE_FL) | |
a67d68b8 | 600 | (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL); |
01b72568 LP |
601 | |
602 | /* fall through */ | |
603 | ||
604 | case IMAGE_SUBVOLUME: | |
ebd93cb6 LP |
605 | new_path = file_in_same_dir(i->path, new_name); |
606 | break; | |
607 | ||
eb38edce LP |
608 | case IMAGE_BLOCK: |
609 | ||
610 | /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */ | |
611 | if (path_startswith(i->path, "/dev")) | |
612 | return -EROFS; | |
613 | ||
614 | new_path = file_in_same_dir(i->path, new_name); | |
615 | break; | |
616 | ||
aceac2f0 | 617 | case IMAGE_RAW: { |
ebd93cb6 LP |
618 | const char *fn; |
619 | ||
63c372cb | 620 | fn = strjoina(new_name, ".raw"); |
ebd93cb6 LP |
621 | new_path = file_in_same_dir(i->path, fn); |
622 | break; | |
623 | } | |
624 | ||
625 | default: | |
15411c0c | 626 | return -EOPNOTSUPP; |
ebd93cb6 LP |
627 | } |
628 | ||
629 | if (!new_path) | |
630 | return -ENOMEM; | |
631 | ||
632 | nn = strdup(new_name); | |
633 | if (!nn) | |
634 | return -ENOMEM; | |
635 | ||
f85ef957 AC |
636 | r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path); |
637 | if (r < 0) | |
638 | return r; | |
ebd93cb6 | 639 | |
01b72568 LP |
640 | /* Restore the immutable bit, if it was set before */ |
641 | if (file_attr & FS_IMMUTABLE_FL) | |
a67d68b8 | 642 | (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL); |
01b72568 | 643 | |
ebd93cb6 LP |
644 | free(i->path); |
645 | i->path = new_path; | |
646 | new_path = NULL; | |
647 | ||
648 | free(i->name); | |
649 | i->name = nn; | |
650 | nn = NULL; | |
651 | ||
8e0b6570 | 652 | STRV_FOREACH(j, settings) { |
bafbac4e | 653 | r = rename_auxiliary_file(*j, new_name, ".nspawn"); |
8e0b6570 LP |
654 | if (r < 0 && r != -ENOENT) |
655 | log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j); | |
656 | } | |
657 | ||
bafbac4e LP |
658 | r = rename_auxiliary_file(roothash, new_name, ".roothash"); |
659 | if (r < 0 && r != -ENOENT) | |
660 | log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash); | |
661 | ||
ebd93cb6 LP |
662 | return 0; |
663 | } | |
664 | ||
bafbac4e | 665 | static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) { |
8e0b6570 LP |
666 | _cleanup_free_ char *rs = NULL; |
667 | const char *fn; | |
668 | ||
bafbac4e | 669 | fn = strjoina(new_name, suffix); |
8e0b6570 LP |
670 | |
671 | rs = file_in_same_dir(path, fn); | |
672 | if (!rs) | |
673 | return -ENOMEM; | |
674 | ||
1c876927 | 675 | return copy_file_atomic(path, rs, 0664, 0, COPY_REFLINK); |
8e0b6570 LP |
676 | } |
677 | ||
ebd93cb6 | 678 | int image_clone(Image *i, const char *new_name, bool read_only) { |
30535c16 | 679 | _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT; |
8e0b6570 | 680 | _cleanup_strv_free_ char **settings = NULL; |
bafbac4e | 681 | _cleanup_free_ char *roothash = NULL; |
ebd93cb6 | 682 | const char *new_path; |
8e0b6570 | 683 | char **j; |
ebd93cb6 LP |
684 | int r; |
685 | ||
686 | assert(i); | |
687 | ||
688 | if (!image_name_is_valid(new_name)) | |
689 | return -EINVAL; | |
690 | ||
8e0b6570 LP |
691 | settings = image_settings_path(i); |
692 | if (!settings) | |
693 | return -ENOMEM; | |
694 | ||
bafbac4e LP |
695 | roothash = image_roothash_path(i); |
696 | if (!roothash) | |
697 | return -ENOMEM; | |
698 | ||
30535c16 LP |
699 | /* Make sure nobody takes the new name, between the time we |
700 | * checked it is currently unused in all search paths, and the | |
f8e2f4d6 | 701 | * time we take possession of it */ |
30535c16 LP |
702 | r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock); |
703 | if (r < 0) | |
704 | return r; | |
705 | ||
ebd93cb6 LP |
706 | r = image_find(new_name, NULL); |
707 | if (r < 0) | |
708 | return r; | |
709 | if (r > 0) | |
710 | return -EEXIST; | |
711 | ||
712 | switch (i->type) { | |
713 | ||
714 | case IMAGE_SUBVOLUME: | |
715 | case IMAGE_DIRECTORY: | |
9a50e3ca | 716 | /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain |
13e785f7 | 717 | * directory. */ |
9a50e3ca | 718 | |
63c372cb | 719 | new_path = strjoina("/var/lib/machines/", new_name); |
ebd93cb6 | 720 | |
17cbb288 LP |
721 | r = btrfs_subvol_snapshot(i->path, new_path, |
722 | (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | | |
723 | BTRFS_SNAPSHOT_FALLBACK_COPY | | |
724 | BTRFS_SNAPSHOT_FALLBACK_DIRECTORY | | |
725 | BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE | | |
726 | BTRFS_SNAPSHOT_RECURSIVE | | |
727 | BTRFS_SNAPSHOT_QUOTA); | |
728 | if (r >= 0) | |
9a50e3ca | 729 | /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */ |
8120ee28 | 730 | (void) btrfs_subvol_auto_qgroup(new_path, 0, true); |
5bcd08db | 731 | |
ebd93cb6 LP |
732 | break; |
733 | ||
aceac2f0 | 734 | case IMAGE_RAW: |
63c372cb | 735 | new_path = strjoina("/var/lib/machines/", new_name, ".raw"); |
ebd93cb6 | 736 | |
1c876927 | 737 | r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK); |
ebd93cb6 LP |
738 | break; |
739 | ||
eb38edce | 740 | case IMAGE_BLOCK: |
ebd93cb6 | 741 | default: |
15411c0c | 742 | return -EOPNOTSUPP; |
ebd93cb6 LP |
743 | } |
744 | ||
745 | if (r < 0) | |
746 | return r; | |
747 | ||
8e0b6570 | 748 | STRV_FOREACH(j, settings) { |
bafbac4e | 749 | r = clone_auxiliary_file(*j, new_name, ".nspawn"); |
8e0b6570 LP |
750 | if (r < 0 && r != -ENOENT) |
751 | log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j); | |
752 | } | |
753 | ||
bafbac4e LP |
754 | r = clone_auxiliary_file(roothash, new_name, ".roothash"); |
755 | if (r < 0 && r != -ENOENT) | |
756 | log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash); | |
757 | ||
ebd93cb6 LP |
758 | return 0; |
759 | } | |
760 | ||
761 | int image_read_only(Image *i, bool b) { | |
30535c16 | 762 | _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; |
ebd93cb6 LP |
763 | int r; |
764 | assert(i); | |
765 | ||
d94c2b06 | 766 | if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i)) |
ebd93cb6 LP |
767 | return -EROFS; |
768 | ||
30535c16 LP |
769 | /* Make sure we don't interfere with a running nspawn */ |
770 | r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); | |
771 | if (r < 0) | |
772 | return r; | |
773 | ||
ebd93cb6 LP |
774 | switch (i->type) { |
775 | ||
776 | case IMAGE_SUBVOLUME: | |
5bcd08db LP |
777 | |
778 | /* Note that we set the flag only on the top-level | |
779 | * subvolume of the image. */ | |
780 | ||
ebd93cb6 LP |
781 | r = btrfs_subvol_set_read_only(i->path, b); |
782 | if (r < 0) | |
783 | return r; | |
01b72568 LP |
784 | |
785 | break; | |
786 | ||
787 | case IMAGE_DIRECTORY: | |
788 | /* For simple directory trees we cannot use the access | |
789 | mode of the top-level directory, since it has an | |
790 | effect on the container itself. However, we can | |
791 | use the "immutable" flag, to at least make the | |
792 | top-level directory read-only. It's not as good as | |
793 | a read-only subvolume, but at least something, and | |
13e785f7 | 794 | we can read the value back. */ |
01b72568 | 795 | |
a67d68b8 | 796 | r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL); |
01b72568 LP |
797 | if (r < 0) |
798 | return r; | |
799 | ||
ebd93cb6 LP |
800 | break; |
801 | ||
aceac2f0 | 802 | case IMAGE_RAW: { |
ebd93cb6 LP |
803 | struct stat st; |
804 | ||
805 | if (stat(i->path, &st) < 0) | |
806 | return -errno; | |
807 | ||
808 | if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0) | |
809 | return -errno; | |
f2068bcc LP |
810 | |
811 | /* If the images is now read-only, it's a good time to | |
812 | * defrag it, given that no write patterns will | |
813 | * fragment it again. */ | |
814 | if (b) | |
815 | (void) btrfs_defrag(i->path); | |
ebd93cb6 LP |
816 | break; |
817 | } | |
818 | ||
eb38edce LP |
819 | case IMAGE_BLOCK: { |
820 | _cleanup_close_ int fd = -1; | |
821 | struct stat st; | |
822 | int state = b; | |
823 | ||
824 | fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY); | |
825 | if (fd < 0) | |
826 | return -errno; | |
827 | ||
828 | if (fstat(fd, &st) < 0) | |
829 | return -errno; | |
830 | if (!S_ISBLK(st.st_mode)) | |
831 | return -ENOTTY; | |
832 | ||
833 | if (ioctl(fd, BLKROSET, &state) < 0) | |
834 | return -errno; | |
835 | ||
836 | break; | |
837 | } | |
838 | ||
ebd93cb6 | 839 | default: |
15411c0c | 840 | return -EOPNOTSUPP; |
ebd93cb6 LP |
841 | } |
842 | ||
843 | return 0; | |
08682124 LP |
844 | } |
845 | ||
30535c16 LP |
846 | int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) { |
847 | _cleanup_free_ char *p = NULL; | |
848 | LockFile t = LOCK_FILE_INIT; | |
849 | struct stat st; | |
850 | int r; | |
851 | ||
852 | assert(path); | |
853 | assert(global); | |
854 | assert(local); | |
855 | ||
856 | /* Locks an image path. This actually creates two locks: one | |
857 | * "local" one, next to the image path itself, which might be | |
858 | * shared via NFS. And another "global" one, in /run, that | |
859 | * uses the device/inode number. This has the benefit that we | |
860 | * can even lock a tree that is a mount point, correctly. */ | |
861 | ||
30535c16 LP |
862 | if (!path_is_absolute(path)) |
863 | return -EINVAL; | |
864 | ||
b6e953f2 LP |
865 | if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) { |
866 | *local = *global = (LockFile) LOCK_FILE_INIT; | |
867 | return 0; | |
868 | } | |
869 | ||
870 | if (path_equal(path, "/")) | |
871 | return -EBUSY; | |
872 | ||
30535c16 | 873 | if (stat(path, &st) >= 0) { |
eb38edce LP |
874 | if (S_ISBLK(st.st_mode)) |
875 | r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev)); | |
876 | else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode)) | |
877 | r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino); | |
878 | else | |
879 | return -ENOTTY; | |
880 | ||
881 | if (r < 0) | |
30535c16 LP |
882 | return -ENOMEM; |
883 | } | |
884 | ||
eb38edce LP |
885 | /* For block devices we don't need the "local" lock, as the major/minor lock above should be sufficient, since |
886 | * block devices are device local anyway. */ | |
887 | if (!path_startswith(path, "/dev")) { | |
888 | r = make_lock_file_for(path, operation, &t); | |
889 | if (r < 0) | |
890 | return r; | |
891 | } | |
30535c16 LP |
892 | |
893 | if (p) { | |
7e7cddb2 | 894 | mkdir_p("/run/systemd/nspawn/locks", 0700); |
30535c16 LP |
895 | |
896 | r = make_lock_file(p, operation, global); | |
897 | if (r < 0) { | |
898 | release_lock_file(&t); | |
899 | return r; | |
900 | } | |
546dbec5 LP |
901 | } else |
902 | *global = (LockFile) LOCK_FILE_INIT; | |
30535c16 LP |
903 | |
904 | *local = t; | |
905 | return 0; | |
906 | } | |
907 | ||
cb81cd80 | 908 | int image_set_limit(Image *i, uint64_t referenced_max) { |
d6ce17c7 LP |
909 | assert(i); |
910 | ||
d94c2b06 | 911 | if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i)) |
d6ce17c7 LP |
912 | return -EROFS; |
913 | ||
914 | if (i->type != IMAGE_SUBVOLUME) | |
15411c0c | 915 | return -EOPNOTSUPP; |
d6ce17c7 | 916 | |
5bcd08db LP |
917 | /* We set the quota both for the subvolume as well as for the |
918 | * subtree. The latter is mostly for historical reasons, since | |
919 | * we didn't use to have a concept of subtree quota, and hence | |
920 | * only modified the subvolume quota. */ | |
921 | ||
922 | (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max); | |
923 | (void) btrfs_subvol_auto_qgroup(i->path, 0, true); | |
924 | return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max); | |
d6ce17c7 LP |
925 | } |
926 | ||
30535c16 LP |
927 | int image_name_lock(const char *name, int operation, LockFile *ret) { |
928 | const char *p; | |
929 | ||
930 | assert(name); | |
931 | assert(ret); | |
932 | ||
933 | /* Locks an image name, regardless of the precise path used. */ | |
934 | ||
935 | if (!image_name_is_valid(name)) | |
936 | return -EINVAL; | |
937 | ||
b6e953f2 LP |
938 | if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) { |
939 | *ret = (LockFile) LOCK_FILE_INIT; | |
940 | return 0; | |
941 | } | |
942 | ||
30535c16 LP |
943 | if (streq(name, ".host")) |
944 | return -EBUSY; | |
945 | ||
7e7cddb2 | 946 | mkdir_p("/run/systemd/nspawn/locks", 0700); |
63c372cb | 947 | p = strjoina("/run/systemd/nspawn/locks/name-", name); |
30535c16 LP |
948 | |
949 | return make_lock_file(p, operation, ret); | |
950 | } | |
951 | ||
/* Checks whether 's' is acceptable as an image name: it has to pass filename_is_valid(), must not
 * contain control characters, has to be valid UTF-8, and must not start with ".#". */
bool image_name_is_valid(const char *s) {
        if (!filename_is_valid(s) || string_has_cc(s, NULL) || !utf8_is_valid(s))
                return false;

        /* The ".#" prefix is used by temporary files for atomically creating new files */
        return !startswith(s, ".#");
}
968 | ||
cd61c3bf LP |
969 | static const char* const image_type_table[_IMAGE_TYPE_MAX] = { |
970 | [IMAGE_DIRECTORY] = "directory", | |
971 | [IMAGE_SUBVOLUME] = "subvolume", | |
aceac2f0 | 972 | [IMAGE_RAW] = "raw", |
eb38edce | 973 | [IMAGE_BLOCK] = "block", |
cd61c3bf LP |
974 | }; |
975 | ||
976 | DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType); |