]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/machine-image.c
shared: the btrfs quota field is called "referenced" not "referred"
[thirdparty/systemd.git] / src / shared / machine-image.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/statfs.h>
23 #include <linux/fs.h>
24 #include <fcntl.h>
25
26 #include "utf8.h"
27 #include "btrfs-util.h"
28 #include "path-util.h"
29 #include "copy.h"
30 #include "mkdir.h"
31 #include "machine-image.h"
32
/* Directories that are scanned for images, in order of priority. This is a
 * NUL-separated string list (terminated by an empty string) suitable for
 * iteration with NULSTR_FOREACH(). Images found below /usr are always
 * treated as read-only by image_make(). */
static const char image_search_path[] =
        "/var/lib/machines\0"           /* primary image directory */
        "/var/lib/container\0"
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
38
39 Image *image_unref(Image *i) {
40 if (!i)
41 return NULL;
42
43 free(i->name);
44 free(i->path);
45 free(i);
46 return NULL;
47 }
48
49 static int image_new(
50 ImageType t,
51 const char *pretty,
52 const char *path,
53 const char *filename,
54 bool read_only,
55 usec_t crtime,
56 usec_t mtime,
57 Image **ret) {
58
59 _cleanup_(image_unrefp) Image *i = NULL;
60
61 assert(t >= 0);
62 assert(t < _IMAGE_TYPE_MAX);
63 assert(pretty);
64 assert(filename);
65 assert(ret);
66
67 i = new0(Image, 1);
68 if (!i)
69 return -ENOMEM;
70
71 i->type = t;
72 i->read_only = read_only;
73 i->crtime = crtime;
74 i->mtime = mtime;
75 i->usage = i->usage_exclusive = (uint64_t) -1;
76 i->limit = i->limit_exclusive = (uint64_t) -1;
77
78 i->name = strdup(pretty);
79 if (!i->name)
80 return -ENOMEM;
81
82 if (path)
83 i->path = strjoin(path, "/", filename, NULL);
84 else
85 i->path = strdup(filename);
86
87 if (!i->path)
88 return -ENOMEM;
89
90 path_kill_slashes(i->path);
91
92 *ret = i;
93 i = NULL;
94
95 return 0;
96 }
97
98 static int image_make(
99 const char *pretty,
100 int dfd,
101 const char *path,
102 const char *filename,
103 Image **ret) {
104
105 struct stat st;
106 bool read_only;
107 int r;
108
109 assert(filename);
110
111 /* We explicitly *do* follow symlinks here, since we want to
112 * allow symlinking trees into /var/lib/machines/, and treat
113 * them normally. */
114
115 if (fstatat(dfd, filename, &st, 0) < 0)
116 return -errno;
117
118 read_only =
119 (path && path_startswith(path, "/usr")) ||
120 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
121
122 if (S_ISDIR(st.st_mode)) {
123 _cleanup_close_ int fd = -1;
124 unsigned file_attr = 0;
125
126 if (!ret)
127 return 1;
128
129 if (!pretty)
130 pretty = filename;
131
132 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
133 if (fd < 0)
134 return -errno;
135
136 /* btrfs subvolumes have inode 256 */
137 if (st.st_ino == 256) {
138 struct statfs sfs;
139
140 if (fstatfs(fd, &sfs) < 0)
141 return -errno;
142
143 if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) {
144 BtrfsSubvolInfo info;
145 BtrfsQuotaInfo quota;
146
147 /* It's a btrfs subvolume */
148
149 r = btrfs_subvol_get_info_fd(fd, &info);
150 if (r < 0)
151 return r;
152
153 r = image_new(IMAGE_SUBVOLUME,
154 pretty,
155 path,
156 filename,
157 info.read_only || read_only,
158 info.otime,
159 0,
160 ret);
161 if (r < 0)
162 return r;
163
164 r = btrfs_subvol_get_quota_fd(fd, &quota);
165 if (r >= 0) {
166 (*ret)->usage = quota.referenced;
167 (*ret)->usage_exclusive = quota.exclusive;
168
169 (*ret)->limit = quota.referenced_max;
170 (*ret)->limit_exclusive = quota.exclusive_max;
171 }
172
173 return 1;
174 }
175 }
176
177 /* If the IMMUTABLE bit is set, we consider the
178 * directory read-only. Since the ioctl is not
179 * supported everywhere we ignore failures. */
180 (void) read_attr_fd(fd, &file_attr);
181
182 /* It's just a normal directory. */
183 r = image_new(IMAGE_DIRECTORY,
184 pretty,
185 path,
186 filename,
187 read_only || (file_attr & FS_IMMUTABLE_FL),
188 0,
189 0,
190 ret);
191 if (r < 0)
192 return r;
193
194 return 1;
195
196 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
197 usec_t crtime = 0;
198
199 /* It's a RAW disk image */
200
201 if (!ret)
202 return 1;
203
204 fd_getcrtime_at(dfd, filename, &crtime, 0);
205
206 if (!pretty)
207 pretty = strndupa(filename, strlen(filename) - 4);
208
209 r = image_new(IMAGE_RAW,
210 pretty,
211 path,
212 filename,
213 !(st.st_mode & 0222) || read_only,
214 crtime,
215 timespec_load(&st.st_mtim),
216 ret);
217 if (r < 0)
218 return r;
219
220 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
221 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
222
223 return 1;
224 }
225
226 return 0;
227 }
228
229 int image_find(const char *name, Image **ret) {
230 const char *path;
231 int r;
232
233 assert(name);
234
235 /* There are no images with invalid names */
236 if (!image_name_is_valid(name))
237 return 0;
238
239 NULSTR_FOREACH(path, image_search_path) {
240 _cleanup_closedir_ DIR *d = NULL;
241
242 d = opendir(path);
243 if (!d) {
244 if (errno == ENOENT)
245 continue;
246
247 return -errno;
248 }
249
250 r = image_make(NULL, dirfd(d), path, name, ret);
251 if (r == 0 || r == -ENOENT) {
252 _cleanup_free_ char *raw = NULL;
253
254 raw = strappend(name, ".raw");
255 if (!raw)
256 return -ENOMEM;
257
258 r = image_make(NULL, dirfd(d), path, raw, ret);
259 if (r == 0 || r == -ENOENT)
260 continue;
261 }
262 if (r < 0)
263 return r;
264
265 return 1;
266 }
267
268 if (streq(name, ".host"))
269 return image_make(".host", AT_FDCWD, NULL, "/", ret);
270
271 return 0;
272 };
273
274 int image_discover(Hashmap *h) {
275 const char *path;
276 int r;
277
278 assert(h);
279
280 NULSTR_FOREACH(path, image_search_path) {
281 _cleanup_closedir_ DIR *d = NULL;
282 struct dirent *de;
283
284 d = opendir(path);
285 if (!d) {
286 if (errno == ENOENT)
287 continue;
288
289 return -errno;
290 }
291
292 FOREACH_DIRENT_ALL(de, d, return -errno) {
293 _cleanup_(image_unrefp) Image *image = NULL;
294
295 if (!image_name_is_valid(de->d_name))
296 continue;
297
298 if (hashmap_contains(h, de->d_name))
299 continue;
300
301 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
302 if (r == 0 || r == -ENOENT)
303 continue;
304 if (r < 0)
305 return r;
306
307 r = hashmap_put(h, image->name, image);
308 if (r < 0)
309 return r;
310
311 image = NULL;
312 }
313 }
314
315 if (!hashmap_contains(h, ".host")) {
316 _cleanup_(image_unrefp) Image *image = NULL;
317
318 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
319 if (r < 0)
320 return r;
321
322 r = hashmap_put(h, image->name, image);
323 if (r < 0)
324 return r;
325
326 image = NULL;
327
328 }
329
330 return 0;
331 }
332
333 void image_hashmap_free(Hashmap *map) {
334 Image *i;
335
336 while ((i = hashmap_steal_first(map)))
337 image_unref(i);
338
339 hashmap_free(map);
340 }
341
342 int image_remove(Image *i) {
343 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
344 int r;
345
346 assert(i);
347
348 if (path_equal(i->path, "/") ||
349 path_startswith(i->path, "/usr"))
350 return -EROFS;
351
352 /* Make sure we don't interfere with a running nspawn */
353 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
354 if (r < 0)
355 return r;
356
357 switch (i->type) {
358
359 case IMAGE_SUBVOLUME:
360 return btrfs_subvol_remove(i->path);
361
362 case IMAGE_DIRECTORY:
363 /* Allow deletion of read-only directories */
364 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
365
366 /* fall through */
367
368 case IMAGE_RAW:
369 return rm_rf_dangerous(i->path, false, true, false);
370
371 default:
372 return -ENOTSUP;
373 }
374 }
375
376 int image_rename(Image *i, const char *new_name) {
377 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
378 _cleanup_free_ char *new_path = NULL, *nn = NULL;
379 unsigned file_attr = 0;
380 int r;
381
382 assert(i);
383
384 if (!image_name_is_valid(new_name))
385 return -EINVAL;
386
387 if (path_equal(i->path, "/") ||
388 path_startswith(i->path, "/usr"))
389 return -EROFS;
390
391 /* Make sure we don't interfere with a running nspawn */
392 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
393 if (r < 0)
394 return r;
395
396 /* Make sure nobody takes the new name, between the time we
397 * checked it is currently unused in all search paths, and the
398 * time we take possesion of it */
399 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
400 if (r < 0)
401 return r;
402
403 r = image_find(new_name, NULL);
404 if (r < 0)
405 return r;
406 if (r > 0)
407 return -EEXIST;
408
409 switch (i->type) {
410
411 case IMAGE_DIRECTORY:
412 /* Turn of the immutable bit while we rename the image, so that we can rename it */
413 (void) read_attr_path(i->path, &file_attr);
414
415 if (file_attr & FS_IMMUTABLE_FL)
416 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
417
418 /* fall through */
419
420 case IMAGE_SUBVOLUME:
421 new_path = file_in_same_dir(i->path, new_name);
422 break;
423
424 case IMAGE_RAW: {
425 const char *fn;
426
427 fn = strjoina(new_name, ".raw");
428 new_path = file_in_same_dir(i->path, fn);
429 break;
430 }
431
432 default:
433 return -ENOTSUP;
434 }
435
436 if (!new_path)
437 return -ENOMEM;
438
439 nn = strdup(new_name);
440 if (!nn)
441 return -ENOMEM;
442
443 if (renameat2(AT_FDCWD, i->path, AT_FDCWD, new_path, RENAME_NOREPLACE) < 0)
444 return -errno;
445
446 /* Restore the immutable bit, if it was set before */
447 if (file_attr & FS_IMMUTABLE_FL)
448 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
449
450 free(i->path);
451 i->path = new_path;
452 new_path = NULL;
453
454 free(i->name);
455 i->name = nn;
456 nn = NULL;
457
458 return 0;
459 }
460
461 int image_clone(Image *i, const char *new_name, bool read_only) {
462 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
463 const char *new_path;
464 int r;
465
466 assert(i);
467
468 if (!image_name_is_valid(new_name))
469 return -EINVAL;
470
471 /* Make sure nobody takes the new name, between the time we
472 * checked it is currently unused in all search paths, and the
473 * time we take possesion of it */
474 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
475 if (r < 0)
476 return r;
477
478 r = image_find(new_name, NULL);
479 if (r < 0)
480 return r;
481 if (r > 0)
482 return -EEXIST;
483
484 switch (i->type) {
485
486 case IMAGE_SUBVOLUME:
487 case IMAGE_DIRECTORY:
488 new_path = strjoina("/var/lib/machines/", new_name);
489
490 r = btrfs_subvol_snapshot(i->path, new_path, read_only, true);
491 break;
492
493 case IMAGE_RAW:
494 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
495
496 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
497 break;
498
499 default:
500 return -ENOTSUP;
501 }
502
503 if (r < 0)
504 return r;
505
506 return 0;
507 }
508
509 int image_read_only(Image *i, bool b) {
510 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
511 int r;
512 assert(i);
513
514 if (path_equal(i->path, "/") ||
515 path_startswith(i->path, "/usr"))
516 return -EROFS;
517
518 /* Make sure we don't interfere with a running nspawn */
519 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
520 if (r < 0)
521 return r;
522
523 switch (i->type) {
524
525 case IMAGE_SUBVOLUME:
526 r = btrfs_subvol_set_read_only(i->path, b);
527 if (r < 0)
528 return r;
529
530 break;
531
532 case IMAGE_DIRECTORY:
533 /* For simple directory trees we cannot use the access
534 mode of the top-level directory, since it has an
535 effect on the container itself. However, we can
536 use the "immutable" flag, to at least make the
537 top-level directory read-only. It's not as good as
538 a read-only subvolume, but at least something, and
539 we can read the value back.*/
540
541 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
542 if (r < 0)
543 return r;
544
545 break;
546
547 case IMAGE_RAW: {
548 struct stat st;
549
550 if (stat(i->path, &st) < 0)
551 return -errno;
552
553 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
554 return -errno;
555
556 /* If the images is now read-only, it's a good time to
557 * defrag it, given that no write patterns will
558 * fragment it again. */
559 if (b)
560 (void) btrfs_defrag(i->path);
561 break;
562 }
563
564 default:
565 return -ENOTSUP;
566 }
567
568 return 0;
569 }
570
571 int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
572 _cleanup_free_ char *p = NULL;
573 LockFile t = LOCK_FILE_INIT;
574 struct stat st;
575 int r;
576
577 assert(path);
578 assert(global);
579 assert(local);
580
581 /* Locks an image path. This actually creates two locks: one
582 * "local" one, next to the image path itself, which might be
583 * shared via NFS. And another "global" one, in /run, that
584 * uses the device/inode number. This has the benefit that we
585 * can even lock a tree that is a mount point, correctly. */
586
587 if (path_equal(path, "/"))
588 return -EBUSY;
589
590 if (!path_is_absolute(path))
591 return -EINVAL;
592
593 if (stat(path, &st) >= 0) {
594 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
595 return -ENOMEM;
596 }
597
598 r = make_lock_file_for(path, operation, &t);
599 if (r < 0)
600 return r;
601
602 if (p) {
603 mkdir_p("/run/systemd/nspawn/locks", 0600);
604
605 r = make_lock_file(p, operation, global);
606 if (r < 0) {
607 release_lock_file(&t);
608 return r;
609 }
610 }
611
612 *local = t;
613 return 0;
614 }
615
616 int image_set_limit(Image *i, uint64_t referenced_max) {
617 assert(i);
618
619 if (path_equal(i->path, "/") ||
620 path_startswith(i->path, "/usr"))
621 return -EROFS;
622
623 if (i->type != IMAGE_SUBVOLUME)
624 return -ENOTSUP;
625
626 return btrfs_quota_limit(i->path, referenced_max);
627 }
628
629 int image_name_lock(const char *name, int operation, LockFile *ret) {
630 const char *p;
631
632 assert(name);
633 assert(ret);
634
635 /* Locks an image name, regardless of the precise path used. */
636
637 if (!image_name_is_valid(name))
638 return -EINVAL;
639
640 if (streq(name, ".host"))
641 return -EBUSY;
642
643 mkdir_p("/run/systemd/nspawn/locks", 0600);
644 p = strjoina("/run/systemd/nspawn/locks/name-", name);
645
646 return make_lock_file(p, operation, ret);
647 }
648
/* Checks whether "s" is acceptable as an image name: it must be a valid
 * file name, must not contain control characters, must be valid UTF-8, and
 * must not begin with ".#", since that prefix is used for the temporary
 * files created when new files are written atomically. */
bool image_name_is_valid(const char *s) {
        return filename_is_valid(s) &&
                !string_has_cc(s, NULL) &&
                utf8_is_valid(s) &&
                !startswith(s, ".#");
}
665
666 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
667 [IMAGE_DIRECTORY] = "directory",
668 [IMAGE_SUBVOLUME] = "subvolume",
669 [IMAGE_RAW] = "raw",
670 };
671
672 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);