]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/machine-image.c
machined: use the FS_IMMUTABLE_FL file flag, if available, to implement a "read-only...
[thirdparty/systemd.git] / src / shared / machine-image.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/statfs.h>
23 #include <linux/fs.h>
24 #include <fcntl.h>
25
26 #include "strv.h"
27 #include "utf8.h"
28 #include "btrfs-util.h"
29 #include "path-util.h"
30 #include "copy.h"
31 #include "machine-image.h"
32
/* Directories that are searched for images. The entries are stored back
 * to back, each one terminated by a NUL byte, and are walked with
 * NULSTR_FOREACH() in the order in which they are listed here. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0"
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
38
39 Image *image_unref(Image *i) {
40 if (!i)
41 return NULL;
42
43 free(i->name);
44 free(i->path);
45 free(i);
46 return NULL;
47 }
48
49 static int image_new(
50 ImageType t,
51 const char *pretty,
52 const char *path,
53 const char *filename,
54 bool read_only,
55 usec_t crtime,
56 usec_t mtime,
57 Image **ret) {
58
59 _cleanup_(image_unrefp) Image *i = NULL;
60
61 assert(t >= 0);
62 assert(t < _IMAGE_TYPE_MAX);
63 assert(pretty);
64 assert(filename);
65 assert(ret);
66
67 i = new0(Image, 1);
68 if (!i)
69 return -ENOMEM;
70
71 i->type = t;
72 i->read_only = read_only;
73 i->crtime = crtime;
74 i->mtime = mtime;
75 i->size = i->size_exclusive = (uint64_t) -1;
76 i->limit = i->limit_exclusive = (uint64_t) -1;
77
78 i->name = strdup(pretty);
79 if (!i->name)
80 return -ENOMEM;
81
82 if (path)
83 i->path = strjoin(path, "/", filename, NULL);
84 else
85 i->path = strdup(filename);
86
87 if (!i->path)
88 return -ENOMEM;
89
90 path_kill_slashes(i->path);
91
92 *ret = i;
93 i = NULL;
94
95 return 0;
96 }
97
98 static int image_make(
99 const char *pretty,
100 int dfd,
101 const char *path,
102 const char *filename,
103 Image **ret) {
104
105 struct stat st;
106 bool read_only;
107 int r;
108
109 assert(filename);
110
111 /* We explicitly *do* follow symlinks here, since we want to
112 * allow symlinking trees into /var/lib/container/, and treat
113 * them normally. */
114
115 if (fstatat(dfd, filename, &st, 0) < 0)
116 return -errno;
117
118 read_only =
119 (path && path_startswith(path, "/usr")) ||
120 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
121
122 if (S_ISDIR(st.st_mode)) {
123 _cleanup_close_ int fd = -1;
124 unsigned file_attr = 0;
125
126 if (!ret)
127 return 1;
128
129 if (!pretty)
130 pretty = filename;
131
132 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
133 if (fd < 0)
134 return -errno;
135
136 /* btrfs subvolumes have inode 256 */
137 if (st.st_ino == 256) {
138 struct statfs sfs;
139
140 if (fstatfs(fd, &sfs) < 0)
141 return -errno;
142
143 if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC)) {
144 BtrfsSubvolInfo info;
145 BtrfsQuotaInfo quota;
146
147 /* It's a btrfs subvolume */
148
149 r = btrfs_subvol_get_info_fd(fd, &info);
150 if (r < 0)
151 return r;
152
153 r = image_new(IMAGE_SUBVOLUME,
154 pretty,
155 path,
156 filename,
157 info.read_only || read_only,
158 info.otime,
159 0,
160 ret);
161 if (r < 0)
162 return r;
163
164 r = btrfs_subvol_get_quota_fd(fd, &quota);
165 if (r >= 0) {
166 (*ret)->size = quota.referred;
167 (*ret)->size_exclusive = quota.exclusive;
168
169 (*ret)->limit = quota.referred_max;
170 (*ret)->limit_exclusive = quota.exclusive_max;
171 }
172
173 return 1;
174 }
175 }
176
177 /* If the IMMUTABLE bit is set, we consider the
178 * directory read-only. Since the ioctl is not
179 * supported everywhere we ignore failures. */
180 (void) read_attr_fd(fd, &file_attr);
181
182 /* It's just a normal directory. */
183 r = image_new(IMAGE_DIRECTORY,
184 pretty,
185 path,
186 filename,
187 read_only || (file_attr & FS_IMMUTABLE_FL),
188 0,
189 0,
190 ret);
191 if (r < 0)
192 return r;
193
194 return 1;
195
196 } else if (S_ISREG(st.st_mode) && endswith(filename, ".gpt")) {
197 usec_t crtime = 0;
198
199 /* It's a GPT block device */
200
201 if (!ret)
202 return 1;
203
204 fd_getcrtime_at(dfd, filename, &crtime, 0);
205
206 if (!pretty)
207 pretty = strndupa(filename, strlen(filename) - 4);
208
209 r = image_new(IMAGE_GPT,
210 pretty,
211 path,
212 filename,
213 !(st.st_mode & 0222) || read_only,
214 crtime,
215 timespec_load(&st.st_mtim),
216 ret);
217 if (r < 0)
218 return r;
219
220 (*ret)->size = (*ret)->size_exclusive = st.st_blocks * 512;
221 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
222
223 return 1;
224 }
225
226 return 0;
227 }
228
229 int image_find(const char *name, Image **ret) {
230 const char *path;
231 int r;
232
233 assert(name);
234
235 /* There are no images with invalid names */
236 if (!image_name_is_valid(name))
237 return 0;
238
239 NULSTR_FOREACH(path, image_search_path) {
240 _cleanup_closedir_ DIR *d = NULL;
241
242 d = opendir(path);
243 if (!d) {
244 if (errno == ENOENT)
245 continue;
246
247 return -errno;
248 }
249
250 r = image_make(NULL, dirfd(d), path, name, ret);
251 if (r == 0 || r == -ENOENT) {
252 _cleanup_free_ char *gpt = NULL;
253
254 gpt = strappend(name, ".gpt");
255 if (!gpt)
256 return -ENOMEM;
257
258 r = image_make(NULL, dirfd(d), path, gpt, ret);
259 if (r == 0 || r == -ENOENT)
260 continue;
261 }
262 if (r < 0)
263 return r;
264
265 return 1;
266 }
267
268 if (streq(name, ".host"))
269 return image_make(".host", AT_FDCWD, NULL, "/", ret);
270
271 return 0;
272 };
273
274 int image_discover(Hashmap *h) {
275 const char *path;
276 int r;
277
278 assert(h);
279
280 NULSTR_FOREACH(path, image_search_path) {
281 _cleanup_closedir_ DIR *d = NULL;
282 struct dirent *de;
283
284 d = opendir(path);
285 if (!d) {
286 if (errno == ENOENT)
287 continue;
288
289 return -errno;
290 }
291
292 FOREACH_DIRENT_ALL(de, d, return -errno) {
293 _cleanup_(image_unrefp) Image *image = NULL;
294
295 if (!image_name_is_valid(de->d_name))
296 continue;
297
298 if (hashmap_contains(h, de->d_name))
299 continue;
300
301 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
302 if (r == 0 || r == -ENOENT)
303 continue;
304 if (r < 0)
305 return r;
306
307 r = hashmap_put(h, image->name, image);
308 if (r < 0)
309 return r;
310
311 image = NULL;
312 }
313 }
314
315 if (!hashmap_contains(h, ".host")) {
316 _cleanup_(image_unrefp) Image *image = NULL;
317
318 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
319 if (r < 0)
320 return r;
321
322 r = hashmap_put(h, image->name, image);
323 if (r < 0)
324 return r;
325
326 image = NULL;
327
328 }
329
330 return 0;
331 }
332
333 void image_hashmap_free(Hashmap *map) {
334 Image *i;
335
336 while ((i = hashmap_steal_first(map)))
337 image_unref(i);
338
339 hashmap_free(map);
340 }
341
342 int image_remove(Image *i) {
343 assert(i);
344
345 if (path_equal(i->path, "/") ||
346 path_startswith(i->path, "/usr"))
347 return -EROFS;
348
349 switch (i->type) {
350
351 case IMAGE_SUBVOLUME:
352 return btrfs_subvol_remove(i->path);
353
354 case IMAGE_DIRECTORY:
355 /* Allow deletion of read-only directories */
356 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
357
358 /* fall through */
359
360 case IMAGE_GPT:
361 return rm_rf_dangerous(i->path, false, true, false);
362
363 default:
364 return -ENOTSUP;
365 }
366 }
367
368 int image_rename(Image *i, const char *new_name) {
369 _cleanup_free_ char *new_path = NULL, *nn = NULL;
370 unsigned file_attr = 0;
371 int r;
372
373 assert(i);
374
375 if (!image_name_is_valid(new_name))
376 return -EINVAL;
377
378 if (path_equal(i->path, "/") ||
379 path_startswith(i->path, "/usr"))
380 return -EROFS;
381
382 r = image_find(new_name, NULL);
383 if (r < 0)
384 return r;
385 if (r > 0)
386 return -EEXIST;
387
388 switch (i->type) {
389
390 case IMAGE_DIRECTORY:
391 /* Turn of the immutable bit while we rename the image, so that we can rename it */
392 (void) read_attr_path(i->path, &file_attr);
393
394 if (file_attr & FS_IMMUTABLE_FL)
395 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
396
397 /* fall through */
398
399 case IMAGE_SUBVOLUME:
400 new_path = file_in_same_dir(i->path, new_name);
401 break;
402
403 case IMAGE_GPT: {
404 const char *fn;
405
406 fn = strappenda(new_name, ".gpt");
407 new_path = file_in_same_dir(i->path, fn);
408 break;
409 }
410
411 default:
412 return -ENOTSUP;
413 }
414
415 if (!new_path)
416 return -ENOMEM;
417
418 nn = strdup(new_name);
419 if (!nn)
420 return -ENOMEM;
421
422 if (renameat2(AT_FDCWD, i->path, AT_FDCWD, new_path, RENAME_NOREPLACE) < 0)
423 return -errno;
424
425 /* Restore the immutable bit, if it was set before */
426 if (file_attr & FS_IMMUTABLE_FL)
427 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
428
429 free(i->path);
430 i->path = new_path;
431 new_path = NULL;
432
433 free(i->name);
434 i->name = nn;
435 nn = NULL;
436
437 return 0;
438 }
439
440 int image_clone(Image *i, const char *new_name, bool read_only) {
441 const char *new_path;
442 int r;
443
444 assert(i);
445
446 if (!image_name_is_valid(new_name))
447 return -EINVAL;
448
449 r = image_find(new_name, NULL);
450 if (r < 0)
451 return r;
452 if (r > 0)
453 return -EEXIST;
454
455 switch (i->type) {
456
457 case IMAGE_SUBVOLUME:
458 case IMAGE_DIRECTORY:
459 new_path = strappenda("/var/lib/container/", new_name);
460
461 r = btrfs_subvol_snapshot(i->path, new_path, read_only, true);
462 break;
463
464 case IMAGE_GPT:
465 new_path = strappenda("/var/lib/container/", new_name, ".gpt");
466
467 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
468 break;
469
470 default:
471 return -ENOTSUP;
472 }
473
474 if (r < 0)
475 return r;
476
477 return 0;
478 }
479
480 int image_read_only(Image *i, bool b) {
481 int r;
482 assert(i);
483
484 if (path_equal(i->path, "/") ||
485 path_startswith(i->path, "/usr"))
486 return -EROFS;
487
488 switch (i->type) {
489
490 case IMAGE_SUBVOLUME:
491 r = btrfs_subvol_set_read_only(i->path, b);
492 if (r < 0)
493 return r;
494
495 break;
496
497 case IMAGE_DIRECTORY:
498 /* For simple directory trees we cannot use the access
499 mode of the top-level directory, since it has an
500 effect on the container itself. However, we can
501 use the "immutable" flag, to at least make the
502 top-level directory read-only. It's not as good as
503 a read-only subvolume, but at least something, and
504 we can read the value back.*/
505
506 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
507 if (r < 0)
508 return r;
509
510 break;
511
512 case IMAGE_GPT: {
513 struct stat st;
514
515 if (stat(i->path, &st) < 0)
516 return -errno;
517
518 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
519 return -errno;
520
521 /* If the images is now read-only, it's a good time to
522 * defrag it, given that no write patterns will
523 * fragment it again. */
524 if (b)
525 (void) btrfs_defrag(i->path);
526 break;
527 }
528
529 default:
530 return -ENOTSUP;
531 }
532
533 return 0;
534 }
535
536 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
537 [IMAGE_DIRECTORY] = "directory",
538 [IMAGE_SUBVOLUME] = "subvolume",
539 [IMAGE_GPT] = "gpt",
540 };
541
542 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);