]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/machine-pool.c
Merge pull request #1654 from poettering/util-lib
[thirdparty/systemd.git] / src / shared / machine-pool.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2015 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <sys/prctl.h>
24 #include <sys/statvfs.h>
25 #include <sys/vfs.h>
26
27 #include "btrfs-util.h"
28 #include "fd-util.h"
29 #include "lockfile-util.h"
30 #include "machine-pool.h"
31 #include "mkdir.h"
32 #include "path-util.h"
33 #include "process-util.h"
34 #include "signal-util.h"
35 #include "string-util.h"
36 #include "util.h"
37
/* Size used for /var/lib/machines.raw when the caller does not request a
 * specific one, and the lower bound applied when growing the pool: 500 MiB. */
#define VAR_LIB_MACHINES_SIZE_START (500UL*1024UL*1024UL)

/* Free space that has to be available on the file system backing
 * /var/lib/machines.raw before the image is created or grown: 750 MiB. */
#define VAR_LIB_MACHINES_FREE_MIN (750UL*1024UL*1024UL)
40
41 static int check_btrfs(void) {
42 struct statfs sfs;
43
44 if (statfs("/var/lib/machines", &sfs) < 0) {
45 if (errno != ENOENT)
46 return -errno;
47
48 if (statfs("/var/lib", &sfs) < 0)
49 return -errno;
50 }
51
52 return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
53 }
54
55 static int setup_machine_raw(uint64_t size, sd_bus_error *error) {
56 _cleanup_free_ char *tmp = NULL;
57 _cleanup_close_ int fd = -1;
58 struct statvfs ss;
59 pid_t pid = 0;
60 siginfo_t si;
61 int r;
62
63 /* We want to be able to make use of btrfs-specific file
64 * system features, in particular subvolumes, reflinks and
65 * quota. Hence, if we detect that /var/lib/machines.raw is
66 * not located on btrfs, let's create a loopback file, place a
67 * btrfs file system into it, and mount it to
68 * /var/lib/machines. */
69
70 fd = open("/var/lib/machines.raw", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
71 if (fd >= 0) {
72 r = fd;
73 fd = -1;
74 return r;
75 }
76
77 if (errno != ENOENT)
78 return sd_bus_error_set_errnof(error, errno, "Failed to open /var/lib/machines.raw: %m");
79
80 r = tempfn_xxxxxx("/var/lib/machines.raw", NULL, &tmp);
81 if (r < 0)
82 return r;
83
84 (void) mkdir_p_label("/var/lib", 0755);
85 fd = open(tmp, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0600);
86 if (fd < 0)
87 return sd_bus_error_set_errnof(error, errno, "Failed to create /var/lib/machines.raw: %m");
88
89 if (fstatvfs(fd, &ss) < 0) {
90 r = sd_bus_error_set_errnof(error, errno, "Failed to determine free space on /var/lib/machines.raw: %m");
91 goto fail;
92 }
93
94 if (ss.f_bsize * ss.f_bavail < VAR_LIB_MACHINES_FREE_MIN) {
95 r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Not enough free disk space to set up /var/lib/machines.");
96 goto fail;
97 }
98
99 if (ftruncate(fd, size) < 0) {
100 r = sd_bus_error_set_errnof(error, errno, "Failed to enlarge /var/lib/machines.raw: %m");
101 goto fail;
102 }
103
104 pid = fork();
105 if (pid < 0) {
106 r = sd_bus_error_set_errnof(error, errno, "Failed to fork mkfs.btrfs: %m");
107 goto fail;
108 }
109
110 if (pid == 0) {
111
112 /* Child */
113
114 (void) reset_all_signal_handlers();
115 (void) reset_signal_mask();
116 assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);
117
118 fd = safe_close(fd);
119
120 execlp("mkfs.btrfs", "-Lvar-lib-machines", tmp, NULL);
121 if (errno == ENOENT)
122 return 99;
123
124 _exit(EXIT_FAILURE);
125 }
126
127 r = wait_for_terminate(pid, &si);
128 if (r < 0) {
129 sd_bus_error_set_errnof(error, r, "Failed to wait for mkfs.btrfs: %m");
130 goto fail;
131 }
132
133 pid = 0;
134
135 if (si.si_code != CLD_EXITED) {
136 r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "mkfs.btrfs died abnormally.");
137 goto fail;
138 }
139 if (si.si_status == 99) {
140 r = sd_bus_error_set_errnof(error, ENOENT, "Cannot set up /var/lib/machines, mkfs.btrfs is missing");
141 goto fail;
142 }
143 if (si.si_status != 0) {
144 r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "mkfs.btrfs failed with error code %i", si.si_status);
145 goto fail;
146 }
147
148 r = rename_noreplace(AT_FDCWD, tmp, AT_FDCWD, "/var/lib/machines.raw");
149 if (r < 0) {
150 sd_bus_error_set_errnof(error, r, "Failed to move /var/lib/machines.raw into place: %m");
151 goto fail;
152 }
153
154 r = fd;
155 fd = -1;
156
157 return r;
158
159 fail:
160 unlink_noerrno(tmp);
161
162 if (pid > 1)
163 kill_and_sigcont(pid, SIGKILL);
164
165 return r;
166 }
167
168 int setup_machine_directory(uint64_t size, sd_bus_error *error) {
169 _cleanup_release_lock_file_ LockFile lock_file = LOCK_FILE_INIT;
170 struct loop_info64 info = {
171 .lo_flags = LO_FLAGS_AUTOCLEAR,
172 };
173 _cleanup_close_ int fd = -1, control = -1, loop = -1;
174 _cleanup_free_ char* loopdev = NULL;
175 char tmpdir[] = "/tmp/machine-pool.XXXXXX", *mntdir = NULL;
176 bool tmpdir_made = false, mntdir_made = false, mntdir_mounted = false;
177 char buf[FORMAT_BYTES_MAX];
178 int r, nr = -1;
179
180 /* btrfs cannot handle file systems < 16M, hence use this as minimum */
181 if (size == (uint64_t) -1)
182 size = VAR_LIB_MACHINES_SIZE_START;
183 else if (size < 16*1024*1024)
184 size = 16*1024*1024;
185
186 /* Make sure we only set the directory up once at a time */
187 r = make_lock_file("/run/systemd/machines.lock", LOCK_EX, &lock_file);
188 if (r < 0)
189 return r;
190
191 r = check_btrfs();
192 if (r < 0)
193 return sd_bus_error_set_errnof(error, r, "Failed to determine whether /var/lib/machines is located on btrfs: %m");
194 if (r > 0) {
195 (void) btrfs_subvol_make_label("/var/lib/machines");
196
197 r = btrfs_quota_enable("/var/lib/machines", true);
198 if (r < 0)
199 log_warning_errno(r, "Failed to enable quota for /var/lib/machines, ignoring: %m");
200
201 r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true);
202 if (r < 0)
203 log_warning_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, ignoring: %m");
204
205 return 1;
206 }
207
208 if (path_is_mount_point("/var/lib/machines", AT_SYMLINK_FOLLOW) > 0) {
209 log_debug("/var/lib/machines is already a mount point, not creating loopback file for it.");
210 return 0;
211 }
212
213 r = dir_is_populated("/var/lib/machines");
214 if (r < 0 && r != -ENOENT)
215 return r;
216 if (r > 0) {
217 log_debug("/var/log/machines is already populated, not creating loopback file for it.");
218 return 0;
219 }
220
221 r = mkfs_exists("btrfs");
222 if (r == -ENOENT) {
223 log_debug("mkfs.btrfs is missing, cannot create loopback file for /var/lib/machines.");
224 return 0;
225 }
226 if (r < 0)
227 return r;
228
229 fd = setup_machine_raw(size, error);
230 if (fd < 0)
231 return fd;
232
233 control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
234 if (control < 0)
235 return sd_bus_error_set_errnof(error, errno, "Failed to open /dev/loop-control: %m");
236
237 nr = ioctl(control, LOOP_CTL_GET_FREE);
238 if (nr < 0)
239 return sd_bus_error_set_errnof(error, errno, "Failed to allocate loop device: %m");
240
241 if (asprintf(&loopdev, "/dev/loop%i", nr) < 0) {
242 r = -ENOMEM;
243 goto fail;
244 }
245
246 loop = open(loopdev, O_CLOEXEC|O_RDWR|O_NOCTTY|O_NONBLOCK);
247 if (loop < 0) {
248 r = sd_bus_error_set_errnof(error, errno, "Failed to open loopback device: %m");
249 goto fail;
250 }
251
252 if (ioctl(loop, LOOP_SET_FD, fd) < 0) {
253 r = sd_bus_error_set_errnof(error, errno, "Failed to bind loopback device: %m");
254 goto fail;
255 }
256
257 if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0) {
258 r = sd_bus_error_set_errnof(error, errno, "Failed to enable auto-clear for loopback device: %m");
259 goto fail;
260 }
261
262 /* We need to make sure the new /var/lib/machines directory
263 * has an access mode of 0700 at the time it is first made
264 * available. mkfs will create it with 0755 however. Hence,
265 * let's mount the directory into an inaccessible directory
266 * below /tmp first, fix the access mode, and move it to the
267 * public place then. */
268
269 if (!mkdtemp(tmpdir)) {
270 r = sd_bus_error_set_errnof(error, errno, "Failed to create temporary mount parent directory: %m");
271 goto fail;
272 }
273 tmpdir_made = true;
274
275 mntdir = strjoina(tmpdir, "/mnt");
276 if (mkdir(mntdir, 0700) < 0) {
277 r = sd_bus_error_set_errnof(error, errno, "Failed to create temporary mount directory: %m");
278 goto fail;
279 }
280 mntdir_made = true;
281
282 if (mount(loopdev, mntdir, "btrfs", 0, NULL) < 0) {
283 r = sd_bus_error_set_errnof(error, errno, "Failed to mount loopback device: %m");
284 goto fail;
285 }
286 mntdir_mounted = true;
287
288 r = btrfs_quota_enable(mntdir, true);
289 if (r < 0)
290 log_warning_errno(r, "Failed to enable quota, ignoring: %m");
291
292 r = btrfs_subvol_auto_qgroup(mntdir, 0, true);
293 if (r < 0)
294 log_warning_errno(r, "Failed to set up default quota hierarchy, ignoring: %m");
295
296 if (chmod(mntdir, 0700) < 0) {
297 r = sd_bus_error_set_errnof(error, errno, "Failed to fix owner: %m");
298 goto fail;
299 }
300
301 (void) mkdir_p_label("/var/lib/machines", 0700);
302
303 if (mount(mntdir, "/var/lib/machines", NULL, MS_BIND, NULL) < 0) {
304 r = sd_bus_error_set_errnof(error, errno, "Failed to mount directory into right place: %m");
305 goto fail;
306 }
307
308 (void) syncfs(fd);
309
310 log_info("Set up /var/lib/machines as btrfs loopback file system of size %s mounted on /var/lib/machines.raw.", format_bytes(buf, sizeof(buf), size));
311
312 (void) umount2(mntdir, MNT_DETACH);
313 (void) rmdir(mntdir);
314 (void) rmdir(tmpdir);
315
316 return 1;
317
318 fail:
319 if (mntdir_mounted)
320 (void) umount2(mntdir, MNT_DETACH);
321
322 if (mntdir_made)
323 (void) rmdir(mntdir);
324 if (tmpdir_made)
325 (void) rmdir(tmpdir);
326
327 if (loop >= 0) {
328 (void) ioctl(loop, LOOP_CLR_FD);
329 loop = safe_close(loop);
330 }
331
332 if (control >= 0 && nr >= 0)
333 (void) ioctl(control, LOOP_CTL_REMOVE, nr);
334
335 return r;
336 }
337
338 static int sync_path(const char *p) {
339 _cleanup_close_ int fd = -1;
340
341 fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY);
342 if (fd < 0)
343 return -errno;
344
345 if (syncfs(fd) < 0)
346 return -errno;
347
348 return 0;
349 }
350
351 int grow_machine_directory(void) {
352 char buf[FORMAT_BYTES_MAX];
353 struct statvfs a, b;
354 uint64_t old_size, new_size, max_add;
355 int r;
356
357 /* Ensure the disk space data is accurate */
358 sync_path("/var/lib/machines");
359 sync_path("/var/lib/machines.raw");
360
361 if (statvfs("/var/lib/machines.raw", &a) < 0)
362 return -errno;
363
364 if (statvfs("/var/lib/machines", &b) < 0)
365 return -errno;
366
367 /* Don't grow if not enough disk space is available on the host */
368 if (((uint64_t) a.f_bavail * (uint64_t) a.f_bsize) <= VAR_LIB_MACHINES_FREE_MIN)
369 return 0;
370
371 /* Don't grow if at least 1/3th of the fs is still free */
372 if (b.f_bavail > b.f_blocks / 3)
373 return 0;
374
375 /* Calculate how much we are willing to add at maximum */
376 max_add = ((uint64_t) a.f_bavail * (uint64_t) a.f_bsize) - VAR_LIB_MACHINES_FREE_MIN;
377
378 /* Calculate the old size */
379 old_size = (uint64_t) b.f_blocks * (uint64_t) b.f_bsize;
380
381 /* Calculate the new size as three times the size of what is used right now */
382 new_size = ((uint64_t) b.f_blocks - (uint64_t) b.f_bavail) * (uint64_t) b.f_bsize * 3;
383
384 /* Always, grow at least to the start size */
385 if (new_size < VAR_LIB_MACHINES_SIZE_START)
386 new_size = VAR_LIB_MACHINES_SIZE_START;
387
388 /* If the new size is smaller than the old size, don't grow */
389 if (new_size < old_size)
390 return 0;
391
392 /* Ensure we never add more than the maximum */
393 if (new_size > old_size + max_add)
394 new_size = old_size + max_add;
395
396 r = btrfs_resize_loopback("/var/lib/machines", new_size, true);
397 if (r <= 0)
398 return r;
399
400 /* Also bump the quota, of both the subvolume leaf qgroup, as
401 * well as of any subtree quota group by the same id but a
402 * higher level, if it exists. */
403 (void) btrfs_qgroup_set_limit("/var/lib/machines", 0, new_size);
404 (void) btrfs_subvol_set_subtree_quota_limit("/var/lib/machines", 0, new_size);
405
406 log_info("Grew /var/lib/machines btrfs loopback file system to %s.", format_bytes(buf, sizeof(buf), new_size));
407 return 1;
408 }