]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/machine-pool.c
btrfs: beef-up btrfs support with a limited understanding of quota
[thirdparty/systemd.git] / src / shared / machine-pool.c
CommitLineData
432cea00
LP
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2015 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22#include <sys/prctl.h>
23#include <sys/vfs.h>
24#include <sys/statvfs.h>
25#include <sys/mount.h>
26
27#include "util.h"
0b452006 28#include "process-util.h"
cd2eb9e9 29#include "lockfile-util.h"
432cea00
LP
30#include "mkdir.h"
31#include "btrfs-util.h"
32#include "path-util.h"
24882e06 33#include "signal-util.h"
432cea00
LP
34#include "machine-pool.h"
35
36#define VAR_LIB_MACHINES_SIZE_START (1024UL*1024UL*500UL)
37#define VAR_LIB_MACHINES_FREE_MIN (1024UL*1024UL*750UL)
38
39static int check_btrfs(void) {
40 struct statfs sfs;
41
42 if (statfs("/var/lib/machines", &sfs) < 0) {
43 if (errno != ENOENT)
44 return -errno;
45
46 if (statfs("/var/lib", &sfs) < 0)
47 return -errno;
48 }
49
50 return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
51}
52
4cee5eed 53static int setup_machine_raw(uint64_t size, sd_bus_error *error) {
432cea00
LP
54 _cleanup_free_ char *tmp = NULL;
55 _cleanup_close_ int fd = -1;
56 struct statvfs ss;
57 pid_t pid = 0;
58 siginfo_t si;
59 int r;
60
61 /* We want to be able to make use of btrfs-specific file
62 * system features, in particular subvolumes, reflinks and
63 * quota. Hence, if we detect that /var/lib/machines.raw is
64 * not located on btrfs, let's create a loopback file, place a
65 * btrfs file system into it, and mount it to
66 * /var/lib/machines. */
67
68 fd = open("/var/lib/machines.raw", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
69 if (fd >= 0) {
70 r = fd;
71 fd = -1;
72 return r;
73 }
74
75 if (errno != ENOENT)
76 return sd_bus_error_set_errnof(error, errno, "Failed to open /var/lib/machines.raw: %m");
77
14bcf25c 78 r = tempfn_xxxxxx("/var/lib/machines.raw", NULL, &tmp);
432cea00
LP
79 if (r < 0)
80 return r;
81
82 (void) mkdir_p_label("/var/lib", 0755);
83 fd = open(tmp, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0600);
84 if (fd < 0)
85 return sd_bus_error_set_errnof(error, errno, "Failed to create /var/lib/machines.raw: %m");
86
87 if (fstatvfs(fd, &ss) < 0) {
88 r = sd_bus_error_set_errnof(error, errno, "Failed to determine free space on /var/lib/machines.raw: %m");
89 goto fail;
90 }
91
92 if (ss.f_bsize * ss.f_bavail < VAR_LIB_MACHINES_FREE_MIN) {
93 r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Not enough free disk space to set up /var/lib/machines.");
94 goto fail;
95 }
96
4cee5eed 97 if (ftruncate(fd, size) < 0) {
432cea00
LP
98 r = sd_bus_error_set_errnof(error, errno, "Failed to enlarge /var/lib/machines.raw: %m");
99 goto fail;
100 }
101
102 pid = fork();
103 if (pid < 0) {
104 r = sd_bus_error_set_errnof(error, errno, "Failed to fork mkfs.btrfs: %m");
105 goto fail;
106 }
107
108 if (pid == 0) {
109
110 /* Child */
111
ce30c8dc
LP
112 (void) reset_all_signal_handlers();
113 (void) reset_signal_mask();
432cea00
LP
114 assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);
115
116 fd = safe_close(fd);
117
118 execlp("mkfs.btrfs", "-Lvar-lib-machines", tmp, NULL);
119 if (errno == ENOENT)
120 return 99;
121
122 _exit(EXIT_FAILURE);
123 }
124
125 r = wait_for_terminate(pid, &si);
126 if (r < 0) {
127 sd_bus_error_set_errnof(error, r, "Failed to wait for mkfs.btrfs: %m");
128 goto fail;
129 }
130
131 pid = 0;
132
133 if (si.si_code != CLD_EXITED) {
134 r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "mkfs.btrfs died abnormally.");
135 goto fail;
136 }
137 if (si.si_status == 99) {
138 r = sd_bus_error_set_errnof(error, ENOENT, "Cannot set up /var/lib/machines, mkfs.btrfs is missing");
139 goto fail;
140 }
141 if (si.si_status != 0) {
142 r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "mkfs.btrfs failed with error code %i", si.si_status);
143 goto fail;
144 }
145
f85ef957
AC
146 r = rename_noreplace(AT_FDCWD, tmp, AT_FDCWD, "/var/lib/machines.raw");
147 if (r < 0) {
148 sd_bus_error_set_errnof(error, r, "Failed to move /var/lib/machines.raw into place: %m");
432cea00
LP
149 goto fail;
150 }
151
152 r = fd;
153 fd = -1;
154
155 return r;
156
157fail:
132764a2 158 unlink_noerrno(tmp);
432cea00
LP
159
160 if (pid > 1)
161 kill_and_sigcont(pid, SIGKILL);
162
163 return r;
164}
165
4cee5eed 166int setup_machine_directory(uint64_t size, sd_bus_error *error) {
403e5b32 167 _cleanup_release_lock_file_ LockFile lock_file = LOCK_FILE_INIT;
432cea00
LP
168 struct loop_info64 info = {
169 .lo_flags = LO_FLAGS_AUTOCLEAR,
170 };
171 _cleanup_close_ int fd = -1, control = -1, loop = -1;
172 _cleanup_free_ char* loopdev = NULL;
5bcd08db 173 char tmpdir[] = "/tmp/machine-pool.XXXXXX", *mntdir = NULL;
432cea00 174 bool tmpdir_made = false, mntdir_made = false, mntdir_mounted = false;
26166c88 175 char buf[FORMAT_BYTES_MAX];
432cea00
LP
176 int r, nr = -1;
177
4cee5eed
LP
178 /* btrfs cannot handle file systems < 16M, hence use this as minimum */
179 if (size == (uint64_t) -1)
180 size = VAR_LIB_MACHINES_SIZE_START;
181 else if (size < 16*1024*1024)
182 size = 16*1024*1024;
183
403e5b32
LP
184 /* Make sure we only set the directory up once at a time */
185 r = make_lock_file("/run/systemd/machines.lock", LOCK_EX, &lock_file);
186 if (r < 0)
187 return r;
188
432cea00
LP
189 r = check_btrfs();
190 if (r < 0)
191 return sd_bus_error_set_errnof(error, r, "Failed to determine whether /var/lib/machines is located on btrfs: %m");
192 if (r > 0) {
193 (void) btrfs_subvol_make_label("/var/lib/machines");
194
195 r = btrfs_quota_enable("/var/lib/machines", true);
196 if (r < 0)
5bcd08db 197 log_warning_errno(r, "Failed to enable quota for /var/lib/machines, ignoring: %m");
432cea00 198
5bcd08db
LP
199 r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true);
200 if (r < 0)
201 log_warning_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, ignoring: %m");
202
203 return 1;
204 }
205
206 if (path_is_mount_point("/var/lib/machines", AT_SYMLINK_FOLLOW) > 0) {
207 log_debug("/var/lib/machines is already a mount point, not creating loopback file for it.");
432cea00
LP
208 return 0;
209 }
210
5bcd08db
LP
211 r = dir_is_populated("/var/lib/machines");
212 if (r < 0 && r != -ENOENT)
213 return r;
214 if (r > 0) {
215 log_debug("/var/log/machines is already populated, not creating loopback file for it.");
216 return 0;
217 }
218
219 r = mkfs_exists("btrfs");
220 if (r == -ENOENT) {
221 log_debug("mkfs.btrfs is missing, cannot create loopback file for /var/lib/machines.");
222 return 0;
223 }
224 if (r < 0)
225 return r;
432cea00 226
4cee5eed 227 fd = setup_machine_raw(size, error);
432cea00
LP
228 if (fd < 0)
229 return fd;
230
231 control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
232 if (control < 0)
233 return sd_bus_error_set_errnof(error, errno, "Failed to open /dev/loop-control: %m");
234
235 nr = ioctl(control, LOOP_CTL_GET_FREE);
236 if (nr < 0)
237 return sd_bus_error_set_errnof(error, errno, "Failed to allocate loop device: %m");
238
239 if (asprintf(&loopdev, "/dev/loop%i", nr) < 0) {
240 r = -ENOMEM;
241 goto fail;
242 }
243
244 loop = open(loopdev, O_CLOEXEC|O_RDWR|O_NOCTTY|O_NONBLOCK);
245 if (loop < 0) {
246 r = sd_bus_error_set_errnof(error, errno, "Failed to open loopback device: %m");
247 goto fail;
248 }
249
250 if (ioctl(loop, LOOP_SET_FD, fd) < 0) {
251 r = sd_bus_error_set_errnof(error, errno, "Failed to bind loopback device: %m");
252 goto fail;
253 }
254
255 if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0) {
256 r = sd_bus_error_set_errnof(error, errno, "Failed to enable auto-clear for loopback device: %m");
257 goto fail;
258 }
259
260 /* We need to make sure the new /var/lib/machines directory
261 * has an access mode of 0700 at the time it is first made
262 * available. mkfs will create it with 0755 however. Hence,
263 * let's mount the directory into an inaccessible directory
264 * below /tmp first, fix the access mode, and move it to the
265 * public place then. */
266
267 if (!mkdtemp(tmpdir)) {
268 r = sd_bus_error_set_errnof(error, errno, "Failed to create temporary mount parent directory: %m");
269 goto fail;
270 }
271 tmpdir_made = true;
272
273 mntdir = strjoina(tmpdir, "/mnt");
274 if (mkdir(mntdir, 0700) < 0) {
275 r = sd_bus_error_set_errnof(error, errno, "Failed to create temporary mount directory: %m");
276 goto fail;
277 }
278 mntdir_made = true;
279
280 if (mount(loopdev, mntdir, "btrfs", 0, NULL) < 0) {
281 r = sd_bus_error_set_errnof(error, errno, "Failed to mount loopback device: %m");
282 goto fail;
283 }
284 mntdir_mounted = true;
285
286 r = btrfs_quota_enable(mntdir, true);
287 if (r < 0)
288 log_warning_errno(r, "Failed to enable quota, ignoring: %m");
289
5bcd08db
LP
290 r = btrfs_subvol_auto_qgroup(mntdir, 0, true);
291 if (r < 0)
292 log_warning_errno(r, "Failed to set up default quota hierarchy, ignoring: %m");
293
432cea00
LP
294 if (chmod(mntdir, 0700) < 0) {
295 r = sd_bus_error_set_errnof(error, errno, "Failed to fix owner: %m");
296 goto fail;
297 }
298
299 (void) mkdir_p_label("/var/lib/machines", 0700);
300
301 if (mount(mntdir, "/var/lib/machines", NULL, MS_BIND, NULL) < 0) {
302 r = sd_bus_error_set_errnof(error, errno, "Failed to mount directory into right place: %m");
303 goto fail;
304 }
305
26166c88
LP
306 (void) syncfs(fd);
307
308 log_info("Set up /var/lib/machines as btrfs loopback file system of size %s mounted on /var/lib/machines.raw.", format_bytes(buf, sizeof(buf), size));
309
432cea00
LP
310 (void) umount2(mntdir, MNT_DETACH);
311 (void) rmdir(mntdir);
312 (void) rmdir(tmpdir);
313
5bcd08db 314 return 1;
432cea00
LP
315
316fail:
317 if (mntdir_mounted)
318 (void) umount2(mntdir, MNT_DETACH);
319
320 if (mntdir_made)
321 (void) rmdir(mntdir);
322 if (tmpdir_made)
323 (void) rmdir(tmpdir);
324
325 if (loop >= 0) {
326 (void) ioctl(loop, LOOP_CLR_FD);
327 loop = safe_close(loop);
328 }
329
330 if (control >= 0 && nr >= 0)
331 (void) ioctl(control, LOOP_CTL_REMOVE, nr);
332
333 return r;
334}
26166c88
LP
335
336static int sync_path(const char *p) {
337 _cleanup_close_ int fd = -1;
338
339 fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY);
340 if (fd < 0)
341 return -errno;
342
343 if (syncfs(fd) < 0)
344 return -errno;
345
346 return 0;
347}
348
349int grow_machine_directory(void) {
350 char buf[FORMAT_BYTES_MAX];
351 struct statvfs a, b;
352 uint64_t old_size, new_size, max_add;
353 int r;
354
355 /* Ensure the disk space data is accurate */
356 sync_path("/var/lib/machines");
357 sync_path("/var/lib/machines.raw");
358
359 if (statvfs("/var/lib/machines.raw", &a) < 0)
360 return -errno;
361
362 if (statvfs("/var/lib/machines", &b) < 0)
363 return -errno;
364
365 /* Don't grow if not enough disk space is available on the host */
366 if (((uint64_t) a.f_bavail * (uint64_t) a.f_bsize) <= VAR_LIB_MACHINES_FREE_MIN)
367 return 0;
368
369 /* Don't grow if at least 1/3th of the fs is still free */
370 if (b.f_bavail > b.f_blocks / 3)
371 return 0;
372
373 /* Calculate how much we are willing to add at maximum */
374 max_add = ((uint64_t) a.f_bavail * (uint64_t) a.f_bsize) - VAR_LIB_MACHINES_FREE_MIN;
375
376 /* Calculate the old size */
377 old_size = (uint64_t) b.f_blocks * (uint64_t) b.f_bsize;
378
379 /* Calculate the new size as three times the size of what is used right now */
380 new_size = ((uint64_t) b.f_blocks - (uint64_t) b.f_bavail) * (uint64_t) b.f_bsize * 3;
381
382 /* Always, grow at least to the start size */
383 if (new_size < VAR_LIB_MACHINES_SIZE_START)
384 new_size = VAR_LIB_MACHINES_SIZE_START;
385
386 /* If the new size is smaller than the old size, don't grow */
387 if (new_size < old_size)
388 return 0;
389
390 /* Ensure we never add more than the maximum */
391 if (new_size > old_size + max_add)
392 new_size = old_size + max_add;
393
394 r = btrfs_resize_loopback("/var/lib/machines", new_size, true);
395 if (r <= 0)
396 return r;
397
5bcd08db
LP
398 /* Also bump the quota, of both the subvolume leaf qgroup, as
399 * well as of any subtree quota group by the same id but a
400 * higher level, if it exists. */
401 (void) btrfs_qgroup_set_limit("/var/lib/machines", 0, new_size);
402 (void) btrfs_subvol_set_subtree_quota_limit("/var/lib/machines", 0, new_size);
26166c88
LP
403
404 log_info("Grew /var/lib/machines btrfs loopback file system to %s.", format_bytes(buf, sizeof(buf), new_size));
405 return 1;
406}