]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/machine-pool.c
util-lib: move more file I/O related calls into fileio.[ch]
[thirdparty/systemd.git] / src / shared / machine-pool.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2015 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <sys/prctl.h>
24 #include <sys/statvfs.h>
25 #include <sys/vfs.h>
26
27 #include "btrfs-util.h"
28 #include "fd-util.h"
29 #include "fileio.h"
30 #include "lockfile-util.h"
31 #include "machine-pool.h"
32 #include "mkdir.h"
33 #include "parse-util.h"
34 #include "path-util.h"
35 #include "process-util.h"
36 #include "signal-util.h"
37 #include "string-util.h"
38 #include "util.h"
39
/* Size (in bytes, 500 MiB) used for the loopback pool when the caller does
 * not request a specific size, and the minimum size the pool is grown to. */
#define VAR_LIB_MACHINES_SIZE_START (500UL*1024UL*1024UL)

/* Amount of free space (in bytes, 750 MiB) that must remain available on the
 * file system hosting /var/lib/machines.raw. */
#define VAR_LIB_MACHINES_FREE_MIN (750UL*1024UL*1024UL)
42
43 static int check_btrfs(void) {
44 struct statfs sfs;
45
46 if (statfs("/var/lib/machines", &sfs) < 0) {
47 if (errno != ENOENT)
48 return -errno;
49
50 if (statfs("/var/lib", &sfs) < 0)
51 return -errno;
52 }
53
54 return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
55 }
56
57 static int setup_machine_raw(uint64_t size, sd_bus_error *error) {
58 _cleanup_free_ char *tmp = NULL;
59 _cleanup_close_ int fd = -1;
60 struct statvfs ss;
61 pid_t pid = 0;
62 siginfo_t si;
63 int r;
64
65 /* We want to be able to make use of btrfs-specific file
66 * system features, in particular subvolumes, reflinks and
67 * quota. Hence, if we detect that /var/lib/machines.raw is
68 * not located on btrfs, let's create a loopback file, place a
69 * btrfs file system into it, and mount it to
70 * /var/lib/machines. */
71
72 fd = open("/var/lib/machines.raw", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
73 if (fd >= 0) {
74 r = fd;
75 fd = -1;
76 return r;
77 }
78
79 if (errno != ENOENT)
80 return sd_bus_error_set_errnof(error, errno, "Failed to open /var/lib/machines.raw: %m");
81
82 r = tempfn_xxxxxx("/var/lib/machines.raw", NULL, &tmp);
83 if (r < 0)
84 return r;
85
86 (void) mkdir_p_label("/var/lib", 0755);
87 fd = open(tmp, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0600);
88 if (fd < 0)
89 return sd_bus_error_set_errnof(error, errno, "Failed to create /var/lib/machines.raw: %m");
90
91 if (fstatvfs(fd, &ss) < 0) {
92 r = sd_bus_error_set_errnof(error, errno, "Failed to determine free space on /var/lib/machines.raw: %m");
93 goto fail;
94 }
95
96 if (ss.f_bsize * ss.f_bavail < VAR_LIB_MACHINES_FREE_MIN) {
97 r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Not enough free disk space to set up /var/lib/machines.");
98 goto fail;
99 }
100
101 if (ftruncate(fd, size) < 0) {
102 r = sd_bus_error_set_errnof(error, errno, "Failed to enlarge /var/lib/machines.raw: %m");
103 goto fail;
104 }
105
106 pid = fork();
107 if (pid < 0) {
108 r = sd_bus_error_set_errnof(error, errno, "Failed to fork mkfs.btrfs: %m");
109 goto fail;
110 }
111
112 if (pid == 0) {
113
114 /* Child */
115
116 (void) reset_all_signal_handlers();
117 (void) reset_signal_mask();
118 assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);
119
120 fd = safe_close(fd);
121
122 execlp("mkfs.btrfs", "-Lvar-lib-machines", tmp, NULL);
123 if (errno == ENOENT)
124 return 99;
125
126 _exit(EXIT_FAILURE);
127 }
128
129 r = wait_for_terminate(pid, &si);
130 if (r < 0) {
131 sd_bus_error_set_errnof(error, r, "Failed to wait for mkfs.btrfs: %m");
132 goto fail;
133 }
134
135 pid = 0;
136
137 if (si.si_code != CLD_EXITED) {
138 r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "mkfs.btrfs died abnormally.");
139 goto fail;
140 }
141 if (si.si_status == 99) {
142 r = sd_bus_error_set_errnof(error, ENOENT, "Cannot set up /var/lib/machines, mkfs.btrfs is missing");
143 goto fail;
144 }
145 if (si.si_status != 0) {
146 r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "mkfs.btrfs failed with error code %i", si.si_status);
147 goto fail;
148 }
149
150 r = rename_noreplace(AT_FDCWD, tmp, AT_FDCWD, "/var/lib/machines.raw");
151 if (r < 0) {
152 sd_bus_error_set_errnof(error, r, "Failed to move /var/lib/machines.raw into place: %m");
153 goto fail;
154 }
155
156 r = fd;
157 fd = -1;
158
159 return r;
160
161 fail:
162 unlink_noerrno(tmp);
163
164 if (pid > 1)
165 kill_and_sigcont(pid, SIGKILL);
166
167 return r;
168 }
169
170 int setup_machine_directory(uint64_t size, sd_bus_error *error) {
171 _cleanup_release_lock_file_ LockFile lock_file = LOCK_FILE_INIT;
172 struct loop_info64 info = {
173 .lo_flags = LO_FLAGS_AUTOCLEAR,
174 };
175 _cleanup_close_ int fd = -1, control = -1, loop = -1;
176 _cleanup_free_ char* loopdev = NULL;
177 char tmpdir[] = "/tmp/machine-pool.XXXXXX", *mntdir = NULL;
178 bool tmpdir_made = false, mntdir_made = false, mntdir_mounted = false;
179 char buf[FORMAT_BYTES_MAX];
180 int r, nr = -1;
181
182 /* btrfs cannot handle file systems < 16M, hence use this as minimum */
183 if (size == (uint64_t) -1)
184 size = VAR_LIB_MACHINES_SIZE_START;
185 else if (size < 16*1024*1024)
186 size = 16*1024*1024;
187
188 /* Make sure we only set the directory up once at a time */
189 r = make_lock_file("/run/systemd/machines.lock", LOCK_EX, &lock_file);
190 if (r < 0)
191 return r;
192
193 r = check_btrfs();
194 if (r < 0)
195 return sd_bus_error_set_errnof(error, r, "Failed to determine whether /var/lib/machines is located on btrfs: %m");
196 if (r > 0) {
197 (void) btrfs_subvol_make_label("/var/lib/machines");
198
199 r = btrfs_quota_enable("/var/lib/machines", true);
200 if (r < 0)
201 log_warning_errno(r, "Failed to enable quota for /var/lib/machines, ignoring: %m");
202
203 r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true);
204 if (r < 0)
205 log_warning_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, ignoring: %m");
206
207 return 1;
208 }
209
210 if (path_is_mount_point("/var/lib/machines", AT_SYMLINK_FOLLOW) > 0) {
211 log_debug("/var/lib/machines is already a mount point, not creating loopback file for it.");
212 return 0;
213 }
214
215 r = dir_is_populated("/var/lib/machines");
216 if (r < 0 && r != -ENOENT)
217 return r;
218 if (r > 0) {
219 log_debug("/var/log/machines is already populated, not creating loopback file for it.");
220 return 0;
221 }
222
223 r = mkfs_exists("btrfs");
224 if (r == -ENOENT) {
225 log_debug("mkfs.btrfs is missing, cannot create loopback file for /var/lib/machines.");
226 return 0;
227 }
228 if (r < 0)
229 return r;
230
231 fd = setup_machine_raw(size, error);
232 if (fd < 0)
233 return fd;
234
235 control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
236 if (control < 0)
237 return sd_bus_error_set_errnof(error, errno, "Failed to open /dev/loop-control: %m");
238
239 nr = ioctl(control, LOOP_CTL_GET_FREE);
240 if (nr < 0)
241 return sd_bus_error_set_errnof(error, errno, "Failed to allocate loop device: %m");
242
243 if (asprintf(&loopdev, "/dev/loop%i", nr) < 0) {
244 r = -ENOMEM;
245 goto fail;
246 }
247
248 loop = open(loopdev, O_CLOEXEC|O_RDWR|O_NOCTTY|O_NONBLOCK);
249 if (loop < 0) {
250 r = sd_bus_error_set_errnof(error, errno, "Failed to open loopback device: %m");
251 goto fail;
252 }
253
254 if (ioctl(loop, LOOP_SET_FD, fd) < 0) {
255 r = sd_bus_error_set_errnof(error, errno, "Failed to bind loopback device: %m");
256 goto fail;
257 }
258
259 if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0) {
260 r = sd_bus_error_set_errnof(error, errno, "Failed to enable auto-clear for loopback device: %m");
261 goto fail;
262 }
263
264 /* We need to make sure the new /var/lib/machines directory
265 * has an access mode of 0700 at the time it is first made
266 * available. mkfs will create it with 0755 however. Hence,
267 * let's mount the directory into an inaccessible directory
268 * below /tmp first, fix the access mode, and move it to the
269 * public place then. */
270
271 if (!mkdtemp(tmpdir)) {
272 r = sd_bus_error_set_errnof(error, errno, "Failed to create temporary mount parent directory: %m");
273 goto fail;
274 }
275 tmpdir_made = true;
276
277 mntdir = strjoina(tmpdir, "/mnt");
278 if (mkdir(mntdir, 0700) < 0) {
279 r = sd_bus_error_set_errnof(error, errno, "Failed to create temporary mount directory: %m");
280 goto fail;
281 }
282 mntdir_made = true;
283
284 if (mount(loopdev, mntdir, "btrfs", 0, NULL) < 0) {
285 r = sd_bus_error_set_errnof(error, errno, "Failed to mount loopback device: %m");
286 goto fail;
287 }
288 mntdir_mounted = true;
289
290 r = btrfs_quota_enable(mntdir, true);
291 if (r < 0)
292 log_warning_errno(r, "Failed to enable quota, ignoring: %m");
293
294 r = btrfs_subvol_auto_qgroup(mntdir, 0, true);
295 if (r < 0)
296 log_warning_errno(r, "Failed to set up default quota hierarchy, ignoring: %m");
297
298 if (chmod(mntdir, 0700) < 0) {
299 r = sd_bus_error_set_errnof(error, errno, "Failed to fix owner: %m");
300 goto fail;
301 }
302
303 (void) mkdir_p_label("/var/lib/machines", 0700);
304
305 if (mount(mntdir, "/var/lib/machines", NULL, MS_BIND, NULL) < 0) {
306 r = sd_bus_error_set_errnof(error, errno, "Failed to mount directory into right place: %m");
307 goto fail;
308 }
309
310 (void) syncfs(fd);
311
312 log_info("Set up /var/lib/machines as btrfs loopback file system of size %s mounted on /var/lib/machines.raw.", format_bytes(buf, sizeof(buf), size));
313
314 (void) umount2(mntdir, MNT_DETACH);
315 (void) rmdir(mntdir);
316 (void) rmdir(tmpdir);
317
318 return 1;
319
320 fail:
321 if (mntdir_mounted)
322 (void) umount2(mntdir, MNT_DETACH);
323
324 if (mntdir_made)
325 (void) rmdir(mntdir);
326 if (tmpdir_made)
327 (void) rmdir(tmpdir);
328
329 if (loop >= 0) {
330 (void) ioctl(loop, LOOP_CLR_FD);
331 loop = safe_close(loop);
332 }
333
334 if (control >= 0 && nr >= 0)
335 (void) ioctl(control, LOOP_CTL_REMOVE, nr);
336
337 return r;
338 }
339
340 static int sync_path(const char *p) {
341 _cleanup_close_ int fd = -1;
342
343 fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY);
344 if (fd < 0)
345 return -errno;
346
347 if (syncfs(fd) < 0)
348 return -errno;
349
350 return 0;
351 }
352
353 int grow_machine_directory(void) {
354 char buf[FORMAT_BYTES_MAX];
355 struct statvfs a, b;
356 uint64_t old_size, new_size, max_add;
357 int r;
358
359 /* Ensure the disk space data is accurate */
360 sync_path("/var/lib/machines");
361 sync_path("/var/lib/machines.raw");
362
363 if (statvfs("/var/lib/machines.raw", &a) < 0)
364 return -errno;
365
366 if (statvfs("/var/lib/machines", &b) < 0)
367 return -errno;
368
369 /* Don't grow if not enough disk space is available on the host */
370 if (((uint64_t) a.f_bavail * (uint64_t) a.f_bsize) <= VAR_LIB_MACHINES_FREE_MIN)
371 return 0;
372
373 /* Don't grow if at least 1/3th of the fs is still free */
374 if (b.f_bavail > b.f_blocks / 3)
375 return 0;
376
377 /* Calculate how much we are willing to add at maximum */
378 max_add = ((uint64_t) a.f_bavail * (uint64_t) a.f_bsize) - VAR_LIB_MACHINES_FREE_MIN;
379
380 /* Calculate the old size */
381 old_size = (uint64_t) b.f_blocks * (uint64_t) b.f_bsize;
382
383 /* Calculate the new size as three times the size of what is used right now */
384 new_size = ((uint64_t) b.f_blocks - (uint64_t) b.f_bavail) * (uint64_t) b.f_bsize * 3;
385
386 /* Always, grow at least to the start size */
387 if (new_size < VAR_LIB_MACHINES_SIZE_START)
388 new_size = VAR_LIB_MACHINES_SIZE_START;
389
390 /* If the new size is smaller than the old size, don't grow */
391 if (new_size < old_size)
392 return 0;
393
394 /* Ensure we never add more than the maximum */
395 if (new_size > old_size + max_add)
396 new_size = old_size + max_add;
397
398 r = btrfs_resize_loopback("/var/lib/machines", new_size, true);
399 if (r <= 0)
400 return r;
401
402 /* Also bump the quota, of both the subvolume leaf qgroup, as
403 * well as of any subtree quota group by the same id but a
404 * higher level, if it exists. */
405 (void) btrfs_qgroup_set_limit("/var/lib/machines", 0, new_size);
406 (void) btrfs_subvol_set_subtree_quota_limit("/var/lib/machines", 0, new_size);
407
408 log_info("Grew /var/lib/machines btrfs loopback file system to %s.", format_bytes(buf, sizeof(buf), new_size));
409 return 1;
410 }