]>
Commit | Line | Data |
---|---|---|
9dd11ed0 MT |
1 | /*############################################################################# |
2 | # # | |
3 | # Pakfire - The IPFire package management system # | |
e3ddb498 | 4 | # Copyright (C) 2022 Pakfire development team # |
9dd11ed0 MT |
5 | # # |
6 | # This program is free software: you can redistribute it and/or modify # | |
7 | # it under the terms of the GNU General Public License as published by # | |
8 | # the Free Software Foundation, either version 3 of the License, or # | |
9 | # (at your option) any later version. # | |
10 | # # | |
11 | # This program is distributed in the hope that it will be useful, # | |
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # | |
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # | |
14 | # GNU General Public License for more details. # | |
15 | # # | |
16 | # You should have received a copy of the GNU General Public License # | |
17 | # along with this program. If not, see <http://www.gnu.org/licenses/>. # | |
18 | # # | |
19 | #############################################################################*/ | |
20 | ||
21 | #include <errno.h> | |
e3ddb498 | 22 | #include <fcntl.h> |
d59e29c8 | 23 | #include <linux/bpf.h> |
034ba70e | 24 | #include <signal.h> |
9dd11ed0 | 25 | #include <stdlib.h> |
9dd11ed0 | 26 | #include <sys/types.h> |
9dd11ed0 | 27 | |
d59e29c8 MT |
28 | // libbpf |
29 | #include <bpf/bpf.h> | |
30 | ||
9dd11ed0 MT |
31 | #include <pakfire/cgroup.h> |
32 | #include <pakfire/logging.h> | |
e3ddb498 | 33 | #include <pakfire/pakfire.h> |
d973a13d | 34 | #include <pakfire/string.h> |
9dd11ed0 MT |
35 | #include <pakfire/util.h> |
36 | ||
18684c4d | 37 | static __thread char bpf_log_buffer[BPF_LOG_BUF_SIZE]; |
d59e29c8 | 38 | |
// Size (in bytes) of the stack buffers used to read cgroup files.
// The expansion is parenthesized so that the macro stays a single value
// when it is used inside a larger expression (e.g. "x / BUFFER_SIZE").
#define BUFFER_SIZE (64 * 1024)
9dd11ed0 | 40 | |
// Load an immediate into a 64-bit register: dst_reg = imm
// (BPF_K selects the immediate operand; no source register is involved)
#define BPF_MOV64_IMM(DST, IMM) \
	((struct bpf_insn){ \
		.code    = BPF_ALU64 | BPF_MOV | BPF_K, \
		.dst_reg = (DST), \
		.src_reg = 0, \
		.off     = 0, \
		.imm     = (IMM) \
	})

// Program exit
#define BPF_EXIT_INSN() \
	((struct bpf_insn){ \
		.code    = BPF_JMP | BPF_EXIT, \
		.dst_reg = 0, \
		.src_reg = 0, \
		.off     = 0, \
		.imm     = 0 \
	})
60 | ||
// Bitmask values identifying the cgroup controllers this file knows about
enum pakfire_cgroup_controllers {
	PAKFIRE_CGROUP_CONTROLLER_CPU    = 0x01,
	PAKFIRE_CGROUP_CONTROLLER_MEMORY = 0x02,
	PAKFIRE_CGROUP_CONTROLLER_PIDS   = 0x04,
	PAKFIRE_CGROUP_CONTROLLER_IO     = 0x08,
};

// The set of controllers that is enabled when accounting is requested
static const enum pakfire_cgroup_controllers pakfire_cgroup_accounting_controllers =
	PAKFIRE_CGROUP_CONTROLLER_IO |
	PAKFIRE_CGROUP_CONTROLLER_PIDS |
	PAKFIRE_CGROUP_CONTROLLER_MEMORY |
	PAKFIRE_CGROUP_CONTROLLER_CPU;
73 | ||
e3ddb498 MT |
74 | struct pakfire_cgroup { |
75 | struct pakfire* pakfire; | |
76 | int nrefs; | |
9dd11ed0 | 77 | |
31d7e29a MT |
78 | // Store the root path |
79 | char root[PATH_MAX]; | |
80 | ||
2901c3a7 MT |
81 | // Flags |
82 | int flags; | |
83 | ||
e3ddb498 MT |
84 | // Store the path |
85 | char path[PATH_MAX]; | |
9dd11ed0 | 86 | |
e3ddb498 MT |
87 | // File descriptor to cgroup |
88 | int fd; | |
d59e29c8 MT |
89 | |
90 | // FD to the devices filter program | |
91 | int devicesfd; | |
e3ddb498 | 92 | }; |
9e1e7985 | 93 | |
e3ddb498 MT |
94 | // Returns true if this is the root cgroup |
95 | static int pakfire_cgroup_is_root(struct pakfire_cgroup* cgroup) { | |
96 | return !*cgroup->path; | |
97 | } | |
9e1e7985 | 98 | |
2901c3a7 MT |
99 | static int pakfire_cgroup_has_flag(struct pakfire_cgroup* cgroup, int flag) { |
100 | return cgroup->flags & flag; | |
101 | } | |
102 | ||
31d7e29a MT |
103 | static int pakfire_cgroup_set_root(struct pakfire_cgroup* cgroup) { |
104 | int r; | |
105 | ||
106 | // Find the current UID | |
107 | const uid_t uid = getuid(); | |
108 | ||
109 | switch (uid) { | |
110 | // root | |
111 | case 0: | |
112 | r = pakfire_string_set(cgroup->root, "/sys/fs/cgroup"); | |
98eefc02 | 113 | break; |
31d7e29a MT |
114 | |
115 | // unprivileged users | |
116 | default: | |
117 | r = pakfire_string_format(cgroup->root, | |
a8a41064 | 118 | "/sys/fs/cgroup/user.slice/user-%u.slice/user@%u.service", uid, uid); |
98eefc02 | 119 | break; |
31d7e29a MT |
120 | } |
121 | ||
122 | if (r) | |
123 | ERROR(cgroup->pakfire, "Could not determine cgroup root: %m\n"); | |
124 | ||
125 | return r; | |
126 | } | |
127 | ||
e3ddb498 MT |
128 | static const char* pakfire_cgroup_name(struct pakfire_cgroup* cgroup) { |
129 | if (pakfire_cgroup_is_root(cgroup)) | |
130 | return "(root)"; | |
9e1e7985 | 131 | |
e3ddb498 | 132 | return cgroup->path; |
9e1e7985 MT |
133 | } |
134 | ||
2901c3a7 MT |
135 | static const char* pakfire_cgroup_controller_name( |
136 | enum pakfire_cgroup_controllers controller) { | |
137 | switch (controller) { | |
138 | case PAKFIRE_CGROUP_CONTROLLER_CPU: | |
139 | return "cpu"; | |
140 | ||
141 | case PAKFIRE_CGROUP_CONTROLLER_MEMORY: | |
142 | return "memory"; | |
143 | ||
144 | case PAKFIRE_CGROUP_CONTROLLER_PIDS: | |
145 | return "pids"; | |
146 | ||
147 | case PAKFIRE_CGROUP_CONTROLLER_IO: | |
148 | return "io"; | |
149 | } | |
150 | ||
151 | return NULL; | |
152 | } | |
153 | ||
154 | static enum pakfire_cgroup_controllers pakfire_cgroup_find_controller_by_name( | |
155 | const char* name) { | |
156 | const char* n = NULL; | |
157 | ||
158 | // Walk through the bitmap | |
159 | for (unsigned int i = 1; i; i <<= 1) { | |
160 | n = pakfire_cgroup_controller_name(i); | |
161 | if (!n) | |
162 | break; | |
163 | ||
164 | // Match | |
165 | if (strcmp(name, n) == 0) | |
166 | return i; | |
167 | } | |
168 | ||
169 | // Nothing found | |
170 | return 0; | |
171 | } | |
172 | ||
173 | static struct pakfire_cgroup* pakfire_cgroup_parent(struct pakfire_cgroup* cgroup) { | |
174 | struct pakfire_cgroup* parent = NULL; | |
f54eb6e7 | 175 | char path[PATH_MAX]; |
2901c3a7 MT |
176 | int r; |
177 | ||
178 | // Cannot return parent for root group | |
179 | if (pakfire_cgroup_is_root(cgroup)) | |
180 | return NULL; | |
181 | ||
182 | // Determine the path of the parent | |
f54eb6e7 MT |
183 | r = pakfire_dirname(path, cgroup->path); |
184 | if (r) { | |
2901c3a7 MT |
185 | ERROR(cgroup->pakfire, "Could not determine path for parent cgroup: %m\n"); |
186 | return NULL; | |
187 | } | |
188 | ||
189 | // dirname() returns . if no directory component could be found | |
190 | if (strcmp(path, ".") == 0) | |
f54eb6e7 | 191 | *path = '\0'; |
2901c3a7 MT |
192 | |
193 | // Open the cgroup | |
194 | r = pakfire_cgroup_open(&parent, cgroup->pakfire, path, 0); | |
195 | if (r) { | |
196 | ERROR(cgroup->pakfire, "Could not open parent cgroup: %m\n"); | |
197 | parent = NULL; | |
198 | } | |
199 | ||
2901c3a7 MT |
200 | return parent; |
201 | } | |
202 | ||
e3ddb498 MT |
203 | static void pakfire_cgroup_free(struct pakfire_cgroup* cgroup) { |
204 | DEBUG(cgroup->pakfire, "Releasing cgroup %s at %p\n", | |
205 | pakfire_cgroup_name(cgroup), cgroup); | |
9e1e7985 | 206 | |
d59e29c8 | 207 | // Close the file descriptors |
2868fb2f | 208 | if (cgroup->fd > 0) |
e3ddb498 | 209 | close(cgroup->fd); |
d59e29c8 MT |
210 | if (cgroup->devicesfd > 0) |
211 | close(cgroup->devicesfd); | |
9e1e7985 | 212 | |
e3ddb498 MT |
213 | pakfire_unref(cgroup->pakfire); |
214 | free(cgroup); | |
9e1e7985 MT |
215 | } |
216 | ||
d59e29c8 MT |
217 | static int pakfire_cgroup_setup_devices(struct pakfire_cgroup* cgroup) { |
218 | LIBBPF_OPTS(bpf_prog_load_opts, opts, | |
219 | // Log Buffer | |
220 | .log_buf = bpf_log_buffer, | |
221 | .log_size = sizeof(bpf_log_buffer), | |
222 | ); | |
223 | int r; | |
224 | ||
225 | struct bpf_insn program[] = { | |
226 | BPF_MOV64_IMM(BPF_REG_0, 1), // r0 = 1 | |
227 | BPF_EXIT_INSN(), // return r0 | |
228 | }; | |
229 | ||
230 | // Load the BPF program | |
231 | r = bpf_prog_load(BPF_PROG_TYPE_CGROUP_DEVICE, NULL, "GPL", | |
232 | program, sizeof(program) / sizeof(*program), &opts); | |
233 | if (r < 0) { | |
234 | ERROR(cgroup->pakfire, "Could not load BPF program: %m\n"); | |
235 | return r; | |
236 | } | |
237 | ||
238 | // Store the file descriptor | |
239 | cgroup->devicesfd = r; | |
240 | ||
241 | // Attach the program to the cgroup | |
242 | r = bpf_prog_attach(cgroup->devicesfd, cgroup->fd, | |
243 | BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI); | |
244 | if (r) { | |
245 | ERROR(cgroup->pakfire, "Could not attach BPF program to cgroup: %m\n"); | |
246 | return r; | |
247 | } | |
248 | ||
249 | return 0; | |
250 | } | |
251 | ||
01cf6134 | 252 | static int pakfire_cgroup_open_root(struct pakfire_cgroup* cgroup) { |
31d7e29a | 253 | int fd = open(cgroup->root, O_DIRECTORY|O_PATH|O_CLOEXEC); |
01cf6134 | 254 | if (fd < 0) { |
31d7e29a | 255 | ERROR(cgroup->pakfire, "Could not open %s: %m\n", cgroup->root); |
01cf6134 MT |
256 | return -1; |
257 | } | |
258 | ||
259 | return fd; | |
260 | } | |
261 | ||
e3ddb498 MT |
262 | static int __pakfire_cgroup_create(struct pakfire_cgroup* cgroup) { |
263 | char path[PATH_MAX]; | |
264 | int r; | |
9dd11ed0 | 265 | |
e3ddb498 | 266 | DEBUG(cgroup->pakfire, "Trying to create cgroup %s\n", pakfire_cgroup_name(cgroup)); |
9dd11ed0 | 267 | |
e3ddb498 | 268 | // Compose the absolute path |
31d7e29a | 269 | r = pakfire_path_join(path, cgroup->root, cgroup->path); |
56796f84 | 270 | if (r) |
e3ddb498 | 271 | return 1; |
9dd11ed0 | 272 | |
e3ddb498 MT |
273 | // Try creating the directory |
274 | return pakfire_mkdir(path, 0755); | |
275 | } | |
9dd11ed0 | 276 | |
e3ddb498 MT |
277 | /* |
278 | Opens the cgroup and returns a file descriptor. | |
9dd11ed0 | 279 | |
e3ddb498 | 280 | If the cgroup does not exist, it will try to create it. |
9dd11ed0 | 281 | |
e3ddb498 MT |
282 | This function returns a negative value on error. |
283 | */ | |
284 | static int __pakfire_cgroup_open(struct pakfire_cgroup* cgroup) { | |
e3ddb498 MT |
285 | int fd = -1; |
286 | int r; | |
9dd11ed0 | 287 | |
e3ddb498 | 288 | // Open file descriptor of the cgroup root |
01cf6134 MT |
289 | int rootfd = pakfire_cgroup_open_root(cgroup); |
290 | if (rootfd < 0) | |
e3ddb498 | 291 | return -1; |
9dd11ed0 | 292 | |
e3ddb498 MT |
293 | // Return the rootfd for the root group |
294 | if (pakfire_cgroup_is_root(cgroup)) | |
295 | return rootfd; | |
9dd11ed0 | 296 | |
e3ddb498 MT |
297 | RETRY: |
298 | fd = openat(rootfd, cgroup->path, O_DIRECTORY|O_PATH|O_CLOEXEC); | |
299 | if (fd < 0) { | |
300 | switch (errno) { | |
301 | // If the cgroup doesn't exist yet, try to create it | |
302 | case ENOENT: | |
303 | r = __pakfire_cgroup_create(cgroup); | |
304 | if (r) | |
305 | goto ERROR; | |
9dd11ed0 | 306 | |
e3ddb498 MT |
307 | // Retry open after successful creation |
308 | goto RETRY; | |
9dd11ed0 | 309 | |
e3ddb498 MT |
310 | // Exit on all other errors |
311 | default: | |
312 | ERROR(cgroup->pakfire, "Could not open cgroup %s: %m\n", | |
313 | pakfire_cgroup_name(cgroup)); | |
314 | goto ERROR; | |
315 | } | |
316 | } | |
9dd11ed0 | 317 | |
e3ddb498 MT |
318 | ERROR: |
319 | if (rootfd > 0) | |
320 | close(rootfd); | |
9dd11ed0 | 321 | |
e3ddb498 MT |
322 | return fd; |
323 | } | |
9dd11ed0 | 324 | |
034ba70e MT |
325 | static int pakfire_cgroup_access(struct pakfire_cgroup* cgroup, const char* path, |
326 | int mode, int flags) { | |
327 | return faccessat(cgroup->fd, path, mode, flags); | |
328 | } | |
329 | ||
330 | static FILE* pakfire_cgroup_open_file(struct pakfire_cgroup* cgroup, | |
331 | const char* path, const char* mode) { | |
332 | FILE* f = NULL; | |
333 | ||
334 | // Open cgroup.procs | |
335 | int fd = openat(cgroup->fd, "cgroup.procs", O_CLOEXEC); | |
336 | if (fd < 0) { | |
337 | ERROR(cgroup->pakfire, "%s: Could not open %s: %m\n", | |
338 | pakfire_cgroup_name(cgroup), path); | |
339 | goto ERROR; | |
340 | } | |
341 | ||
342 | // Convert into file handle | |
343 | f = fdopen(fd, mode); | |
344 | if (!f) | |
345 | goto ERROR; | |
346 | ||
347 | ERROR: | |
2868fb2f | 348 | if (fd > 0) |
034ba70e MT |
349 | close(fd); |
350 | ||
351 | return f; | |
352 | } | |
353 | ||
2901c3a7 | 354 | static ssize_t pakfire_cgroup_read(struct pakfire_cgroup* cgroup, const char* path, |
e3ddb498 | 355 | char* buffer, size_t length) { |
2901c3a7 | 356 | ssize_t bytes_read = -1; |
9dd11ed0 | 357 | |
e3ddb498 MT |
358 | // Check if this cgroup has been destroyed already |
359 | if (!cgroup->fd) { | |
360 | ERROR(cgroup->pakfire, "Trying to read from destroyed cgroup\n"); | |
2901c3a7 | 361 | return -1; |
9dd11ed0 MT |
362 | } |
363 | ||
e3ddb498 MT |
364 | // Open the file |
365 | int fd = openat(cgroup->fd, path, O_CLOEXEC); | |
366 | if (fd < 0) { | |
367 | DEBUG(cgroup->pakfire, "Could not open %s/%s: %m\n", | |
368 | pakfire_cgroup_name(cgroup), path); | |
369 | goto ERROR; | |
370 | } | |
9dd11ed0 | 371 | |
e3ddb498 | 372 | // Read file content into buffer |
2901c3a7 MT |
373 | bytes_read = read(fd, buffer, length); |
374 | if (bytes_read < 0) { | |
e3ddb498 MT |
375 | DEBUG(cgroup->pakfire, "Could not read from %s/%s: %m\n", |
376 | pakfire_cgroup_name(cgroup), path); | |
377 | goto ERROR; | |
378 | } | |
9dd11ed0 | 379 | |
2901c3a7 | 380 | // Terminate the buffer |
cc7f589e | 381 | if ((size_t)bytes_read < length) |
2901c3a7 | 382 | buffer[bytes_read] = '\0'; |
9dd11ed0 | 383 | |
e3ddb498 MT |
384 | ERROR: |
385 | if (fd > 0) | |
386 | close(fd); | |
9dd11ed0 | 387 | |
2901c3a7 | 388 | return bytes_read; |
9dd11ed0 MT |
389 | } |
390 | ||
e3ddb498 MT |
391 | static int pakfire_cgroup_write(struct pakfire_cgroup* cgroup, |
392 | const char* path, const char* format, ...) { | |
305de320 | 393 | va_list args; |
e3ddb498 MT |
394 | int r = 0; |
395 | ||
396 | // Check if this cgroup has been destroyed already | |
397 | if (!cgroup->fd) { | |
398 | ERROR(cgroup->pakfire, "Trying to write to destroyed cgroup\n"); | |
399 | errno = EPERM; | |
400 | return 1; | |
401 | } | |
9dd11ed0 | 402 | |
e3ddb498 MT |
403 | // Open the file |
404 | int fd = openat(cgroup->fd, path, O_WRONLY|O_CLOEXEC); | |
405 | if (fd < 0) { | |
406 | DEBUG(cgroup->pakfire, "Could not open %s/%s for writing: %m\n", | |
407 | pakfire_cgroup_name(cgroup), path); | |
9dd11ed0 | 408 | return 1; |
e3ddb498 | 409 | } |
9dd11ed0 | 410 | |
e3ddb498 | 411 | // Write buffer |
305de320 | 412 | va_start(args, format); |
e3ddb498 | 413 | ssize_t bytes_written = vdprintf(fd, format, args); |
305de320 MT |
414 | va_end(args); |
415 | ||
e3ddb498 MT |
416 | // Check if content was written okay |
417 | if (bytes_written < 0) { | |
418 | DEBUG(cgroup->pakfire, "Could not write to %s/%s: %m\n", | |
419 | pakfire_cgroup_name(cgroup), path); | |
420 | r = 1; | |
421 | } | |
820c32c7 | 422 | |
e3ddb498 MT |
423 | // Close fd |
424 | close(fd); | |
305de320 MT |
425 | |
426 | return r; | |
427 | } | |
428 | ||
2901c3a7 MT |
429 | static int pakfire_cgroup_read_controllers( |
430 | struct pakfire_cgroup* cgroup, const char* name) { | |
431 | char buffer[BUFFER_SIZE]; | |
432 | char* p = NULL; | |
9dd11ed0 | 433 | |
2901c3a7 MT |
434 | // Discovered controllers |
435 | int controllers = 0; | |
69cfa22d | 436 | |
2901c3a7 MT |
437 | // Read cgroup.controllers file |
438 | ssize_t bytes_read = pakfire_cgroup_read(cgroup, name, buffer, sizeof(buffer)); | |
439 | if (bytes_read < 0) | |
440 | return -1; | |
441 | ||
442 | // If the file was empty, there is nothing more to do | |
443 | if (bytes_read == 0) | |
444 | return 0; | |
445 | ||
446 | char* token = strtok_r(buffer, " \n", &p); | |
447 | ||
448 | while (token) { | |
449 | DEBUG(cgroup->pakfire, "Found controller '%s'\n", token); | |
450 | ||
451 | // Try finding this controller | |
452 | int controller = pakfire_cgroup_find_controller_by_name(token); | |
453 | if (controller) | |
454 | controllers |= controller; | |
455 | ||
456 | // Move on to next token | |
457 | token = strtok_r(NULL, " \n", &p); | |
9dd11ed0 MT |
458 | } |
459 | ||
2901c3a7 MT |
460 | // Return discovered controllers |
461 | return controllers; | |
462 | } | |
9dd11ed0 | 463 | |
/*
	Returns the controllers listed in cgroup.controllers as a bitmap
	(or -1 on error)
*/
static int pakfire_cgroup_available_controllers(struct pakfire_cgroup* cgroup) {
	const char* filename = "cgroup.controllers";

	return pakfire_cgroup_read_controllers(cgroup, filename);
}
470 | ||
/*
	Returns the controllers listed in cgroup.subtree_control as a bitmap
	(or -1 on error)
*/
static int pakfire_cgroup_enabled_controllers(struct pakfire_cgroup* cgroup) {
	const char* filename = "cgroup.subtree_control";

	return pakfire_cgroup_read_controllers(cgroup, filename);
}
477 | ||
478 | /* | |
479 | This function takes a bitmap of controllers that should be enabled. | |
480 | */ | |
481 | static int pakfire_cgroup_enable_controllers(struct pakfire_cgroup* cgroup, | |
482 | enum pakfire_cgroup_controllers controllers) { | |
483 | struct pakfire_cgroup* parent = NULL; | |
484 | int r = 1; | |
485 | ||
486 | // Find all enabled controllers | |
487 | const int enabled_controllers = pakfire_cgroup_enabled_controllers(cgroup); | |
488 | if (enabled_controllers < 0) { | |
489 | ERROR(cgroup->pakfire, "Could not fetch enabled controllers: %m\n"); | |
490 | goto ERROR; | |
9dd11ed0 MT |
491 | } |
492 | ||
2901c3a7 MT |
493 | // Filter out anything that is already enabled |
494 | controllers = (controllers & ~enabled_controllers); | |
9dd11ed0 | 495 | |
2901c3a7 MT |
496 | // Exit if everything is already enabled |
497 | if (!controllers) { | |
498 | DEBUG(cgroup->pakfire, "All controllers are already enabled\n"); | |
499 | return 0; | |
500 | } | |
501 | ||
502 | // Find all available controllers | |
503 | const int available_controllers = pakfire_cgroup_available_controllers(cgroup); | |
504 | if (available_controllers < 0) { | |
505 | ERROR(cgroup->pakfire, "Could not fetch available controllers: %m\n"); | |
506 | goto ERROR; | |
507 | } | |
508 | ||
509 | // Are all controllers we need available, yet? | |
510 | if (controllers & ~available_controllers) { | |
511 | DEBUG(cgroup->pakfire, "Not all controllers are available, yet\n"); | |
512 | ||
513 | parent = pakfire_cgroup_parent(cgroup); | |
514 | ||
515 | // Enable everything we need on the parent group | |
516 | if (parent) { | |
517 | r = pakfire_cgroup_enable_controllers(parent, controllers); | |
518 | if (r) | |
519 | goto ERROR; | |
520 | } | |
521 | } | |
522 | ||
523 | // Determine how many iterations we will need | |
524 | const int iterations = 1 << (sizeof(controllers) * 8 - __builtin_clz(controllers)); | |
525 | ||
526 | // Iterate over all known controllers | |
527 | for (int controller = 1; controller < iterations; controller <<= 1) { | |
528 | // Skip enabling this controller if not requested | |
529 | if (!(controller & controllers)) | |
530 | continue; | |
531 | ||
532 | // Fetch name | |
533 | const char* name = pakfire_cgroup_controller_name(controller); | |
534 | ||
535 | DEBUG(cgroup->pakfire, "Enabling controller %s in cgroup %s\n", | |
536 | name, pakfire_cgroup_name(cgroup)); | |
537 | ||
538 | // Try enabling the controller (this will succeed if it already is enabled) | |
539 | r = pakfire_cgroup_write(cgroup, "cgroup.subtree_control", "+%s\n", name); | |
540 | if (r) { | |
541 | ERROR(cgroup->pakfire, "Could not enable controller %s in cgroup %s\n", | |
542 | name, pakfire_cgroup_name(cgroup)); | |
543 | goto ERROR; | |
544 | } | |
545 | } | |
546 | ||
547 | ERROR: | |
548 | if (parent) | |
549 | pakfire_cgroup_unref(parent); | |
550 | ||
551 | return r; | |
552 | } | |
553 | ||
554 | static int pakfire_cgroup_enable_accounting(struct pakfire_cgroup* cgroup) { | |
555 | // Enable all accounting controllers | |
556 | return pakfire_cgroup_enable_controllers(cgroup, | |
557 | pakfire_cgroup_accounting_controllers); | |
9dd11ed0 MT |
558 | } |
559 | ||
e3ddb498 MT |
560 | /* |
561 | Entry function to open a new cgroup. | |
9dd11ed0 | 562 | |
e3ddb498 MT |
563 | If the cgroup doesn't exist, it will be created including any parent cgroups. |
564 | */ | |
565 | int pakfire_cgroup_open(struct pakfire_cgroup** cgroup, | |
2901c3a7 | 566 | struct pakfire* pakfire, const char* path, int flags) { |
e3ddb498 | 567 | int r = 1; |
9dd11ed0 | 568 | |
e3ddb498 MT |
569 | // Allocate the cgroup struct |
570 | struct pakfire_cgroup* c = calloc(1, sizeof(*c)); | |
571 | if (!c) | |
572 | return 1; | |
9dd11ed0 | 573 | |
e3ddb498 | 574 | DEBUG(pakfire, "Allocated cgroup %s at %p\n", path, c); |
9dd11ed0 | 575 | |
e3ddb498 MT |
576 | // Keep a reference to pakfire |
577 | c->pakfire = pakfire_ref(pakfire); | |
4630031c | 578 | |
e3ddb498 MT |
579 | // Initialize reference counter |
580 | c->nrefs = 1; | |
5ae21aa1 | 581 | |
31d7e29a MT |
582 | // Find the root |
583 | r = pakfire_cgroup_set_root(c); | |
584 | if (r) | |
585 | goto ERROR; | |
586 | ||
e3ddb498 MT |
587 | // Copy path |
588 | pakfire_string_set(c->path, path); | |
5ae21aa1 | 589 | |
2901c3a7 MT |
590 | // Copy flags |
591 | c->flags = flags; | |
592 | ||
e3ddb498 MT |
593 | // Open a file descriptor |
594 | c->fd = __pakfire_cgroup_open(c); | |
e8a18682 MT |
595 | if (c->fd < 0) { |
596 | r = 1; | |
e3ddb498 | 597 | goto ERROR; |
e8a18682 | 598 | } |
4630031c | 599 | |
2901c3a7 MT |
600 | // Enable accounting if requested |
601 | if (pakfire_cgroup_has_flag(c, PAKFIRE_CGROUP_ENABLE_ACCOUNTING)) { | |
602 | r = pakfire_cgroup_enable_accounting(c); | |
603 | if (r) | |
604 | goto ERROR; | |
605 | } | |
606 | ||
d59e29c8 MT |
607 | // Setup the devices filter |
608 | r = pakfire_cgroup_setup_devices(c); | |
609 | if (r) | |
610 | goto ERROR; | |
611 | ||
e3ddb498 | 612 | *cgroup = c; |
4630031c | 613 | return 0; |
4630031c | 614 | |
e3ddb498 MT |
615 | ERROR: |
616 | pakfire_cgroup_free(c); | |
617 | return r; | |
4630031c MT |
618 | } |
619 | ||
e3ddb498 MT |
620 | struct pakfire_cgroup* pakfire_cgroup_ref(struct pakfire_cgroup* cgroup) { |
621 | ++cgroup->nrefs; | |
4630031c | 622 | |
e3ddb498 | 623 | return cgroup; |
1b41d3b1 MT |
624 | } |
625 | ||
e3ddb498 MT |
626 | struct pakfire_cgroup* pakfire_cgroup_unref(struct pakfire_cgroup* cgroup) { |
627 | if (--cgroup->nrefs > 0) | |
628 | return cgroup; | |
1b41d3b1 | 629 | |
e3ddb498 MT |
630 | pakfire_cgroup_free(cgroup); |
631 | return NULL; | |
1b41d3b1 MT |
632 | } |
633 | ||
aca565fc MT |
634 | // Open a child cgroup |
635 | int pakfire_cgroup_child(struct pakfire_cgroup** child, | |
636 | struct pakfire_cgroup* cgroup, const char* name, int flags) { | |
637 | char path[PATH_MAX]; | |
638 | int r; | |
639 | ||
366a3be1 MT |
640 | // Check input |
641 | if (!name) { | |
642 | errno = EINVAL; | |
643 | return 1; | |
644 | } | |
645 | ||
aca565fc MT |
646 | // Join paths |
647 | r = pakfire_path_join(path, cgroup->path, name); | |
56796f84 | 648 | if (r) |
aca565fc MT |
649 | return 1; |
650 | ||
651 | // Open the child group | |
652 | return pakfire_cgroup_open(child, cgroup->pakfire, path, flags); | |
653 | } | |
654 | ||
/*
	Calls callback for every PID that is listed in cgroup.procs.

	The walk stops as soon as the callback returns a non-zero value,
	which is then returned. Returns 1 if there is no callback or
	cgroup.procs could not be opened.
*/
static int pakfire_cgroup_procs_callback(struct pakfire_cgroup* cgroup,
		int (*callback)(struct pakfire_cgroup* cgroup, pid_t pid, void* data), void* data) {
	char* line = NULL;
	size_t capacity = 0;
	int r = 0;

	// Without a callback, there is nothing we could do
	if (callback == NULL) {
		errno = EINVAL;
		return 1;
	}

	// Open cgroup.procs
	FILE* f = pakfire_cgroup_open_file(cgroup, "cgroup.procs", "r");
	if (f == NULL)
		return 1;

	// One PID per line
	while (getline(&line, &capacity, f) >= 0) {
		const pid_t pid = strtol(line, NULL, 10);

		// Hand the PID to the callback and stop if it fails
		r = callback(cgroup, pid, data);
		if (r)
			break;
	}

	// Cleanup
	free(line);
	fclose(f);

	return r;
}
696 | ||
697 | static int send_sigkill(struct pakfire_cgroup* cgroup, const pid_t pid, void* data) { | |
698 | DEBUG(cgroup->pakfire, "Sending signal SIGKILL to PID %d\n", pid); | |
699 | ||
700 | int r = kill(pid, SIGKILL); | |
701 | if (r < 0 && errno != ESRCH) { | |
702 | ERROR(cgroup->pakfire, "Could not send signal SIGKILL to PID %d: %m\n", pid); | |
703 | return r; | |
704 | } | |
705 | ||
706 | return r; | |
707 | } | |
708 | ||
e3ddb498 MT |
709 | /* |
710 | Immediately kills all processes in this cgroup | |
711 | */ | |
689aa7de | 712 | static int pakfire_cgroup_killall(struct pakfire_cgroup* cgroup) { |
034ba70e | 713 | DEBUG(cgroup->pakfire, "%s: Killing all processes\n", pakfire_cgroup_name(cgroup)); |
1b41d3b1 | 714 | |
034ba70e MT |
715 | // Do we have support for cgroup.kill? |
716 | int r = pakfire_cgroup_access(cgroup, "cgroup.kill", F_OK, 0); | |
717 | ||
718 | // Fall back to the legacy version | |
719 | if (r && errno == ENOENT) { | |
720 | return pakfire_cgroup_procs_callback(cgroup, send_sigkill, NULL); | |
721 | } | |
722 | ||
723 | return pakfire_cgroup_write(cgroup, "cgroup.kill", "1"); | |
e3ddb498 | 724 | } |
1b41d3b1 | 725 | |
e3ddb498 MT |
726 | /* |
727 | Immediately destroys this cgroup | |
728 | */ | |
729 | int pakfire_cgroup_destroy(struct pakfire_cgroup* cgroup) { | |
730 | int r; | |
1b41d3b1 | 731 | |
01cf6134 MT |
732 | // Cannot call this for the root group |
733 | if (pakfire_cgroup_is_root(cgroup)) { | |
734 | errno = EPERM; | |
735 | return 1; | |
736 | } | |
737 | ||
738 | DEBUG(cgroup->pakfire, "Destroying cgroup %s\n", pakfire_cgroup_name(cgroup)); | |
739 | ||
e3ddb498 MT |
740 | // Kill everything in this group |
741 | r = pakfire_cgroup_killall(cgroup); | |
742 | if (r) | |
743 | return r; | |
1b41d3b1 | 744 | |
e3ddb498 | 745 | // Close the file descriptor |
2868fb2f | 746 | if (cgroup->fd > 0) { |
e3ddb498 MT |
747 | close(cgroup->fd); |
748 | cgroup->fd = 0; | |
d5256224 | 749 | } |
d5256224 | 750 | |
01cf6134 MT |
751 | // Open the root directory |
752 | int fd = pakfire_cgroup_open_root(cgroup); | |
753 | if (fd < 0) | |
754 | return 1; | |
755 | ||
756 | // Delete the directory | |
757 | r = unlinkat(fd, cgroup->path, AT_REMOVEDIR); | |
758 | if (r) | |
759 | ERROR(cgroup->pakfire, "Could not destroy cgroup: %m\n"); | |
760 | ||
761 | // Close fd | |
762 | close(fd); | |
763 | ||
764 | return r; | |
d5256224 | 765 | } |
820c32c7 | 766 | |
e3ddb498 MT |
767 | int pakfire_cgroup_fd(struct pakfire_cgroup* cgroup) { |
768 | return cgroup->fd; | |
820c32c7 | 769 | } |
46dd01c6 MT |
770 | |
771 | // Memory | |
772 | ||
773 | int pakfire_cgroup_set_guaranteed_memory(struct pakfire_cgroup* cgroup, size_t mem) { | |
774 | int r; | |
775 | ||
776 | // Enable memory controller | |
777 | r = pakfire_cgroup_enable_controllers(cgroup, PAKFIRE_CGROUP_CONTROLLER_MEMORY); | |
778 | if (r) | |
779 | return r; | |
780 | ||
781 | DEBUG(cgroup->pakfire, "%s: Setting guaranteed memory to %zu byte(s)\n", | |
782 | pakfire_cgroup_name(cgroup), mem); | |
783 | ||
784 | // Set value | |
785 | r = pakfire_cgroup_write(cgroup, "memory.min", "%zu\n", mem); | |
786 | if (r) | |
787 | ERROR(cgroup->pakfire, "%s: Could not set guaranteed memory: %m\n", | |
788 | pakfire_cgroup_name(cgroup)); | |
789 | ||
790 | return r; | |
791 | } | |
792 | ||
793 | int pakfire_cgroup_set_memory_limit(struct pakfire_cgroup* cgroup, size_t mem) { | |
794 | int r; | |
795 | ||
796 | // Enable memory controller | |
797 | r = pakfire_cgroup_enable_controllers(cgroup, PAKFIRE_CGROUP_CONTROLLER_MEMORY); | |
798 | if (r) | |
799 | return r; | |
800 | ||
801 | DEBUG(cgroup->pakfire, "%s: Setting memory limit to %zu byte(s)\n", | |
802 | pakfire_cgroup_name(cgroup), mem); | |
803 | ||
804 | // Set value | |
805 | r = pakfire_cgroup_write(cgroup, "memory.max", "%zu\n", mem); | |
806 | if (r) | |
807 | ERROR(cgroup->pakfire, "%s: Could not set memory limit: %m\n", | |
808 | pakfire_cgroup_name(cgroup)); | |
809 | ||
810 | return r; | |
811 | } | |
d3b93302 MT |
812 | |
813 | // PIDs | |
814 | ||
815 | int pakfire_cgroup_set_pid_limit(struct pakfire_cgroup* cgroup, size_t limit) { | |
816 | int r; | |
817 | ||
818 | // Enable PID controller | |
819 | r = pakfire_cgroup_enable_controllers(cgroup, PAKFIRE_CGROUP_CONTROLLER_PIDS); | |
820 | if (r) | |
821 | return r; | |
822 | ||
823 | DEBUG(cgroup->pakfire, "%s: Setting PID limit to %zu\n", | |
824 | pakfire_cgroup_name(cgroup), limit); | |
825 | ||
826 | // Set value | |
827 | r = pakfire_cgroup_write(cgroup, "pids.max", "%zu\n", limit); | |
828 | if (r) | |
829 | ERROR(cgroup->pakfire, "%s: Could not set PID limit: %m\n", | |
830 | pakfire_cgroup_name(cgroup)); | |
831 | ||
832 | return r; | |
833 | } | |
6b7cf275 MT |
834 | |
835 | // Stats | |
836 | ||
837 | static int __pakfire_cgroup_read_stats_line(struct pakfire_cgroup* cgroup, | |
838 | int (*callback)(struct pakfire_cgroup* cgroup, const char* key, unsigned long val, void* data), | |
839 | void* data, char* line) { | |
840 | char* p = NULL; | |
841 | ||
842 | DEBUG(cgroup->pakfire, "Parsing line: %s\n", line); | |
843 | ||
844 | char key[NAME_MAX]; | |
845 | unsigned long val = 0; | |
846 | ||
847 | // Number of the field | |
848 | int i = 0; | |
849 | ||
850 | char* elem = strtok_r(line, " ", &p); | |
851 | while (elem) { | |
852 | switch (i++) { | |
853 | // First field is the key | |
854 | case 0: | |
855 | // Copy the key | |
856 | pakfire_string_set(key, elem); | |
857 | break; | |
858 | ||
859 | // The second field is some value | |
860 | case 1: | |
861 | val = strtoul(elem, NULL, 10); | |
862 | break; | |
863 | ||
864 | // Ignore the rest | |
865 | default: | |
866 | DEBUG(cgroup->pakfire, "%s: Unknown value in cgroup stats (%d): %s\n", | |
867 | pakfire_cgroup_name(cgroup), i, elem); | |
868 | break; | |
869 | } | |
870 | ||
871 | elem = strtok_r(NULL, " ", &p); | |
872 | } | |
873 | ||
874 | // Check if we parsed both fields | |
875 | if (i < 2) { | |
876 | ERROR(cgroup->pakfire, "Could not parse line\n"); | |
877 | return 1; | |
878 | } | |
879 | ||
880 | // Call the callback | |
881 | return callback(cgroup, key, val, data); | |
882 | } | |
883 | ||
884 | static int __pakfire_cgroup_read_stats(struct pakfire_cgroup* cgroup, const char* path, | |
885 | int (*callback)(struct pakfire_cgroup* cgroup, const char* key, unsigned long val, void* data), | |
886 | void* data) { | |
887 | char* p = NULL; | |
888 | int r; | |
889 | ||
890 | char buffer[BUFFER_SIZE]; | |
891 | ||
892 | DEBUG(cgroup->pakfire, "%s: Reading stats from %s\n", pakfire_cgroup_name(cgroup), path); | |
893 | ||
894 | // Open the file | |
895 | r = pakfire_cgroup_read(cgroup, path, buffer, sizeof(buffer)); | |
896 | if (r < 0) | |
897 | goto ERROR; | |
898 | ||
899 | char* line = strtok_r(buffer, "\n", &p); | |
900 | while (line) { | |
901 | // Parse the line | |
902 | r = __pakfire_cgroup_read_stats_line(cgroup, callback, data, line); | |
903 | if (r) | |
904 | goto ERROR; | |
905 | ||
906 | // Move to the next line | |
907 | line = strtok_r(NULL, "\n", &p); | |
908 | } | |
909 | ||
910 | ERROR: | |
911 | return r; | |
912 | } | |
913 | ||
// Maps a key of a stats file to the location its value is stored at
struct pakfire_cgroup_stat_entry {
	// Name of the key as it appears in the file
	const char* key;

	// Where to store the value
	unsigned long* val;
};
918 | ||
919 | static int __pakfire_cgroup_parse_cpu_stats(struct pakfire_cgroup* cgroup, | |
920 | const char* key, unsigned long val, void* data) { | |
921 | struct pakfire_cgroup_cpu_stats* stats = (struct pakfire_cgroup_cpu_stats*)data; | |
922 | ||
923 | const struct pakfire_cgroup_stat_entry entries[] = { | |
924 | { "system_usec", &stats->system_usec }, | |
925 | { "usage_usec", &stats->usage_usec }, | |
926 | { "user_usec", &stats->user_usec }, | |
927 | { NULL, NULL }, | |
928 | }; | |
929 | // Find and store value | |
930 | for (const struct pakfire_cgroup_stat_entry* entry = entries; entry->key; entry++) { | |
931 | if (strcmp(entry->key, key) == 0) { | |
932 | *entry->val = val; | |
933 | return 0; | |
934 | } | |
935 | } | |
936 | ||
a8a41064 | 937 | DEBUG(cgroup->pakfire, "Unknown key for CPU stats: %s = %lu\n", key, val); |
6b7cf275 MT |
938 | |
939 | return 0; | |
940 | } | |
941 | ||
942 | static int __pakfire_cgroup_parse_memory_stats(struct pakfire_cgroup* cgroup, | |
943 | const char* key, unsigned long val, void* data) { | |
944 | struct pakfire_cgroup_memory_stats* stats = (struct pakfire_cgroup_memory_stats*)data; | |
945 | ||
946 | const struct pakfire_cgroup_stat_entry entries[] = { | |
947 | { "anon", &stats->anon }, | |
948 | { "file", &stats->file }, | |
949 | { "kernel", &stats->kernel }, | |
950 | { "kernel_stack", &stats->kernel_stack }, | |
951 | { "pagetables", &stats->pagetables }, | |
952 | { "percpu", &stats->percpu }, | |
953 | { "sock", &stats->sock }, | |
954 | { "vmalloc", &stats->vmalloc }, | |
955 | { "shmem", &stats->shmem }, | |
956 | { "zswap", &stats->zswap }, | |
957 | { "zswapped", &stats->zswapped }, | |
958 | { "file_mapped", &stats->file_mapped }, | |
959 | { "file_dirty", &stats->file_dirty }, | |
960 | { "file_writeback", &stats->file_writeback }, | |
961 | { "swapcached", &stats->swapcached }, | |
962 | { "anon_thp", &stats->anon_thp }, | |
963 | { "file_thp", &stats->file_thp }, | |
964 | { "shmem_thp", &stats->shmem_thp }, | |
965 | { "inactive_anon", &stats->inactive_anon }, | |
966 | { "active_anon", &stats->active_anon }, | |
967 | { "inactive_file", &stats->inactive_file }, | |
968 | { "active_file", &stats->active_file }, | |
969 | { "unevictable", &stats->unevictable }, | |
970 | { "slab_reclaimable", &stats->slab_reclaimable }, | |
971 | { "slab_unreclaimable", &stats->slab_unreclaimable }, | |
972 | { "slab", &stats->slab }, | |
973 | { "workingset_refault_anon", &stats->workingset_refault_anon }, | |
974 | { "workingset_refault_file", &stats->workingset_refault_file }, | |
975 | { "workingset_activate_anon", &stats->workingset_activate_anon }, | |
976 | { "workingset_activate_file", &stats->workingset_activate_file }, | |
977 | { "workingset_restore_anon", &stats->workingset_restore_anon }, | |
978 | { "workingset_restore_file", &stats->workingset_restore_file }, | |
979 | { "workingset_nodereclaim", &stats->workingset_nodereclaim }, | |
980 | { "pgfault", &stats->pgfault }, | |
981 | { "pgmajfault", &stats->pgmajfault }, | |
982 | { "pgrefill", &stats->pgrefill }, | |
983 | { "pgscan", &stats->pgscan }, | |
984 | { "pgsteal", &stats->pgsteal }, | |
985 | { "pgactivate", &stats->pgactivate }, | |
986 | { "pgdeactivate", &stats->pgdeactivate }, | |
987 | { "pglazyfree", &stats->pglazyfree }, | |
988 | { "pglazyfreed", &stats->pglazyfreed }, | |
989 | { "thp_fault_alloc", &stats->thp_fault_alloc }, | |
990 | { "thp_collapse_alloc", &stats->thp_collapse_alloc }, | |
991 | { NULL, NULL }, | |
992 | }; | |
993 | ||
994 | // Find and store value | |
995 | for (const struct pakfire_cgroup_stat_entry* entry = entries; entry->key; entry++) { | |
996 | if (strcmp(entry->key, key) == 0) { | |
997 | *entry->val = val; | |
998 | return 0; | |
999 | } | |
1000 | } | |
1001 | ||
1002 | // Log any unknown keys | |
a8a41064 | 1003 | DEBUG(cgroup->pakfire, "Unknown key for memory stats: %s = %lu\n", key, val); |
6b7cf275 MT |
1004 | |
1005 | return 0; | |
1006 | } | |
1007 | ||
1008 | int pakfire_cgroup_stat(struct pakfire_cgroup* cgroup, | |
1009 | struct pakfire_cgroup_stats* stats) { | |
1010 | int r; | |
1011 | ||
1012 | // Check input | |
1013 | if (!stats) { | |
1014 | errno = EINVAL; | |
1015 | return 1; | |
1016 | } | |
1017 | ||
1018 | // Read CPU stats | |
1019 | r = __pakfire_cgroup_read_stats(cgroup, "cpu.stat", | |
1020 | __pakfire_cgroup_parse_cpu_stats, &stats->cpu); | |
1021 | if (r) | |
1022 | goto ERROR; | |
1023 | ||
1024 | // Read memory stats | |
1025 | r = __pakfire_cgroup_read_stats(cgroup, "memory.stat", | |
1026 | __pakfire_cgroup_parse_memory_stats, &stats->memory); | |
1027 | if (r) | |
1028 | goto ERROR; | |
1029 | ||
1030 | ERROR: | |
1031 | if (r) | |
1032 | ERROR(cgroup->pakfire, "%s: Could not read cgroup stats: %m\n", | |
1033 | pakfire_cgroup_name(cgroup)); | |
1034 | ||
1035 | return r; | |
1036 | } | |
1037 | ||
1038 | int pakfire_cgroup_stat_dump(struct pakfire_cgroup* cgroup, | |
1039 | const struct pakfire_cgroup_stats* stats) { | |
1040 | // Check input | |
1041 | if (!stats) { | |
1042 | errno = EINVAL; | |
1043 | return 1; | |
1044 | } | |
1045 | ||
1046 | DEBUG(cgroup->pakfire, "%s: Total CPU time usage: %lu\n", | |
1047 | pakfire_cgroup_name(cgroup), stats->cpu.usage_usec); | |
1048 | ||
1049 | return 0; | |
1050 | } |