]>
Commit | Line | Data |
---|---|---|
9dd11ed0 MT |
1 | /*############################################################################# |
2 | # # | |
3 | # Pakfire - The IPFire package management system # | |
e3ddb498 | 4 | # Copyright (C) 2022 Pakfire development team # |
9dd11ed0 MT |
5 | # # |
6 | # This program is free software: you can redistribute it and/or modify # | |
7 | # it under the terms of the GNU General Public License as published by # | |
8 | # the Free Software Foundation, either version 3 of the License, or # | |
9 | # (at your option) any later version. # | |
10 | # # | |
11 | # This program is distributed in the hope that it will be useful, # | |
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # | |
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # | |
14 | # GNU General Public License for more details. # | |
15 | # # | |
16 | # You should have received a copy of the GNU General Public License # | |
17 | # along with this program. If not, see <http://www.gnu.org/licenses/>. # | |
18 | # # | |
19 | #############################################################################*/ | |
20 | ||
21 | #include <errno.h> | |
e3ddb498 | 22 | #include <fcntl.h> |
d59e29c8 | 23 | #include <linux/bpf.h> |
034ba70e | 24 | #include <signal.h> |
9dd11ed0 | 25 | #include <stdlib.h> |
9dd11ed0 | 26 | #include <sys/types.h> |
9dd11ed0 | 27 | |
d59e29c8 MT |
28 | // libbpf |
29 | #include <bpf/bpf.h> | |
30 | ||
9dd11ed0 MT |
31 | #include <pakfire/cgroup.h> |
32 | #include <pakfire/logging.h> | |
e3ddb498 | 33 | #include <pakfire/pakfire.h> |
d973a13d | 34 | #include <pakfire/string.h> |
9dd11ed0 MT |
35 | #include <pakfire/util.h> |
36 | ||
18684c4d | 37 | static __thread char bpf_log_buffer[BPF_LOG_BUF_SIZE]; |
d59e29c8 | 38 | |
// Size (in bytes) of the stack buffers that cgroup files are read into.
// The expansion is parenthesised so that it evaluates as a single value
// when the macro is embedded in a larger expression.
#define BUFFER_SIZE (64 * 1024)
9dd11ed0 | 40 | |
d59e29c8 MT |
// Short form of mov with an immediate operand, dst_reg = imm
#define BPF_MOV64_IMM(DST, IMM) \
	((struct bpf_insn){ \
		.code = BPF_ALU64 | BPF_MOV | BPF_K, \
		.dst_reg = (DST), \
		.src_reg = 0, \
		.off = 0, \
		.imm = (IMM) \
	})

// Program exit
#define BPF_EXIT_INSN() \
	((struct bpf_insn){ \
		.code = BPF_JMP | BPF_EXIT, \
		.dst_reg = 0, \
		.src_reg = 0, \
		.off = 0, \
		.imm = 0 \
	})
60 | ||
2901c3a7 MT |
// Bitmap values for the cgroup controllers known to this file.
// Every controller owns exactly one bit so that several can be OR-ed together.
enum pakfire_cgroup_controllers {
	PAKFIRE_CGROUP_CONTROLLER_CPU    = 0x01,
	PAKFIRE_CGROUP_CONTROLLER_MEMORY = 0x02,
	PAKFIRE_CGROUP_CONTROLLER_PIDS   = 0x04,
	PAKFIRE_CGROUP_CONTROLLER_IO     = 0x08,
};
67 | ||
68 | static const enum pakfire_cgroup_controllers pakfire_cgroup_accounting_controllers = | |
69 | PAKFIRE_CGROUP_CONTROLLER_CPU | | |
70 | PAKFIRE_CGROUP_CONTROLLER_MEMORY | | |
71 | PAKFIRE_CGROUP_CONTROLLER_PIDS | | |
72 | PAKFIRE_CGROUP_CONTROLLER_IO; | |
73 | ||
e3ddb498 MT |
74 | struct pakfire_cgroup { |
75 | struct pakfire* pakfire; | |
76 | int nrefs; | |
9dd11ed0 | 77 | |
31d7e29a MT |
78 | // Store the root path |
79 | char root[PATH_MAX]; | |
80 | ||
2901c3a7 MT |
81 | // Flags |
82 | int flags; | |
83 | ||
e3ddb498 MT |
84 | // Store the path |
85 | char path[PATH_MAX]; | |
9dd11ed0 | 86 | |
e3ddb498 MT |
87 | // File descriptor to cgroup |
88 | int fd; | |
d59e29c8 MT |
89 | |
90 | // FD to the devices filter program | |
91 | int devicesfd; | |
e3ddb498 | 92 | }; |
9e1e7985 | 93 | |
e3ddb498 MT |
94 | // Returns true if this is the root cgroup |
95 | static int pakfire_cgroup_is_root(struct pakfire_cgroup* cgroup) { | |
96 | return !*cgroup->path; | |
97 | } | |
9e1e7985 | 98 | |
2901c3a7 MT |
99 | static int pakfire_cgroup_has_flag(struct pakfire_cgroup* cgroup, int flag) { |
100 | return cgroup->flags & flag; | |
101 | } | |
102 | ||
31d7e29a MT |
103 | static int pakfire_cgroup_set_root(struct pakfire_cgroup* cgroup) { |
104 | int r; | |
105 | ||
106 | // Find the current UID | |
107 | const uid_t uid = getuid(); | |
108 | ||
109 | switch (uid) { | |
110 | // root | |
111 | case 0: | |
112 | r = pakfire_string_set(cgroup->root, "/sys/fs/cgroup"); | |
98eefc02 | 113 | break; |
31d7e29a MT |
114 | |
115 | // unprivileged users | |
116 | default: | |
117 | r = pakfire_string_format(cgroup->root, | |
118 | "/sys/fs/cgroup/user.slice/user-%d.slice/user@%d.service", uid, uid); | |
98eefc02 | 119 | break; |
31d7e29a MT |
120 | } |
121 | ||
122 | if (r) | |
123 | ERROR(cgroup->pakfire, "Could not determine cgroup root: %m\n"); | |
124 | ||
125 | return r; | |
126 | } | |
127 | ||
e3ddb498 MT |
128 | static const char* pakfire_cgroup_name(struct pakfire_cgroup* cgroup) { |
129 | if (pakfire_cgroup_is_root(cgroup)) | |
130 | return "(root)"; | |
9e1e7985 | 131 | |
e3ddb498 | 132 | return cgroup->path; |
9e1e7985 MT |
133 | } |
134 | ||
2901c3a7 MT |
135 | static const char* pakfire_cgroup_controller_name( |
136 | enum pakfire_cgroup_controllers controller) { | |
137 | switch (controller) { | |
138 | case PAKFIRE_CGROUP_CONTROLLER_CPU: | |
139 | return "cpu"; | |
140 | ||
141 | case PAKFIRE_CGROUP_CONTROLLER_MEMORY: | |
142 | return "memory"; | |
143 | ||
144 | case PAKFIRE_CGROUP_CONTROLLER_PIDS: | |
145 | return "pids"; | |
146 | ||
147 | case PAKFIRE_CGROUP_CONTROLLER_IO: | |
148 | return "io"; | |
149 | } | |
150 | ||
151 | return NULL; | |
152 | } | |
153 | ||
154 | static enum pakfire_cgroup_controllers pakfire_cgroup_find_controller_by_name( | |
155 | const char* name) { | |
156 | const char* n = NULL; | |
157 | ||
158 | // Walk through the bitmap | |
159 | for (unsigned int i = 1; i; i <<= 1) { | |
160 | n = pakfire_cgroup_controller_name(i); | |
161 | if (!n) | |
162 | break; | |
163 | ||
164 | // Match | |
165 | if (strcmp(name, n) == 0) | |
166 | return i; | |
167 | } | |
168 | ||
169 | // Nothing found | |
170 | return 0; | |
171 | } | |
172 | ||
173 | static struct pakfire_cgroup* pakfire_cgroup_parent(struct pakfire_cgroup* cgroup) { | |
174 | struct pakfire_cgroup* parent = NULL; | |
175 | int r; | |
176 | ||
177 | // Cannot return parent for root group | |
178 | if (pakfire_cgroup_is_root(cgroup)) | |
179 | return NULL; | |
180 | ||
181 | // Determine the path of the parent | |
bbbc9842 | 182 | const char* path = pakfire_dirname(cgroup->path); |
2901c3a7 MT |
183 | if (!path) { |
184 | ERROR(cgroup->pakfire, "Could not determine path for parent cgroup: %m\n"); | |
185 | return NULL; | |
186 | } | |
187 | ||
188 | // dirname() returns . if no directory component could be found | |
189 | if (strcmp(path, ".") == 0) | |
bbbc9842 | 190 | path = NULL; |
2901c3a7 MT |
191 | |
192 | // Open the cgroup | |
193 | r = pakfire_cgroup_open(&parent, cgroup->pakfire, path, 0); | |
194 | if (r) { | |
195 | ERROR(cgroup->pakfire, "Could not open parent cgroup: %m\n"); | |
196 | parent = NULL; | |
197 | } | |
198 | ||
2901c3a7 MT |
199 | return parent; |
200 | } | |
201 | ||
e3ddb498 MT |
202 | static void pakfire_cgroup_free(struct pakfire_cgroup* cgroup) { |
203 | DEBUG(cgroup->pakfire, "Releasing cgroup %s at %p\n", | |
204 | pakfire_cgroup_name(cgroup), cgroup); | |
9e1e7985 | 205 | |
d59e29c8 | 206 | // Close the file descriptors |
2868fb2f | 207 | if (cgroup->fd > 0) |
e3ddb498 | 208 | close(cgroup->fd); |
d59e29c8 MT |
209 | if (cgroup->devicesfd > 0) |
210 | close(cgroup->devicesfd); | |
9e1e7985 | 211 | |
e3ddb498 MT |
212 | pakfire_unref(cgroup->pakfire); |
213 | free(cgroup); | |
9e1e7985 MT |
214 | } |
215 | ||
d59e29c8 MT |
216 | static int pakfire_cgroup_setup_devices(struct pakfire_cgroup* cgroup) { |
217 | LIBBPF_OPTS(bpf_prog_load_opts, opts, | |
218 | // Log Buffer | |
219 | .log_buf = bpf_log_buffer, | |
220 | .log_size = sizeof(bpf_log_buffer), | |
221 | ); | |
222 | int r; | |
223 | ||
224 | struct bpf_insn program[] = { | |
225 | BPF_MOV64_IMM(BPF_REG_0, 1), // r0 = 1 | |
226 | BPF_EXIT_INSN(), // return r0 | |
227 | }; | |
228 | ||
229 | // Load the BPF program | |
230 | r = bpf_prog_load(BPF_PROG_TYPE_CGROUP_DEVICE, NULL, "GPL", | |
231 | program, sizeof(program) / sizeof(*program), &opts); | |
232 | if (r < 0) { | |
233 | ERROR(cgroup->pakfire, "Could not load BPF program: %m\n"); | |
234 | return r; | |
235 | } | |
236 | ||
237 | // Store the file descriptor | |
238 | cgroup->devicesfd = r; | |
239 | ||
240 | // Attach the program to the cgroup | |
241 | r = bpf_prog_attach(cgroup->devicesfd, cgroup->fd, | |
242 | BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI); | |
243 | if (r) { | |
244 | ERROR(cgroup->pakfire, "Could not attach BPF program to cgroup: %m\n"); | |
245 | return r; | |
246 | } | |
247 | ||
248 | return 0; | |
249 | } | |
250 | ||
01cf6134 | 251 | static int pakfire_cgroup_open_root(struct pakfire_cgroup* cgroup) { |
31d7e29a | 252 | int fd = open(cgroup->root, O_DIRECTORY|O_PATH|O_CLOEXEC); |
01cf6134 | 253 | if (fd < 0) { |
31d7e29a | 254 | ERROR(cgroup->pakfire, "Could not open %s: %m\n", cgroup->root); |
01cf6134 MT |
255 | return -1; |
256 | } | |
257 | ||
258 | return fd; | |
259 | } | |
260 | ||
e3ddb498 MT |
261 | static int __pakfire_cgroup_create(struct pakfire_cgroup* cgroup) { |
262 | char path[PATH_MAX]; | |
263 | int r; | |
9dd11ed0 | 264 | |
e3ddb498 | 265 | DEBUG(cgroup->pakfire, "Trying to create cgroup %s\n", pakfire_cgroup_name(cgroup)); |
9dd11ed0 | 266 | |
e3ddb498 | 267 | // Compose the absolute path |
31d7e29a | 268 | r = pakfire_path_join(path, cgroup->root, cgroup->path); |
56796f84 | 269 | if (r) |
e3ddb498 | 270 | return 1; |
9dd11ed0 | 271 | |
e3ddb498 MT |
272 | // Try creating the directory |
273 | return pakfire_mkdir(path, 0755); | |
274 | } | |
9dd11ed0 | 275 | |
e3ddb498 MT |
276 | /* |
277 | Opens the cgroup and returns a file descriptor. | |
9dd11ed0 | 278 | |
e3ddb498 | 279 | If the cgroup does not exist, it will try to create it. |
9dd11ed0 | 280 | |
e3ddb498 MT |
281 | This function returns a negative value on error. |
282 | */ | |
283 | static int __pakfire_cgroup_open(struct pakfire_cgroup* cgroup) { | |
e3ddb498 MT |
284 | int fd = -1; |
285 | int r; | |
9dd11ed0 | 286 | |
e3ddb498 | 287 | // Open file descriptor of the cgroup root |
01cf6134 MT |
288 | int rootfd = pakfire_cgroup_open_root(cgroup); |
289 | if (rootfd < 0) | |
e3ddb498 | 290 | return -1; |
9dd11ed0 | 291 | |
e3ddb498 MT |
292 | // Return the rootfd for the root group |
293 | if (pakfire_cgroup_is_root(cgroup)) | |
294 | return rootfd; | |
9dd11ed0 | 295 | |
e3ddb498 MT |
296 | RETRY: |
297 | fd = openat(rootfd, cgroup->path, O_DIRECTORY|O_PATH|O_CLOEXEC); | |
298 | if (fd < 0) { | |
299 | switch (errno) { | |
300 | // If the cgroup doesn't exist yet, try to create it | |
301 | case ENOENT: | |
302 | r = __pakfire_cgroup_create(cgroup); | |
303 | if (r) | |
304 | goto ERROR; | |
9dd11ed0 | 305 | |
e3ddb498 MT |
306 | // Retry open after successful creation |
307 | goto RETRY; | |
9dd11ed0 | 308 | |
e3ddb498 MT |
309 | // Exit on all other errors |
310 | default: | |
311 | ERROR(cgroup->pakfire, "Could not open cgroup %s: %m\n", | |
312 | pakfire_cgroup_name(cgroup)); | |
313 | goto ERROR; | |
314 | } | |
315 | } | |
9dd11ed0 | 316 | |
e3ddb498 MT |
317 | ERROR: |
318 | if (rootfd > 0) | |
319 | close(rootfd); | |
9dd11ed0 | 320 | |
e3ddb498 MT |
321 | return fd; |
322 | } | |
9dd11ed0 | 323 | |
034ba70e MT |
324 | static int pakfire_cgroup_access(struct pakfire_cgroup* cgroup, const char* path, |
325 | int mode, int flags) { | |
326 | return faccessat(cgroup->fd, path, mode, flags); | |
327 | } | |
328 | ||
329 | static FILE* pakfire_cgroup_open_file(struct pakfire_cgroup* cgroup, | |
330 | const char* path, const char* mode) { | |
331 | FILE* f = NULL; | |
332 | ||
333 | // Open cgroup.procs | |
334 | int fd = openat(cgroup->fd, "cgroup.procs", O_CLOEXEC); | |
335 | if (fd < 0) { | |
336 | ERROR(cgroup->pakfire, "%s: Could not open %s: %m\n", | |
337 | pakfire_cgroup_name(cgroup), path); | |
338 | goto ERROR; | |
339 | } | |
340 | ||
341 | // Convert into file handle | |
342 | f = fdopen(fd, mode); | |
343 | if (!f) | |
344 | goto ERROR; | |
345 | ||
346 | ERROR: | |
2868fb2f | 347 | if (fd > 0) |
034ba70e MT |
348 | close(fd); |
349 | ||
350 | return f; | |
351 | } | |
352 | ||
2901c3a7 | 353 | static ssize_t pakfire_cgroup_read(struct pakfire_cgroup* cgroup, const char* path, |
e3ddb498 | 354 | char* buffer, size_t length) { |
2901c3a7 | 355 | ssize_t bytes_read = -1; |
9dd11ed0 | 356 | |
e3ddb498 MT |
357 | // Check if this cgroup has been destroyed already |
358 | if (!cgroup->fd) { | |
359 | ERROR(cgroup->pakfire, "Trying to read from destroyed cgroup\n"); | |
2901c3a7 | 360 | return -1; |
9dd11ed0 MT |
361 | } |
362 | ||
e3ddb498 MT |
363 | // Open the file |
364 | int fd = openat(cgroup->fd, path, O_CLOEXEC); | |
365 | if (fd < 0) { | |
366 | DEBUG(cgroup->pakfire, "Could not open %s/%s: %m\n", | |
367 | pakfire_cgroup_name(cgroup), path); | |
368 | goto ERROR; | |
369 | } | |
9dd11ed0 | 370 | |
e3ddb498 | 371 | // Read file content into buffer |
2901c3a7 MT |
372 | bytes_read = read(fd, buffer, length); |
373 | if (bytes_read < 0) { | |
e3ddb498 MT |
374 | DEBUG(cgroup->pakfire, "Could not read from %s/%s: %m\n", |
375 | pakfire_cgroup_name(cgroup), path); | |
376 | goto ERROR; | |
377 | } | |
9dd11ed0 | 378 | |
2901c3a7 | 379 | // Terminate the buffer |
cc7f589e | 380 | if ((size_t)bytes_read < length) |
2901c3a7 | 381 | buffer[bytes_read] = '\0'; |
9dd11ed0 | 382 | |
e3ddb498 MT |
383 | ERROR: |
384 | if (fd > 0) | |
385 | close(fd); | |
9dd11ed0 | 386 | |
2901c3a7 | 387 | return bytes_read; |
9dd11ed0 MT |
388 | } |
389 | ||
e3ddb498 MT |
390 | static int pakfire_cgroup_write(struct pakfire_cgroup* cgroup, |
391 | const char* path, const char* format, ...) { | |
305de320 | 392 | va_list args; |
e3ddb498 MT |
393 | int r = 0; |
394 | ||
395 | // Check if this cgroup has been destroyed already | |
396 | if (!cgroup->fd) { | |
397 | ERROR(cgroup->pakfire, "Trying to write to destroyed cgroup\n"); | |
398 | errno = EPERM; | |
399 | return 1; | |
400 | } | |
9dd11ed0 | 401 | |
e3ddb498 MT |
402 | // Open the file |
403 | int fd = openat(cgroup->fd, path, O_WRONLY|O_CLOEXEC); | |
404 | if (fd < 0) { | |
405 | DEBUG(cgroup->pakfire, "Could not open %s/%s for writing: %m\n", | |
406 | pakfire_cgroup_name(cgroup), path); | |
9dd11ed0 | 407 | return 1; |
e3ddb498 | 408 | } |
9dd11ed0 | 409 | |
e3ddb498 | 410 | // Write buffer |
305de320 | 411 | va_start(args, format); |
e3ddb498 | 412 | ssize_t bytes_written = vdprintf(fd, format, args); |
305de320 MT |
413 | va_end(args); |
414 | ||
e3ddb498 MT |
415 | // Check if content was written okay |
416 | if (bytes_written < 0) { | |
417 | DEBUG(cgroup->pakfire, "Could not write to %s/%s: %m\n", | |
418 | pakfire_cgroup_name(cgroup), path); | |
419 | r = 1; | |
420 | } | |
820c32c7 | 421 | |
e3ddb498 MT |
422 | // Close fd |
423 | close(fd); | |
305de320 MT |
424 | |
425 | return r; | |
426 | } | |
427 | ||
2901c3a7 MT |
428 | static int pakfire_cgroup_read_controllers( |
429 | struct pakfire_cgroup* cgroup, const char* name) { | |
430 | char buffer[BUFFER_SIZE]; | |
431 | char* p = NULL; | |
9dd11ed0 | 432 | |
2901c3a7 MT |
433 | // Discovered controllers |
434 | int controllers = 0; | |
69cfa22d | 435 | |
2901c3a7 MT |
436 | // Read cgroup.controllers file |
437 | ssize_t bytes_read = pakfire_cgroup_read(cgroup, name, buffer, sizeof(buffer)); | |
438 | if (bytes_read < 0) | |
439 | return -1; | |
440 | ||
441 | // If the file was empty, there is nothing more to do | |
442 | if (bytes_read == 0) | |
443 | return 0; | |
444 | ||
445 | char* token = strtok_r(buffer, " \n", &p); | |
446 | ||
447 | while (token) { | |
448 | DEBUG(cgroup->pakfire, "Found controller '%s'\n", token); | |
449 | ||
450 | // Try finding this controller | |
451 | int controller = pakfire_cgroup_find_controller_by_name(token); | |
452 | if (controller) | |
453 | controllers |= controller; | |
454 | ||
455 | // Move on to next token | |
456 | token = strtok_r(NULL, " \n", &p); | |
9dd11ed0 MT |
457 | } |
458 | ||
2901c3a7 MT |
459 | // Return discovered controllers |
460 | return controllers; | |
461 | } | |
9dd11ed0 | 462 | |
2901c3a7 MT |
/*
	Returns the bitmap of controllers that are listed in cgroup.controllers
	(or -1 on error)
*/
static int pakfire_cgroup_available_controllers(struct pakfire_cgroup* cgroup) {
	const int available = pakfire_cgroup_read_controllers(cgroup, "cgroup.controllers");

	return available;
}
469 | ||
/*
	Returns the bitmap of controllers that are listed in cgroup.subtree_control
	(or -1 on error)
*/
static int pakfire_cgroup_enabled_controllers(struct pakfire_cgroup* cgroup) {
	const int enabled = pakfire_cgroup_read_controllers(cgroup, "cgroup.subtree_control");

	return enabled;
}
476 | ||
477 | /* | |
478 | This function takes a bitmap of controllers that should be enabled. | |
479 | */ | |
480 | static int pakfire_cgroup_enable_controllers(struct pakfire_cgroup* cgroup, | |
481 | enum pakfire_cgroup_controllers controllers) { | |
482 | struct pakfire_cgroup* parent = NULL; | |
483 | int r = 1; | |
484 | ||
485 | // Find all enabled controllers | |
486 | const int enabled_controllers = pakfire_cgroup_enabled_controllers(cgroup); | |
487 | if (enabled_controllers < 0) { | |
488 | ERROR(cgroup->pakfire, "Could not fetch enabled controllers: %m\n"); | |
489 | goto ERROR; | |
9dd11ed0 MT |
490 | } |
491 | ||
2901c3a7 MT |
492 | // Filter out anything that is already enabled |
493 | controllers = (controllers & ~enabled_controllers); | |
9dd11ed0 | 494 | |
2901c3a7 MT |
495 | // Exit if everything is already enabled |
496 | if (!controllers) { | |
497 | DEBUG(cgroup->pakfire, "All controllers are already enabled\n"); | |
498 | return 0; | |
499 | } | |
500 | ||
501 | // Find all available controllers | |
502 | const int available_controllers = pakfire_cgroup_available_controllers(cgroup); | |
503 | if (available_controllers < 0) { | |
504 | ERROR(cgroup->pakfire, "Could not fetch available controllers: %m\n"); | |
505 | goto ERROR; | |
506 | } | |
507 | ||
508 | // Are all controllers we need available, yet? | |
509 | if (controllers & ~available_controllers) { | |
510 | DEBUG(cgroup->pakfire, "Not all controllers are available, yet\n"); | |
511 | ||
512 | parent = pakfire_cgroup_parent(cgroup); | |
513 | ||
514 | // Enable everything we need on the parent group | |
515 | if (parent) { | |
516 | r = pakfire_cgroup_enable_controllers(parent, controllers); | |
517 | if (r) | |
518 | goto ERROR; | |
519 | } | |
520 | } | |
521 | ||
522 | // Determine how many iterations we will need | |
523 | const int iterations = 1 << (sizeof(controllers) * 8 - __builtin_clz(controllers)); | |
524 | ||
525 | // Iterate over all known controllers | |
526 | for (int controller = 1; controller < iterations; controller <<= 1) { | |
527 | // Skip enabling this controller if not requested | |
528 | if (!(controller & controllers)) | |
529 | continue; | |
530 | ||
531 | // Fetch name | |
532 | const char* name = pakfire_cgroup_controller_name(controller); | |
533 | ||
534 | DEBUG(cgroup->pakfire, "Enabling controller %s in cgroup %s\n", | |
535 | name, pakfire_cgroup_name(cgroup)); | |
536 | ||
537 | // Try enabling the controller (this will succeed if it already is enabled) | |
538 | r = pakfire_cgroup_write(cgroup, "cgroup.subtree_control", "+%s\n", name); | |
539 | if (r) { | |
540 | ERROR(cgroup->pakfire, "Could not enable controller %s in cgroup %s\n", | |
541 | name, pakfire_cgroup_name(cgroup)); | |
542 | goto ERROR; | |
543 | } | |
544 | } | |
545 | ||
546 | ERROR: | |
547 | if (parent) | |
548 | pakfire_cgroup_unref(parent); | |
549 | ||
550 | return r; | |
551 | } | |
552 | ||
553 | static int pakfire_cgroup_enable_accounting(struct pakfire_cgroup* cgroup) { | |
554 | // Enable all accounting controllers | |
555 | return pakfire_cgroup_enable_controllers(cgroup, | |
556 | pakfire_cgroup_accounting_controllers); | |
9dd11ed0 MT |
557 | } |
558 | ||
e3ddb498 MT |
559 | /* |
560 | Entry function to open a new cgroup. | |
9dd11ed0 | 561 | |
e3ddb498 MT |
562 | If the cgroup doesn't exist, it will be created including any parent cgroups. |
563 | */ | |
564 | int pakfire_cgroup_open(struct pakfire_cgroup** cgroup, | |
2901c3a7 | 565 | struct pakfire* pakfire, const char* path, int flags) { |
e3ddb498 | 566 | int r = 1; |
9dd11ed0 | 567 | |
e3ddb498 MT |
568 | // Allocate the cgroup struct |
569 | struct pakfire_cgroup* c = calloc(1, sizeof(*c)); | |
570 | if (!c) | |
571 | return 1; | |
9dd11ed0 | 572 | |
e3ddb498 | 573 | DEBUG(pakfire, "Allocated cgroup %s at %p\n", path, c); |
9dd11ed0 | 574 | |
e3ddb498 MT |
575 | // Keep a reference to pakfire |
576 | c->pakfire = pakfire_ref(pakfire); | |
4630031c | 577 | |
e3ddb498 MT |
578 | // Initialize reference counter |
579 | c->nrefs = 1; | |
5ae21aa1 | 580 | |
31d7e29a MT |
581 | // Find the root |
582 | r = pakfire_cgroup_set_root(c); | |
583 | if (r) | |
584 | goto ERROR; | |
585 | ||
e3ddb498 MT |
586 | // Copy path |
587 | pakfire_string_set(c->path, path); | |
5ae21aa1 | 588 | |
2901c3a7 MT |
589 | // Copy flags |
590 | c->flags = flags; | |
591 | ||
e3ddb498 MT |
592 | // Open a file descriptor |
593 | c->fd = __pakfire_cgroup_open(c); | |
e8a18682 MT |
594 | if (c->fd < 0) { |
595 | r = 1; | |
e3ddb498 | 596 | goto ERROR; |
e8a18682 | 597 | } |
4630031c | 598 | |
2901c3a7 MT |
599 | // Enable accounting if requested |
600 | if (pakfire_cgroup_has_flag(c, PAKFIRE_CGROUP_ENABLE_ACCOUNTING)) { | |
601 | r = pakfire_cgroup_enable_accounting(c); | |
602 | if (r) | |
603 | goto ERROR; | |
604 | } | |
605 | ||
d59e29c8 MT |
606 | // Setup the devices filter |
607 | r = pakfire_cgroup_setup_devices(c); | |
608 | if (r) | |
609 | goto ERROR; | |
610 | ||
e3ddb498 | 611 | *cgroup = c; |
4630031c | 612 | return 0; |
4630031c | 613 | |
e3ddb498 MT |
614 | ERROR: |
615 | pakfire_cgroup_free(c); | |
616 | return r; | |
4630031c MT |
617 | } |
618 | ||
e3ddb498 MT |
619 | struct pakfire_cgroup* pakfire_cgroup_ref(struct pakfire_cgroup* cgroup) { |
620 | ++cgroup->nrefs; | |
4630031c | 621 | |
e3ddb498 | 622 | return cgroup; |
1b41d3b1 MT |
623 | } |
624 | ||
e3ddb498 MT |
625 | struct pakfire_cgroup* pakfire_cgroup_unref(struct pakfire_cgroup* cgroup) { |
626 | if (--cgroup->nrefs > 0) | |
627 | return cgroup; | |
1b41d3b1 | 628 | |
e3ddb498 MT |
629 | pakfire_cgroup_free(cgroup); |
630 | return NULL; | |
1b41d3b1 MT |
631 | } |
632 | ||
aca565fc MT |
633 | // Open a child cgroup |
634 | int pakfire_cgroup_child(struct pakfire_cgroup** child, | |
635 | struct pakfire_cgroup* cgroup, const char* name, int flags) { | |
636 | char path[PATH_MAX]; | |
637 | int r; | |
638 | ||
366a3be1 MT |
639 | // Check input |
640 | if (!name) { | |
641 | errno = EINVAL; | |
642 | return 1; | |
643 | } | |
644 | ||
aca565fc MT |
645 | // Join paths |
646 | r = pakfire_path_join(path, cgroup->path, name); | |
56796f84 | 647 | if (r) |
aca565fc MT |
648 | return 1; |
649 | ||
650 | // Open the child group | |
651 | return pakfire_cgroup_open(child, cgroup->pakfire, path, flags); | |
652 | } | |
653 | ||
034ba70e MT |
/*
	Calls callback once for every line in cgroup.procs, passing the line
	parsed as a PID.

	The walk stops at the first callback that returns non-zero, and that
	value is returned. Returns 1 if no callback was given or cgroup.procs
	could not be opened.
*/
static int pakfire_cgroup_procs_callback(struct pakfire_cgroup* cgroup,
		int (*callback)(struct pakfire_cgroup* cgroup, pid_t pid, void* data), void* data) {
	int r = 0;

	// Check if we have a callback
	if (!callback) {
		errno = EINVAL;
		return 1;
	}

	// Open cgroup.procs
	FILE* f = pakfire_cgroup_open_file(cgroup, "cgroup.procs", "r");
	if (!f)
		return 1;

	char* line = NULL;
	size_t capacity = 0;

	// Walk through all PIDs until there is nothing left to read
	while (getline(&line, &capacity, f) >= 0) {
		// Parse PID
		const pid_t pid = strtol(line, NULL, 10);

		// Call callback function
		r = callback(cgroup, pid, data);
		if (r)
			break;
	}

	// Cleanup
	free(line);
	fclose(f);

	return r;
}
695 | ||
696 | static int send_sigkill(struct pakfire_cgroup* cgroup, const pid_t pid, void* data) { | |
697 | DEBUG(cgroup->pakfire, "Sending signal SIGKILL to PID %d\n", pid); | |
698 | ||
699 | int r = kill(pid, SIGKILL); | |
700 | if (r < 0 && errno != ESRCH) { | |
701 | ERROR(cgroup->pakfire, "Could not send signal SIGKILL to PID %d: %m\n", pid); | |
702 | return r; | |
703 | } | |
704 | ||
705 | return r; | |
706 | } | |
707 | ||
e3ddb498 MT |
708 | /* |
709 | Immediately kills all processes in this cgroup | |
710 | */ | |
689aa7de | 711 | static int pakfire_cgroup_killall(struct pakfire_cgroup* cgroup) { |
034ba70e | 712 | DEBUG(cgroup->pakfire, "%s: Killing all processes\n", pakfire_cgroup_name(cgroup)); |
1b41d3b1 | 713 | |
034ba70e MT |
714 | // Do we have support for cgroup.kill? |
715 | int r = pakfire_cgroup_access(cgroup, "cgroup.kill", F_OK, 0); | |
716 | ||
717 | // Fall back to the legacy version | |
718 | if (r && errno == ENOENT) { | |
719 | return pakfire_cgroup_procs_callback(cgroup, send_sigkill, NULL); | |
720 | } | |
721 | ||
722 | return pakfire_cgroup_write(cgroup, "cgroup.kill", "1"); | |
e3ddb498 | 723 | } |
1b41d3b1 | 724 | |
e3ddb498 MT |
725 | /* |
726 | Immediately destroys this cgroup | |
727 | */ | |
728 | int pakfire_cgroup_destroy(struct pakfire_cgroup* cgroup) { | |
729 | int r; | |
1b41d3b1 | 730 | |
01cf6134 MT |
731 | // Cannot call this for the root group |
732 | if (pakfire_cgroup_is_root(cgroup)) { | |
733 | errno = EPERM; | |
734 | return 1; | |
735 | } | |
736 | ||
737 | DEBUG(cgroup->pakfire, "Destroying cgroup %s\n", pakfire_cgroup_name(cgroup)); | |
738 | ||
e3ddb498 MT |
739 | // Kill everything in this group |
740 | r = pakfire_cgroup_killall(cgroup); | |
741 | if (r) | |
742 | return r; | |
1b41d3b1 | 743 | |
e3ddb498 | 744 | // Close the file descriptor |
2868fb2f | 745 | if (cgroup->fd > 0) { |
e3ddb498 MT |
746 | close(cgroup->fd); |
747 | cgroup->fd = 0; | |
d5256224 | 748 | } |
d5256224 | 749 | |
01cf6134 MT |
750 | // Open the root directory |
751 | int fd = pakfire_cgroup_open_root(cgroup); | |
752 | if (fd < 0) | |
753 | return 1; | |
754 | ||
755 | // Delete the directory | |
756 | r = unlinkat(fd, cgroup->path, AT_REMOVEDIR); | |
757 | if (r) | |
758 | ERROR(cgroup->pakfire, "Could not destroy cgroup: %m\n"); | |
759 | ||
760 | // Close fd | |
761 | close(fd); | |
762 | ||
763 | return r; | |
d5256224 | 764 | } |
820c32c7 | 765 | |
e3ddb498 MT |
766 | int pakfire_cgroup_fd(struct pakfire_cgroup* cgroup) { |
767 | return cgroup->fd; | |
820c32c7 | 768 | } |
46dd01c6 MT |
769 | |
770 | // Memory | |
771 | ||
772 | int pakfire_cgroup_set_guaranteed_memory(struct pakfire_cgroup* cgroup, size_t mem) { | |
773 | int r; | |
774 | ||
775 | // Enable memory controller | |
776 | r = pakfire_cgroup_enable_controllers(cgroup, PAKFIRE_CGROUP_CONTROLLER_MEMORY); | |
777 | if (r) | |
778 | return r; | |
779 | ||
780 | DEBUG(cgroup->pakfire, "%s: Setting guaranteed memory to %zu byte(s)\n", | |
781 | pakfire_cgroup_name(cgroup), mem); | |
782 | ||
783 | // Set value | |
784 | r = pakfire_cgroup_write(cgroup, "memory.min", "%zu\n", mem); | |
785 | if (r) | |
786 | ERROR(cgroup->pakfire, "%s: Could not set guaranteed memory: %m\n", | |
787 | pakfire_cgroup_name(cgroup)); | |
788 | ||
789 | return r; | |
790 | } | |
791 | ||
792 | int pakfire_cgroup_set_memory_limit(struct pakfire_cgroup* cgroup, size_t mem) { | |
793 | int r; | |
794 | ||
795 | // Enable memory controller | |
796 | r = pakfire_cgroup_enable_controllers(cgroup, PAKFIRE_CGROUP_CONTROLLER_MEMORY); | |
797 | if (r) | |
798 | return r; | |
799 | ||
800 | DEBUG(cgroup->pakfire, "%s: Setting memory limit to %zu byte(s)\n", | |
801 | pakfire_cgroup_name(cgroup), mem); | |
802 | ||
803 | // Set value | |
804 | r = pakfire_cgroup_write(cgroup, "memory.max", "%zu\n", mem); | |
805 | if (r) | |
806 | ERROR(cgroup->pakfire, "%s: Could not set memory limit: %m\n", | |
807 | pakfire_cgroup_name(cgroup)); | |
808 | ||
809 | return r; | |
810 | } | |
d3b93302 MT |
811 | |
812 | // PIDs | |
813 | ||
814 | int pakfire_cgroup_set_pid_limit(struct pakfire_cgroup* cgroup, size_t limit) { | |
815 | int r; | |
816 | ||
817 | // Enable PID controller | |
818 | r = pakfire_cgroup_enable_controllers(cgroup, PAKFIRE_CGROUP_CONTROLLER_PIDS); | |
819 | if (r) | |
820 | return r; | |
821 | ||
822 | DEBUG(cgroup->pakfire, "%s: Setting PID limit to %zu\n", | |
823 | pakfire_cgroup_name(cgroup), limit); | |
824 | ||
825 | // Set value | |
826 | r = pakfire_cgroup_write(cgroup, "pids.max", "%zu\n", limit); | |
827 | if (r) | |
828 | ERROR(cgroup->pakfire, "%s: Could not set PID limit: %m\n", | |
829 | pakfire_cgroup_name(cgroup)); | |
830 | ||
831 | return r; | |
832 | } | |
6b7cf275 MT |
833 | |
834 | // Stats | |
835 | ||
836 | static int __pakfire_cgroup_read_stats_line(struct pakfire_cgroup* cgroup, | |
837 | int (*callback)(struct pakfire_cgroup* cgroup, const char* key, unsigned long val, void* data), | |
838 | void* data, char* line) { | |
839 | char* p = NULL; | |
840 | ||
841 | DEBUG(cgroup->pakfire, "Parsing line: %s\n", line); | |
842 | ||
843 | char key[NAME_MAX]; | |
844 | unsigned long val = 0; | |
845 | ||
846 | // Number of the field | |
847 | int i = 0; | |
848 | ||
849 | char* elem = strtok_r(line, " ", &p); | |
850 | while (elem) { | |
851 | switch (i++) { | |
852 | // First field is the key | |
853 | case 0: | |
854 | // Copy the key | |
855 | pakfire_string_set(key, elem); | |
856 | break; | |
857 | ||
858 | // The second field is some value | |
859 | case 1: | |
860 | val = strtoul(elem, NULL, 10); | |
861 | break; | |
862 | ||
863 | // Ignore the rest | |
864 | default: | |
865 | DEBUG(cgroup->pakfire, "%s: Unknown value in cgroup stats (%d): %s\n", | |
866 | pakfire_cgroup_name(cgroup), i, elem); | |
867 | break; | |
868 | } | |
869 | ||
870 | elem = strtok_r(NULL, " ", &p); | |
871 | } | |
872 | ||
873 | // Check if we parsed both fields | |
874 | if (i < 2) { | |
875 | ERROR(cgroup->pakfire, "Could not parse line\n"); | |
876 | return 1; | |
877 | } | |
878 | ||
879 | // Call the callback | |
880 | return callback(cgroup, key, val, data); | |
881 | } | |
882 | ||
883 | static int __pakfire_cgroup_read_stats(struct pakfire_cgroup* cgroup, const char* path, | |
884 | int (*callback)(struct pakfire_cgroup* cgroup, const char* key, unsigned long val, void* data), | |
885 | void* data) { | |
886 | char* p = NULL; | |
887 | int r; | |
888 | ||
889 | char buffer[BUFFER_SIZE]; | |
890 | ||
891 | DEBUG(cgroup->pakfire, "%s: Reading stats from %s\n", pakfire_cgroup_name(cgroup), path); | |
892 | ||
893 | // Open the file | |
894 | r = pakfire_cgroup_read(cgroup, path, buffer, sizeof(buffer)); | |
895 | if (r < 0) | |
896 | goto ERROR; | |
897 | ||
898 | char* line = strtok_r(buffer, "\n", &p); | |
899 | while (line) { | |
900 | // Parse the line | |
901 | r = __pakfire_cgroup_read_stats_line(cgroup, callback, data, line); | |
902 | if (r) | |
903 | goto ERROR; | |
904 | ||
905 | // Move to the next line | |
906 | line = strtok_r(NULL, "\n", &p); | |
907 | } | |
908 | ||
909 | ERROR: | |
910 | return r; | |
911 | } | |
912 | ||
// Associates the name of a statistic with the place its value is stored at
struct pakfire_cgroup_stat_entry {
	// Key as it shows up in the stats file
	const char* key;

	// Destination for the parsed value
	unsigned long* val;
};
917 | ||
918 | static int __pakfire_cgroup_parse_cpu_stats(struct pakfire_cgroup* cgroup, | |
919 | const char* key, unsigned long val, void* data) { | |
920 | struct pakfire_cgroup_cpu_stats* stats = (struct pakfire_cgroup_cpu_stats*)data; | |
921 | ||
922 | const struct pakfire_cgroup_stat_entry entries[] = { | |
923 | { "system_usec", &stats->system_usec }, | |
924 | { "usage_usec", &stats->usage_usec }, | |
925 | { "user_usec", &stats->user_usec }, | |
926 | { NULL, NULL }, | |
927 | }; | |
928 | // Find and store value | |
929 | for (const struct pakfire_cgroup_stat_entry* entry = entries; entry->key; entry++) { | |
930 | if (strcmp(entry->key, key) == 0) { | |
931 | *entry->val = val; | |
932 | return 0; | |
933 | } | |
934 | } | |
935 | ||
936 | DEBUG(cgroup->pakfire, "Unknown key for CPU stats: %s = %ld\n", key, val); | |
937 | ||
938 | return 0; | |
939 | } | |
940 | ||
941 | static int __pakfire_cgroup_parse_memory_stats(struct pakfire_cgroup* cgroup, | |
942 | const char* key, unsigned long val, void* data) { | |
943 | struct pakfire_cgroup_memory_stats* stats = (struct pakfire_cgroup_memory_stats*)data; | |
944 | ||
945 | const struct pakfire_cgroup_stat_entry entries[] = { | |
946 | { "anon", &stats->anon }, | |
947 | { "file", &stats->file }, | |
948 | { "kernel", &stats->kernel }, | |
949 | { "kernel_stack", &stats->kernel_stack }, | |
950 | { "pagetables", &stats->pagetables }, | |
951 | { "percpu", &stats->percpu }, | |
952 | { "sock", &stats->sock }, | |
953 | { "vmalloc", &stats->vmalloc }, | |
954 | { "shmem", &stats->shmem }, | |
955 | { "zswap", &stats->zswap }, | |
956 | { "zswapped", &stats->zswapped }, | |
957 | { "file_mapped", &stats->file_mapped }, | |
958 | { "file_dirty", &stats->file_dirty }, | |
959 | { "file_writeback", &stats->file_writeback }, | |
960 | { "swapcached", &stats->swapcached }, | |
961 | { "anon_thp", &stats->anon_thp }, | |
962 | { "file_thp", &stats->file_thp }, | |
963 | { "shmem_thp", &stats->shmem_thp }, | |
964 | { "inactive_anon", &stats->inactive_anon }, | |
965 | { "active_anon", &stats->active_anon }, | |
966 | { "inactive_file", &stats->inactive_file }, | |
967 | { "active_file", &stats->active_file }, | |
968 | { "unevictable", &stats->unevictable }, | |
969 | { "slab_reclaimable", &stats->slab_reclaimable }, | |
970 | { "slab_unreclaimable", &stats->slab_unreclaimable }, | |
971 | { "slab", &stats->slab }, | |
972 | { "workingset_refault_anon", &stats->workingset_refault_anon }, | |
973 | { "workingset_refault_file", &stats->workingset_refault_file }, | |
974 | { "workingset_activate_anon", &stats->workingset_activate_anon }, | |
975 | { "workingset_activate_file", &stats->workingset_activate_file }, | |
976 | { "workingset_restore_anon", &stats->workingset_restore_anon }, | |
977 | { "workingset_restore_file", &stats->workingset_restore_file }, | |
978 | { "workingset_nodereclaim", &stats->workingset_nodereclaim }, | |
979 | { "pgfault", &stats->pgfault }, | |
980 | { "pgmajfault", &stats->pgmajfault }, | |
981 | { "pgrefill", &stats->pgrefill }, | |
982 | { "pgscan", &stats->pgscan }, | |
983 | { "pgsteal", &stats->pgsteal }, | |
984 | { "pgactivate", &stats->pgactivate }, | |
985 | { "pgdeactivate", &stats->pgdeactivate }, | |
986 | { "pglazyfree", &stats->pglazyfree }, | |
987 | { "pglazyfreed", &stats->pglazyfreed }, | |
988 | { "thp_fault_alloc", &stats->thp_fault_alloc }, | |
989 | { "thp_collapse_alloc", &stats->thp_collapse_alloc }, | |
990 | { NULL, NULL }, | |
991 | }; | |
992 | ||
993 | // Find and store value | |
994 | for (const struct pakfire_cgroup_stat_entry* entry = entries; entry->key; entry++) { | |
995 | if (strcmp(entry->key, key) == 0) { | |
996 | *entry->val = val; | |
997 | return 0; | |
998 | } | |
999 | } | |
1000 | ||
1001 | // Log any unknown keys | |
1002 | DEBUG(cgroup->pakfire, "Unknown key for memory stats: %s = %ld\n", key, val); | |
1003 | ||
1004 | return 0; | |
1005 | } | |
1006 | ||
1007 | int pakfire_cgroup_stat(struct pakfire_cgroup* cgroup, | |
1008 | struct pakfire_cgroup_stats* stats) { | |
1009 | int r; | |
1010 | ||
1011 | // Check input | |
1012 | if (!stats) { | |
1013 | errno = EINVAL; | |
1014 | return 1; | |
1015 | } | |
1016 | ||
1017 | // Read CPU stats | |
1018 | r = __pakfire_cgroup_read_stats(cgroup, "cpu.stat", | |
1019 | __pakfire_cgroup_parse_cpu_stats, &stats->cpu); | |
1020 | if (r) | |
1021 | goto ERROR; | |
1022 | ||
1023 | // Read memory stats | |
1024 | r = __pakfire_cgroup_read_stats(cgroup, "memory.stat", | |
1025 | __pakfire_cgroup_parse_memory_stats, &stats->memory); | |
1026 | if (r) | |
1027 | goto ERROR; | |
1028 | ||
1029 | ERROR: | |
1030 | if (r) | |
1031 | ERROR(cgroup->pakfire, "%s: Could not read cgroup stats: %m\n", | |
1032 | pakfire_cgroup_name(cgroup)); | |
1033 | ||
1034 | return r; | |
1035 | } | |
1036 | ||
1037 | int pakfire_cgroup_stat_dump(struct pakfire_cgroup* cgroup, | |
1038 | const struct pakfire_cgroup_stats* stats) { | |
1039 | // Check input | |
1040 | if (!stats) { | |
1041 | errno = EINVAL; | |
1042 | return 1; | |
1043 | } | |
1044 | ||
1045 | DEBUG(cgroup->pakfire, "%s: Total CPU time usage: %lu\n", | |
1046 | pakfire_cgroup_name(cgroup), stats->cpu.usage_usec); | |
1047 | ||
1048 | return 0; | |
1049 | } |