src/basic/cgroup-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <dirent.h>
  22 #include <errno.h>
  23 #include <ftw.h>
  24 #include <limits.h>
  25 #include <signal.h>
  26 #include <stddef.h>
  27 #include <stdio_ext.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #include <sys/stat.h>
  31 #include <sys/statfs.h>
  32 #include <sys/types.h>
  33 #include <sys/xattr.h>
  34 #include <unistd.h>
  35
  36 #include "alloc-util.h"
  37 #include "cgroup-util.h"
  38 #include "def.h"
  39 #include "dirent-util.h"
  40 #include "extract-word.h"
  41 #include "fd-util.h"
  42 #include "fileio.h"
  43 #include "format-util.h"
  44 #include "fs-util.h"
  45 #include "log.h"
  46 #include "login-util.h"
  47 #include "macro.h"
  48 #include "missing.h"
  49 #include "mkdir.h"
  50 #include "parse-util.h"
  51 #include "path-util.h"
  52 #include "proc-cmdline.h"
  53 #include "process-util.h"
  54 #include "set.h"
  55 #include "special.h"
  56 #include "stat-util.h"
  57 #include "stdio-util.h"
  58 #include "string-table.h"
  59 #include "string-util.h"
  60 #include "strv.h"
  61 #include "unit-name.h"
  62 #include "user-util.h"
  63
  64 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  65         _cleanup_free_ char *fs = NULL;
  66         FILE *f;
  67         int r;
  68
  69         assert(_f);
  70
  71         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  72         if (r < 0)
  73                 return r;
  74
  75         f = fopen(fs, "re");
  76         if (!f)
  77                 return -errno;
  78
  79         *_f = f;
  80         return 0;
  81 }
  82
  83 int cg_read_pid(FILE *f, pid_t *_pid) {
  84         unsigned long ul;
  85
  86         /* Note that the cgroup.procs might contain duplicates! See
  87          * cgroups.txt for details. */
  88
  89         assert(f);
  90         assert(_pid);
  91
  92         errno = 0;
  93         if (fscanf(f, "%lu", &ul) != 1) {
  94
  95                 if (feof(f))
  96                         return 0;
  97
  98                 return errno > 0 ? -errno : -EIO;
  99         }
 100
 101         if (ul <= 0)
 102                 return -EIO;
 103
 104         *_pid = (pid_t) ul;
 105         return 1;
 106 }
 107
 108 int cg_read_event(
 109                 const char *controller,
 110                 const char *path,
 111                 const char *event,
 112                 char **val) {
 113
 114         _cleanup_free_ char *events = NULL, *content = NULL;
 115         char *p, *line;
 116         int r;
 117
 118         r = cg_get_path(controller, path, "cgroup.events", &events);
 119         if (r < 0)
 120                 return r;
 121
 122         r = read_full_file(events, &content, NULL);
 123         if (r < 0)
 124                 return r;
 125
 126         p = content;
 127         while ((line = strsep(&p, "\n"))) {
 128                 char *key;
 129
 130                 key = strsep(&line, " ");
 131                 if (!key || !line)
 132                         return -EINVAL;
 133
 134                 if (strcmp(key, event))
 135                         continue;
 136
 137                 *val = strdup(line);
 138                 return 0;
 139         }
 140
 141         return -ENOENT;
 142 }
 143
 144 bool cg_ns_supported(void) {
 145         static thread_local int enabled = -1;
 146
 147         if (enabled >= 0)
 148                 return enabled;
 149
 150         if (access("/proc/self/ns/cgroup", F_OK) == 0)
 151                 enabled = 1;
 152         else
 153                 enabled = 0;
 154
 155         return enabled;
 156 }
 157
 158 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
 159         _cleanup_free_ char *fs = NULL;
 160         int r;
 161         DIR *d;
 162
 163         assert(_d);
 164
 165         /* This is not recursive! */
 166
 167         r = cg_get_path(controller, path, NULL, &fs);
 168         if (r < 0)
 169                 return r;
 170
 171         d = opendir(fs);
 172         if (!d)
 173                 return -errno;
 174
 175         *_d = d;
 176         return 0;
 177 }
 178
 179 int cg_read_subgroup(DIR *d, char **fn) {
 180         struct dirent *de;
 181
 182         assert(d);
 183         assert(fn);
 184
 185         FOREACH_DIRENT_ALL(de, d, return -errno) {
 186                 char *b;
 187
 188                 if (de->d_type != DT_DIR)
 189                         continue;
 190
 191                 if (dot_or_dot_dot(de->d_name))
 192                         continue;
 193
 194                 b = strdup(de->d_name);
 195                 if (!b)
 196                         return -ENOMEM;
 197
 198                 *fn = b;
 199                 return 1;
 200         }
 201
 202         return 0;
 203 }
 204
 205 int cg_rmdir(const char *controller, const char *path) {
 206         _cleanup_free_ char *p = NULL;
 207         int r;
 208
 209         r = cg_get_path(controller, path, NULL, &p);
 210         if (r < 0)
 211                 return r;
 212
 213         r = rmdir(p);
 214         if (r < 0 && errno != ENOENT)
 215                 return -errno;
 216
 217         r = cg_hybrid_unified();
 218         if (r < 0)
 219                 return r;
 220         if (r == 0)
 221                 return 0;
 222
 223         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 224                 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
 225                 if (r < 0)
 226                         log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
 227         }
 228
 229         return 0;
 230 }
 231
 232 int cg_kill(
 233                 const char *controller,
 234                 const char *path,
 235                 int sig,
 236                 CGroupFlags flags,
 237                 Set *s,
 238                 cg_kill_log_func_t log_kill,
 239                 void *userdata) {
 240
 241         _cleanup_set_free_ Set *allocated_set = NULL;
 242         bool done = false;
 243         int r, ret = 0;
 244         pid_t my_pid;
 245
 246         assert(sig >= 0);
 247
 248          /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
 249           * SIGCONT on SIGKILL. */
 250         if (IN_SET(sig, SIGCONT, SIGKILL))
 251                 flags &= ~CGROUP_SIGCONT;
 252
 253         /* This goes through the tasks list and kills them all. This
 254          * is repeated until no further processes are added to the
 255          * tasks list, to properly handle forking processes */
 256
 257         if (!s) {
 258                 s = allocated_set = set_new(NULL);
 259                 if (!s)
 260                         return -ENOMEM;
 261         }
 262
 263         my_pid = getpid_cached();
 264
 265         do {
 266                 _cleanup_fclose_ FILE *f = NULL;
 267                 pid_t pid = 0;
 268                 done = true;
 269
 270                 r = cg_enumerate_processes(controller, path, &f);
 271                 if (r < 0) {
 272                         if (ret >= 0 && r != -ENOENT)
 273                                 return r;
 274
 275                         return ret;
 276                 }
 277
 278                 while ((r = cg_read_pid(f, &pid)) > 0) {
 279
 280                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 281                                 continue;
 282
 283                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 284                                 continue;
 285
 286                         if (log_kill)
 287                                 log_kill(pid, sig, userdata);
 288
 289                         /* If we haven't killed this process yet, kill
 290                          * it */
 291                         if (kill(pid, sig) < 0) {
 292                                 if (ret >= 0 && errno != ESRCH)
 293                                         ret = -errno;
 294                         } else {
 295                                 if (flags & CGROUP_SIGCONT)
 296                                         (void) kill(pid, SIGCONT);
 297
 298                                 if (ret == 0)
 299                                         ret = 1;
 300                         }
 301
 302                         done = false;
 303
 304                         r = set_put(s, PID_TO_PTR(pid));
 305                         if (r < 0) {
 306                                 if (ret >= 0)
 307                                         return r;
 308
 309                                 return ret;
 310                         }
 311                 }
 312
 313                 if (r < 0) {
 314                         if (ret >= 0)
 315                                 return r;
 316
 317                         return ret;
 318                 }
 319
 320                 /* To avoid racing against processes which fork
 321                  * quicker than we can kill them we repeat this until
 322                  * no new pids need to be killed. */
 323
 324         } while (!done);
 325
 326         return ret;
 327 }
 328
 329 int cg_kill_recursive(
 330                 const char *controller,
 331                 const char *path,
 332                 int sig,
 333                 CGroupFlags flags,
 334                 Set *s,
 335                 cg_kill_log_func_t log_kill,
 336                 void *userdata) {
 337
 338         _cleanup_set_free_ Set *allocated_set = NULL;
 339         _cleanup_closedir_ DIR *d = NULL;
 340         int r, ret;
 341         char *fn;
 342
 343         assert(path);
 344         assert(sig >= 0);
 345
 346         if (!s) {
 347                 s = allocated_set = set_new(NULL);
 348                 if (!s)
 349                         return -ENOMEM;
 350         }
 351
 352         ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
 353
 354         r = cg_enumerate_subgroups(controller, path, &d);
 355         if (r < 0) {
 356                 if (ret >= 0 && r != -ENOENT)
 357                         return r;
 358
 359                 return ret;
 360         }
 361
 362         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 363                 _cleanup_free_ char *p = NULL;
 364
 365                 p = strjoin(path, "/", fn);
 366                 free(fn);
 367                 if (!p)
 368                         return -ENOMEM;
 369
 370                 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
 371                 if (r != 0 && ret >= 0)
 372                         ret = r;
 373         }
 374         if (ret >= 0 && r < 0)
 375                 ret = r;
 376
 377         if (flags & CGROUP_REMOVE) {
 378                 r = cg_rmdir(controller, path);
 379                 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
 380                         return r;
 381         }
 382
 383         return ret;
 384 }
 385
 386 int cg_migrate(
 387                 const char *cfrom,
 388                 const char *pfrom,
 389                 const char *cto,
 390                 const char *pto,
 391                 CGroupFlags flags) {
 392
 393         bool done = false;
 394         _cleanup_set_free_ Set *s = NULL;
 395         int r, ret = 0;
 396         pid_t my_pid;
 397
 398         assert(cfrom);
 399         assert(pfrom);
 400         assert(cto);
 401         assert(pto);
 402
 403         s = set_new(NULL);
 404         if (!s)
 405                 return -ENOMEM;
 406
 407         my_pid = getpid_cached();
 408
 409         do {
 410                 _cleanup_fclose_ FILE *f = NULL;
 411                 pid_t pid = 0;
 412                 done = true;
 413
 414                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 415                 if (r < 0) {
 416                         if (ret >= 0 && r != -ENOENT)
 417                                 return r;
 418
 419                         return ret;
 420                 }
 421
 422                 while ((r = cg_read_pid(f, &pid)) > 0) {
 423
 424                         /* This might do weird stuff if we aren't a
 425                          * single-threaded program. However, we
 426                          * luckily know we are not */
 427                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 428                                 continue;
 429
 430                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 431                                 continue;
 432
 433                         /* Ignore kernel threads. Since they can only
 434                          * exist in the root cgroup, we only check for
 435                          * them there. */
 436                         if (cfrom &&
 437                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 438                             is_kernel_thread(pid) > 0)
 439                                 continue;
 440
 441                         r = cg_attach(cto, pto, pid);
 442                         if (r < 0) {
 443                                 if (ret >= 0 && r != -ESRCH)
 444                                         ret = r;
 445                         } else if (ret == 0)
 446                                 ret = 1;
 447
 448                         done = false;
 449
 450                         r = set_put(s, PID_TO_PTR(pid));
 451                         if (r < 0) {
 452                                 if (ret >= 0)
 453                                         return r;
 454
 455                                 return ret;
 456                         }
 457                 }
 458
 459                 if (r < 0) {
 460                         if (ret >= 0)
 461                                 return r;
 462
 463                         return ret;
 464                 }
 465         } while (!done);
 466
 467         return ret;
 468 }
 469
 470 int cg_migrate_recursive(
 471                 const char *cfrom,
 472                 const char *pfrom,
 473                 const char *cto,
 474                 const char *pto,
 475                 CGroupFlags flags) {
 476
 477         _cleanup_closedir_ DIR *d = NULL;
 478         int r, ret = 0;
 479         char *fn;
 480
 481         assert(cfrom);
 482         assert(pfrom);
 483         assert(cto);
 484         assert(pto);
 485
 486         ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
 487
 488         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 489         if (r < 0) {
 490                 if (ret >= 0 && r != -ENOENT)
 491                         return r;
 492
 493                 return ret;
 494         }
 495
 496         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 497                 _cleanup_free_ char *p = NULL;
 498
 499                 p = strjoin(pfrom, "/", fn);
 500                 free(fn);
 501                 if (!p)
 502                         return -ENOMEM;
 503
 504                 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
 505                 if (r != 0 && ret >= 0)
 506                         ret = r;
 507         }
 508
 509         if (r < 0 && ret >= 0)
 510                 ret = r;
 511
 512         if (flags & CGROUP_REMOVE) {
 513                 r = cg_rmdir(cfrom, pfrom);
 514                 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
 515                         return r;
 516         }
 517
 518         return ret;
 519 }
 520
 521 int cg_migrate_recursive_fallback(
 522                 const char *cfrom,
 523                 const char *pfrom,
 524                 const char *cto,
 525                 const char *pto,
 526                 CGroupFlags flags) {
 527
 528         int r;
 529
 530         assert(cfrom);
 531         assert(pfrom);
 532         assert(cto);
 533         assert(pto);
 534
 535         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
 536         if (r < 0) {
 537                 char prefix[strlen(pto) + 1];
 538
 539                 /* This didn't work? Then let's try all prefixes of the destination */
 540
 541                 PATH_FOREACH_PREFIX(prefix, pto) {
 542                         int q;
 543
 544                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
 545                         if (q >= 0)
 546                                 return q;
 547                 }
 548         }
 549
 550         return r;
 551 }
 552
 553 static const char *controller_to_dirname(const char *controller) {
 554         const char *e;
 555
 556         assert(controller);
 557
 558         /* Converts a controller name to the directory name below
 559          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 560          * just cuts off the name= prefixed used for named
 561          * hierarchies, if it is specified. */
 562
 563         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 564                 if (cg_hybrid_unified() > 0)
 565                         controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
 566                 else
 567                         controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
 568         }
 569
 570         e = startswith(controller, "name=");
 571         if (e)
 572                 return e;
 573
 574         return controller;
 575 }
 576
 577 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 578         const char *dn;
 579         char *t = NULL;
 580
 581         assert(fs);
 582         assert(controller);
 583
 584         dn = controller_to_dirname(controller);
 585
 586         if (isempty(path) && isempty(suffix))
 587                 t = strappend("/sys/fs/cgroup/", dn);
 588         else if (isempty(path))
 589                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
 590         else if (isempty(suffix))
 591                 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
 592         else
 593                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
 594         if (!t)
 595                 return -ENOMEM;
 596
 597         *fs = t;
 598         return 0;
 599 }
 600
 601 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 602         char *t;
 603
 604         assert(fs);
 605
 606         if (isempty(path) && isempty(suffix))
 607                 t = strdup("/sys/fs/cgroup");
 608         else if (isempty(path))
 609                 t = strappend("/sys/fs/cgroup/", suffix);
 610         else if (isempty(suffix))
 611                 t = strappend("/sys/fs/cgroup/", path);
 612         else
 613                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix);
 614         if (!t)
 615                 return -ENOMEM;
 616
 617         *fs = t;
 618         return 0;
 619 }
 620
 621 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 622         int r;
 623
 624         assert(fs);
 625
 626         if (!controller) {
 627                 char *t;
 628
 629                 /* If no controller is specified, we return the path
 630                  * *below* the controllers, without any prefix. */
 631
 632                 if (!path && !suffix)
 633                         return -EINVAL;
 634
 635                 if (!suffix)
 636                         t = strdup(path);
 637                 else if (!path)
 638                         t = strdup(suffix);
 639                 else
 640                         t = strjoin(path, "/", suffix);
 641                 if (!t)
 642                         return -ENOMEM;
 643
 644                 *fs = path_kill_slashes(t);
 645                 return 0;
 646         }
 647
 648         if (!cg_controller_is_valid(controller))
 649                 return -EINVAL;
 650
 651         r = cg_all_unified();
 652         if (r < 0)
 653                 return r;
 654         if (r > 0)
 655                 r = join_path_unified(path, suffix, fs);
 656         else
 657                 r = join_path_legacy(controller, path, suffix, fs);
 658         if (r < 0)
 659                 return r;
 660
 661         path_kill_slashes(*fs);
 662         return 0;
 663 }
 664
 665 static int controller_is_accessible(const char *controller) {
 666         int r;
 667
 668         assert(controller);
 669
 670         /* Checks whether a specific controller is accessible,
 671          * i.e. its hierarchy mounted. In the unified hierarchy all
 672          * controllers are considered accessible, except for the named
 673          * hierarchies */
 674
 675         if (!cg_controller_is_valid(controller))
 676                 return -EINVAL;
 677
 678         r = cg_all_unified();
 679         if (r < 0)
 680                 return r;
 681         if (r > 0) {
 682                 /* We don't support named hierarchies if we are using
 683                  * the unified hierarchy. */
 684
 685                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 686                         return 0;
 687
 688                 if (startswith(controller, "name="))
 689                         return -EOPNOTSUPP;
 690
 691         } else {
 692                 const char *cc, *dn;
 693
 694                 dn = controller_to_dirname(controller);
 695                 cc = strjoina("/sys/fs/cgroup/", dn);
 696
 697                 if (laccess(cc, F_OK) < 0)
 698                         return -errno;
 699         }
 700
 701         return 0;
 702 }
 703
 704 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 705         int r;
 706
 707         assert(controller);
 708         assert(fs);
 709
 710         /* Check if the specified controller is actually accessible */
 711         r = controller_is_accessible(controller);
 712         if (r < 0)
 713                 return r;
 714
 715         return cg_get_path(controller, path, suffix, fs);
 716 }
 717
 718 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 719         assert(path);
 720         assert(sb);
 721         assert(ftwbuf);
 722
 723         if (typeflag != FTW_DP)
 724                 return 0;
 725
 726         if (ftwbuf->level < 1)
 727                 return 0;
 728
 729         (void) rmdir(path);
 730         return 0;
 731 }
 732
 733 int cg_trim(const char *controller, const char *path, bool delete_root) {
 734         _cleanup_free_ char *fs = NULL;
 735         int r = 0, q;
 736
 737         assert(path);
 738
 739         r = cg_get_path(controller, path, NULL, &fs);
 740         if (r < 0)
 741                 return r;
 742
 743         errno = 0;
 744         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 745                 if (errno == ENOENT)
 746                         r = 0;
 747                 else if (errno > 0)
 748                         r = -errno;
 749                 else
 750                         r = -EIO;
 751         }
 752
 753         if (delete_root) {
 754                 if (rmdir(fs) < 0 && errno != ENOENT)
 755                         return -errno;
 756         }
 757
 758         q = cg_hybrid_unified();
 759         if (q < 0)
 760                 return q;
 761         if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 762                 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
 763                 if (q < 0)
 764                         log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
 765         }
 766
 767         return r;
 768 }
 769
 770 int cg_create(const char *controller, const char *path) {
 771         _cleanup_free_ char *fs = NULL;
 772         int r;
 773
 774         r = cg_get_path_and_check(controller, path, NULL, &fs);
 775         if (r < 0)
 776                 return r;
 777
 778         r = mkdir_parents(fs, 0755);
 779         if (r < 0)
 780                 return r;
 781
 782         if (mkdir(fs, 0755) < 0) {
 783
 784                 if (errno == EEXIST)
 785                         return 0;
 786
 787                 return -errno;
 788         }
 789
 790         r = cg_hybrid_unified();
 791         if (r < 0)
 792                 return r;
 793
 794         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 795                 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
 796                 if (r < 0)
 797                         log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
 798         }
 799
 800         return 1;
 801 }
 802
 803 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 804         int r, q;
 805
 806         assert(pid >= 0);
 807
 808         r = cg_create(controller, path);
 809         if (r < 0)
 810                 return r;
 811
 812         q = cg_attach(controller, path, pid);
 813         if (q < 0)
 814                 return q;
 815
 816         /* This does not remove the cgroup on failure */
 817         return r;
 818 }
 819
 820 int cg_attach(const char *controller, const char *path, pid_t pid) {
 821         _cleanup_free_ char *fs = NULL;
 822         char c[DECIMAL_STR_MAX(pid_t) + 2];
 823         int r;
 824
 825         assert(path);
 826         assert(pid >= 0);
 827
 828         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 829         if (r < 0)
 830                 return r;
 831
 832         if (pid == 0)
 833                 pid = getpid_cached();
 834
 835         xsprintf(c, PID_FMT "\n", pid);
 836
 837         r = write_string_file(fs, c, 0);
 838         if (r < 0)
 839                 return r;
 840
 841         r = cg_hybrid_unified();
 842         if (r < 0)
 843                 return r;
 844
 845         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 846                 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
 847                 if (r < 0)
 848                         log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
 849         }
 850
 851         return 0;
 852 }
 853
 854 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 855         int r;
 856
 857         assert(controller);
 858         assert(path);
 859         assert(pid >= 0);
 860
 861         r = cg_attach(controller, path, pid);
 862         if (r < 0) {
 863                 char prefix[strlen(path) + 1];
 864
 865                 /* This didn't work? Then let's try all prefixes of
 866                  * the destination */
 867
 868                 PATH_FOREACH_PREFIX(prefix, path) {
 869                         int q;
 870
 871                         q = cg_attach(controller, prefix, pid);
 872                         if (q >= 0)
 873                                 return q;
 874                 }
 875         }
 876
 877         return r;
 878 }
 879
 880 int cg_set_access(
 881                 const char *controller,
 882                 const char *path,
 883                 uid_t uid,
 884                 gid_t gid) {
 885
 886         struct Attribute {
 887                 const char *name;
 888                 bool fatal;
 889         };
 890
 891         /* cgroupsv1, aka legacy/non-unified */
 892         static const struct Attribute legacy_attributes[] = {
 893                 { "cgroup.procs",           true  },
 894                 { "tasks",                  false },
 895                 { "cgroup.clone_children",  false },
 896                 {},
 897         };
 898
 899         /* cgroupsv2, aka unified */
 900         static const struct Attribute unified_attributes[] = {
 901                 { "cgroup.procs",           true  },
 902                 { "cgroup.subtree_control", true  },
 903                 { "cgroup.threads",         false },
 904                 {},
 905         };
 906
 907         static const struct Attribute* const attributes[] = {
 908                 [false] = legacy_attributes,
 909                 [true]  = unified_attributes,
 910         };
 911
 912         _cleanup_free_ char *fs = NULL;
 913         const struct Attribute *i;
 914         int r, unified;
 915
 916         assert(path);
 917
 918         if (uid == UID_INVALID && gid == GID_INVALID)
 919                 return 0;
 920
 921         unified = cg_unified_controller(controller);
 922         if (unified < 0)
 923                 return unified;
 924
 925         /* Configure access to the cgroup itself */
 926         r = cg_get_path(controller, path, NULL, &fs);
 927         if (r < 0)
 928                 return r;
 929
 930         r = chmod_and_chown(fs, 0755, uid, gid);
 931         if (r < 0)
 932                 return r;
 933
 934         /* Configure access to the cgroup's attributes */
 935         for (i = attributes[unified]; i->name; i++) {
 936                 fs = mfree(fs);
 937
 938                 r = cg_get_path(controller, path, i->name, &fs);
 939                 if (r < 0)
 940                         return r;
 941
 942                 r = chmod_and_chown(fs, 0644, uid, gid);
 943                 if (r < 0) {
 944                         if (i->fatal)
 945                                 return r;
 946
 947                         log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
 948                 }
 949         }
 950
 951         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 952                 r = cg_hybrid_unified();
 953                 if (r < 0)
 954                         return r;
 955                 if (r > 0) {
 956                         /* Always propagate access mode from unified to legacy controller */
 957                         r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
 958                         if (r < 0)
 959                                 log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
 960                 }
 961         }
 962
 963         return 0;
 964 }
 965
 966 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
 967         _cleanup_free_ char *fs = NULL;
 968         int r;
 969
 970         assert(path);
 971         assert(name);
 972         assert(value || size <= 0);
 973
 974         r = cg_get_path(controller, path, NULL, &fs);
 975         if (r < 0)
 976                 return r;
 977
 978         if (setxattr(fs, name, value, size, flags) < 0)
 979                 return -errno;
 980
 981         return 0;
 982 }
 983
 984 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
 985         _cleanup_free_ char *fs = NULL;
 986         ssize_t n;
 987         int r;
 988
 989         assert(path);
 990         assert(name);
 991
 992         r = cg_get_path(controller, path, NULL, &fs);
 993         if (r < 0)
 994                 return r;
 995
 996         n = getxattr(fs, name, value, size);
 997         if (n < 0)
 998                 return -errno;
 999
1000         return (int) n;
1001 }
1002
1003 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
1004         _cleanup_fclose_ FILE *f = NULL;
1005         char line[LINE_MAX];
1006         const char *fs, *controller_str;
1007         size_t cs = 0;
1008         int unified;
1009
1010         assert(path);
1011         assert(pid >= 0);
1012
1013         if (controller) {
1014                 if (!cg_controller_is_valid(controller))
1015                         return -EINVAL;
1016         } else
1017                 controller = SYSTEMD_CGROUP_CONTROLLER;
1018
1019         unified = cg_unified_controller(controller);
1020         if (unified < 0)
1021                 return unified;
1022         if (unified == 0) {
1023                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1024                         controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1025                 else
1026                         controller_str = controller;
1027
1028                 cs = strlen(controller_str);
1029         }
1030
1031         fs = procfs_file_alloca(pid, "cgroup");
1032         f = fopen(fs, "re");
1033         if (!f)
1034                 return errno == ENOENT ? -ESRCH : -errno;
1035
1036         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
1037
1038         FOREACH_LINE(line, f, return -errno) {
1039                 char *e, *p;
1040
1041                 truncate_nl(line);
1042
1043                 if (unified) {
1044                         e = startswith(line, "0:");
1045                         if (!e)
1046                                 continue;
1047
1048                         e = strchr(e, ':');
1049                         if (!e)
1050                                 continue;
1051                 } else {
1052                         char *l;
1053                         size_t k;
1054                         const char *word, *state;
1055                         bool found = false;
1056
1057                         l = strchr(line, ':');
1058                         if (!l)
1059                                 continue;
1060
1061                         l++;
1062                         e = strchr(l, ':');
1063                         if (!e)
1064                                 continue;
1065
1066                         *e = 0;
1067                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
1068                                 if (k == cs && memcmp(word, controller_str, cs) == 0) {
1069                                         found = true;
1070                                         break;
1071                                 }
1072                         }
1073
1074                         if (!found)
1075                                 continue;
1076                 }
1077
1078                 p = strdup(e + 1);
1079                 if (!p)
1080                         return -ENOMEM;
1081
1082                 /* Truncate suffix indicating the process is a zombie */
1083                 e = endswith(p, " (deleted)");
1084                 if (e)
1085                         *e = 0;
1086
1087                 *path = p;
1088                 return 0;
1089         }
1090
1091         return -ENODATA;
1092 }
1093
1094 int cg_install_release_agent(const char *controller, const char *agent) {
1095         _cleanup_free_ char *fs = NULL, *contents = NULL;
1096         const char *sc;
1097         int r;
1098
1099         assert(agent);
1100
1101         r = cg_unified_controller(controller);
1102         if (r < 0)
1103                 return r;
1104         if (r > 0) /* doesn't apply to unified hierarchy */
1105                 return -EOPNOTSUPP;
1106
1107         r = cg_get_path(controller, NULL, "release_agent", &fs);
1108         if (r < 0)
1109                 return r;
1110
1111         r = read_one_line_file(fs, &contents);
1112         if (r < 0)
1113                 return r;
1114
1115         sc = strstrip(contents);
1116         if (isempty(sc)) {
1117                 r = write_string_file(fs, agent, 0);
1118                 if (r < 0)
1119                         return r;
1120         } else if (!path_equal(sc, agent))
1121                 return -EEXIST;
1122
1123         fs = mfree(fs);
1124         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1125         if (r < 0)
1126                 return r;
1127
1128         contents = mfree(contents);
1129         r = read_one_line_file(fs, &contents);
1130         if (r < 0)
1131                 return r;
1132
1133         sc = strstrip(contents);
1134         if (streq(sc, "0")) {
1135                 r = write_string_file(fs, "1", 0);
1136                 if (r < 0)
1137                         return r;
1138
1139                 return 1;
1140         }
1141
1142         if (!streq(sc, "1"))
1143                 return -EIO;
1144
1145         return 0;
1146 }
1147
1148 int cg_uninstall_release_agent(const char *controller) {
1149         _cleanup_free_ char *fs = NULL;
1150         int r;
1151
1152         r = cg_unified_controller(controller);
1153         if (r < 0)
1154                 return r;
1155         if (r > 0) /* Doesn't apply to unified hierarchy */
1156                 return -EOPNOTSUPP;
1157
1158         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1159         if (r < 0)
1160                 return r;
1161
1162         r = write_string_file(fs, "0", 0);
1163         if (r < 0)
1164                 return r;
1165
1166         fs = mfree(fs);
1167
1168         r = cg_get_path(controller, NULL, "release_agent", &fs);
1169         if (r < 0)
1170                 return r;
1171
1172         r = write_string_file(fs, "", 0);
1173         if (r < 0)
1174                 return r;
1175
1176         return 0;
1177 }
1178
1179 int cg_is_empty(const char *controller, const char *path) {
1180         _cleanup_fclose_ FILE *f = NULL;
1181         pid_t pid;
1182         int r;
1183
1184         assert(path);
1185
1186         r = cg_enumerate_processes(controller, path, &f);
1187         if (r == -ENOENT)
1188                 return 1;
1189         if (r < 0)
1190                 return r;
1191
1192         r = cg_read_pid(f, &pid);
1193         if (r < 0)
1194                 return r;
1195
1196         return r == 0;
1197 }
1198
1199 int cg_is_empty_recursive(const char *controller, const char *path) {
1200         int r;
1201
1202         assert(path);
1203
1204         /* The root cgroup is always populated */
1205         if (controller && (isempty(path) || path_equal(path, "/")))
1206                 return false;
1207
1208         r = cg_unified_controller(controller);
1209         if (r < 0)
1210                 return r;
1211         if (r > 0) {
1212                 _cleanup_free_ char *t = NULL;
1213
1214                 /* On the unified hierarchy we can check empty state
1215                  * via the "populated" attribute of "cgroup.events". */
1216
1217                 r = cg_read_event(controller, path, "populated", &t);
1218                 if (r < 0)
1219                         return r;
1220
1221                 return streq(t, "0");
1222         } else {
1223                 _cleanup_closedir_ DIR *d = NULL;
1224                 char *fn;
1225
1226                 r = cg_is_empty(controller, path);
1227                 if (r <= 0)
1228                         return r;
1229
1230                 r = cg_enumerate_subgroups(controller, path, &d);
1231                 if (r == -ENOENT)
1232                         return 1;
1233                 if (r < 0)
1234                         return r;
1235
1236                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1237                         _cleanup_free_ char *p = NULL;
1238
1239                         p = strjoin(path, "/", fn);
1240                         free(fn);
1241                         if (!p)
1242                                 return -ENOMEM;
1243
1244                         r = cg_is_empty_recursive(controller, p);
1245                         if (r <= 0)
1246                                 return r;
1247                 }
1248                 if (r < 0)
1249                         return r;
1250
1251                 return true;
1252         }
1253 }
1254
1255 int cg_split_spec(const char *spec, char **controller, char **path) {
1256         char *t = NULL, *u = NULL;
1257         const char *e;
1258
1259         assert(spec);
1260
1261         if (*spec == '/') {
1262                 if (!path_is_normalized(spec))
1263                         return -EINVAL;
1264
1265                 if (path) {
1266                         t = strdup(spec);
1267                         if (!t)
1268                                 return -ENOMEM;
1269
1270                         *path = path_kill_slashes(t);
1271                 }
1272
1273                 if (controller)
1274                         *controller = NULL;
1275
1276                 return 0;
1277         }
1278
1279         e = strchr(spec, ':');
1280         if (!e) {
1281                 if (!cg_controller_is_valid(spec))
1282                         return -EINVAL;
1283
1284                 if (controller) {
1285                         t = strdup(spec);
1286                         if (!t)
1287                                 return -ENOMEM;
1288
1289                         *controller = t;
1290                 }
1291
1292                 if (path)
1293                         *path = NULL;
1294
1295                 return 0;
1296         }
1297
1298         t = strndup(spec, e-spec);
1299         if (!t)
1300                 return -ENOMEM;
1301         if (!cg_controller_is_valid(t)) {
1302                 free(t);
1303                 return -EINVAL;
1304         }
1305
1306         if (isempty(e+1))
1307                 u = NULL;
1308         else {
1309                 u = strdup(e+1);
1310                 if (!u) {
1311                         free(t);
1312                         return -ENOMEM;
1313                 }
1314
1315                 if (!path_is_normalized(u) ||
1316                     !path_is_absolute(u)) {
1317                         free(t);
1318                         free(u);
1319                         return -EINVAL;
1320                 }
1321
1322                 path_kill_slashes(u);
1323         }
1324
1325         if (controller)
1326                 *controller = t;
1327         else
1328                 free(t);
1329
1330         if (path)
1331                 *path = u;
1332         else
1333                 free(u);
1334
1335         return 0;
1336 }
1337
1338 int cg_mangle_path(const char *path, char **result) {
1339         _cleanup_free_ char *c = NULL, *p = NULL;
1340         char *t;
1341         int r;
1342
1343         assert(path);
1344         assert(result);
1345
1346         /* First, check if it already is a filesystem path */
1347         if (path_startswith(path, "/sys/fs/cgroup")) {
1348
1349                 t = strdup(path);
1350                 if (!t)
1351                         return -ENOMEM;
1352
1353                 *result = path_kill_slashes(t);
1354                 return 0;
1355         }
1356
1357         /* Otherwise, treat it as cg spec */
1358         r = cg_split_spec(path, &c, &p);
1359         if (r < 0)
1360                 return r;
1361
1362         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1363 }
1364
1365 int cg_get_root_path(char **path) {
1366         char *p, *e;
1367         int r;
1368
1369         assert(path);
1370
1371         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1372         if (r < 0)
1373                 return r;
1374
1375         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1376         if (!e)
1377                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1378         if (!e)
1379                 e = endswith(p, "/system"); /* even more legacy */
1380         if (e)
1381                 *e = 0;
1382
1383         *path = p;
1384         return 0;
1385 }
1386
1387 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1388         _cleanup_free_ char *rt = NULL;
1389         char *p;
1390         int r;
1391
1392         assert(cgroup);
1393         assert(shifted);
1394
1395         if (!root) {
1396                 /* If the root was specified let's use that, otherwise
1397                  * let's determine it from PID 1 */
1398
1399                 r = cg_get_root_path(&rt);
1400                 if (r < 0)
1401                         return r;
1402
1403                 root = rt;
1404         }
1405
1406         p = path_startswith(cgroup, root);
1407         if (p && p > cgroup)
1408                 *shifted = p - 1;
1409         else
1410                 *shifted = cgroup;
1411
1412         return 0;
1413 }
1414
1415 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1416         _cleanup_free_ char *raw = NULL;
1417         const char *c;
1418         int r;
1419
1420         assert(pid >= 0);
1421         assert(cgroup);
1422
1423         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1424         if (r < 0)
1425                 return r;
1426
1427         r = cg_shift_path(raw, root, &c);
1428         if (r < 0)
1429                 return r;
1430
1431         if (c == raw) {
1432                 *cgroup = raw;
1433                 raw = NULL;
1434         } else {
1435                 char *n;
1436
1437                 n = strdup(c);
1438                 if (!n)
1439                         return -ENOMEM;
1440
1441                 *cgroup = n;
1442         }
1443
1444         return 0;
1445 }
1446
1447 int cg_path_decode_unit(const char *cgroup, char **unit) {
1448         char *c, *s;
1449         size_t n;
1450
1451         assert(cgroup);
1452         assert(unit);
1453
1454         n = strcspn(cgroup, "/");
1455         if (n < 3)
1456                 return -ENXIO;
1457
1458         c = strndupa(cgroup, n);
1459         c = cg_unescape(c);
1460
1461         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1462                 return -ENXIO;
1463
1464         s = strdup(c);
1465         if (!s)
1466                 return -ENOMEM;
1467
1468         *unit = s;
1469         return 0;
1470 }
1471
1472 static bool valid_slice_name(const char *p, size_t n) {
1473
1474         if (!p)
1475                 return false;
1476
1477         if (n < STRLEN("x.slice"))
1478                 return false;
1479
1480         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1481                 char buf[n+1], *c;
1482
1483                 memcpy(buf, p, n);
1484                 buf[n] = 0;
1485
1486                 c = cg_unescape(buf);
1487
1488                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1489         }
1490
1491         return false;
1492 }
1493
/* Skips over all slice assignments at the beginning of 'p'. Returns a pointer to the first path
 * component that valid_slice_name() rejects (leading slashes already skipped). */
static const char *skip_slices(const char *p) {
        const char *cur;
        size_t len;

        assert(p);

        cur = p + strspn(p, "/");
        len = strcspn(cur, "/");

        while (valid_slice_name(cur, len)) {
                /* Jump over the slice and the slashes following it, then measure the next component */
                cur += len;
                cur += strspn(cur, "/");
                len = strcspn(cur, "/");
        }

        return cur;
}
1511
1512 int cg_path_get_unit(const char *path, char **ret) {
1513         const char *e;
1514         char *unit;
1515         int r;
1516
1517         assert(path);
1518         assert(ret);
1519
1520         e = skip_slices(path);
1521
1522         r = cg_path_decode_unit(e, &unit);
1523         if (r < 0)
1524                 return r;
1525
1526         /* We skipped over the slices, don't accept any now */
1527         if (endswith(unit, ".slice")) {
1528                 free(unit);
1529                 return -ENXIO;
1530         }
1531
1532         *ret = unit;
1533         return 0;
1534 }
1535
1536 int cg_pid_get_unit(pid_t pid, char **unit) {
1537         _cleanup_free_ char *cgroup = NULL;
1538         int r;
1539
1540         assert(unit);
1541
1542         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1543         if (r < 0)
1544                 return r;
1545
1546         return cg_path_get_unit(cgroup, unit);
1547 }
1548
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the session scope component (and any
 * slashes after it), or NULL if 'p' is empty, its first component is not
 * of the form session-*.scope, or the part matched by "*" is rejected by
 * session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the session ID, i.e. the component minus prefix (8) and suffix (6), plus NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence report failure as NULL (not as
                 * boolean false), just like all other error paths here do. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1585
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the user manager component (and any
 * slashes after it), or NULL if 'p' is empty, its first component is not
 * of the form user@*.service, or the part matched by "*" is not accepted
 * by parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", STRLEN("user@")) == 0 &&
            memcmp(p + n - STRLEN(".service"), ".service", STRLEN(".service")) == 0) {
                /* What remains between prefix and suffix is supposed to be the UID */
                size_t l = n - STRLEN("user@") - STRLEN(".service");
                char uid_str[l + 1];

                memcpy(uid_str, p + STRLEN("user@"), l);
                uid_str[l] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;

                p += n;
                p += strspn(p, "/");

                return p;
        }

        return NULL;
}
1623
/* Skips everything in 'path' that leads up to a user unit: any slices, followed by either a
 * user manager service or a session scope. Returns NULL if neither of the two is found. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Skip the user manager, if it's in the path now, and alternatively the user session */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1640
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if the path does not carry
 * the prefix skip_user_prefix() requires, and whatever cg_path_get_unit() returns otherwise. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Behind the prefix things look pretty much the same as for a
         * system unit, hence the same parser is used from there on. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1656
1657 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1658         _cleanup_free_ char *cgroup = NULL;
1659         int r;
1660
1661         assert(unit);
1662
1663         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1664         if (r < 0)
1665                 return r;
1666
1667         return cg_path_get_user_unit(cgroup, unit);
1668 }
1669
1670 int cg_path_get_machine_name(const char *path, char **machine) {
1671         _cleanup_free_ char *u = NULL;
1672         const char *sl;
1673         int r;
1674
1675         r = cg_path_get_unit(path, &u);
1676         if (r < 0)
1677                 return r;
1678
1679         sl = strjoina("/run/systemd/machines/unit:", u);
1680         return readlink_malloc(sl, machine);
1681 }
1682
1683 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1684         _cleanup_free_ char *cgroup = NULL;
1685         int r;
1686
1687         assert(machine);
1688
1689         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1690         if (r < 0)
1691                 return r;
1692
1693         return cg_path_get_machine_name(cgroup, machine);
1694 }
1695
1696 int cg_path_get_session(const char *path, char **session) {
1697         _cleanup_free_ char *unit = NULL;
1698         char *start, *end;
1699         int r;
1700
1701         assert(path);
1702
1703         r = cg_path_get_unit(path, &unit);
1704         if (r < 0)
1705                 return r;
1706
1707         start = startswith(unit, "session-");
1708         if (!start)
1709                 return -ENXIO;
1710         end = endswith(start, ".scope");
1711         if (!end)
1712                 return -ENXIO;
1713
1714         *end = 0;
1715         if (!session_id_valid(start))
1716                 return -ENXIO;
1717
1718         if (session) {
1719                 char *rr;
1720
1721                 rr = strdup(start);
1722                 if (!rr)
1723                         return -ENOMEM;
1724
1725                 *session = rr;
1726         }
1727
1728         return 0;
1729 }
1730
1731 int cg_pid_get_session(pid_t pid, char **session) {
1732         _cleanup_free_ char *cgroup = NULL;
1733         int r;
1734
1735         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1736         if (r < 0)
1737                 return r;
1738
1739         return cg_path_get_session(cgroup, session);
1740 }
1741
1742 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1743         _cleanup_free_ char *slice = NULL;
1744         char *start, *end;
1745         int r;
1746
1747         assert(path);
1748
1749         r = cg_path_get_slice(path, &slice);
1750         if (r < 0)
1751                 return r;
1752
1753         start = startswith(slice, "user-");
1754         if (!start)
1755                 return -ENXIO;
1756         end = endswith(start, ".slice");
1757         if (!end)
1758                 return -ENXIO;
1759
1760         *end = 0;
1761         if (parse_uid(start, uid) < 0)
1762                 return -ENXIO;
1763
1764         return 0;
1765 }
1766
1767 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1768         _cleanup_free_ char *cgroup = NULL;
1769         int r;
1770
1771         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1772         if (r < 0)
1773                 return r;
1774
1775         return cg_path_get_owner_uid(cgroup, uid);
1776 }
1777
1778 int cg_path_get_slice(const char *p, char **slice) {
1779         const char *e = NULL;
1780
1781         assert(p);
1782         assert(slice);
1783
1784         /* Finds the right-most slice unit from the beginning, but
1785          * stops before we come to the first non-slice unit. */
1786
1787         for (;;) {
1788                 size_t n;
1789
1790                 p += strspn(p, "/");
1791
1792                 n = strcspn(p, "/");
1793                 if (!valid_slice_name(p, n)) {
1794
1795                         if (!e) {
1796                                 char *s;
1797
1798                                 s = strdup(SPECIAL_ROOT_SLICE);
1799                                 if (!s)
1800                                         return -ENOMEM;
1801
1802                                 *slice = s;
1803                                 return 0;
1804                         }
1805
1806                         return cg_path_decode_unit(e, slice);
1807                 }
1808
1809                 e = p;
1810                 p += n;
1811         }
1812 }
1813
1814 int cg_pid_get_slice(pid_t pid, char **slice) {
1815         _cleanup_free_ char *cgroup = NULL;
1816         int r;
1817
1818         assert(slice);
1819
1820         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1821         if (r < 0)
1822                 return r;
1823
1824         return cg_path_get_slice(cgroup, slice);
1825 }
1826
/* Determines the user slice a cgroup path belongs to. Returns -ENXIO if the path does not carry
 * the prefix skip_user_prefix() requires, and whatever cg_path_get_slice() returns otherwise. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Behind the prefix things look pretty much the same as for a system
         * slice, hence the same parser is used from there on. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1840
1841 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1842         _cleanup_free_ char *cgroup = NULL;
1843         int r;
1844
1845         assert(slice);
1846
1847         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1848         if (r < 0)
1849                 return r;
1850
1851         return cg_path_get_user_slice(cgroup, slice);
1852 }
1853
1854 char *cg_escape(const char *p) {
1855         bool need_prefix = false;
1856
1857         /* This implements very minimal escaping for names to be used
1858          * as file names in the cgroup tree: any name which might
1859          * conflict with a kernel name or is prefixed with '_' is
1860          * prefixed with a '_'. That way, when reading cgroup names it
1861          * is sufficient to remove a single prefixing underscore if
1862          * there is one. */
1863
1864         /* The return value of this function (unlike cg_unescape())
1865          * needs free()! */
1866
1867         if (IN_SET(p[0], 0, '_', '.') ||
1868             streq(p, "notify_on_release") ||
1869             streq(p, "release_agent") ||
1870             streq(p, "tasks") ||
1871             startswith(p, "cgroup."))
1872                 need_prefix = true;
1873         else {
1874                 const char *dot;
1875
1876                 dot = strrchr(p, '.');
1877                 if (dot) {
1878                         CGroupController c;
1879                         size_t l = dot - p;
1880
1881                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1882                                 const char *n;
1883
1884                                 n = cgroup_controller_to_string(c);
1885
1886                                 if (l != strlen(n))
1887                                         continue;
1888
1889                                 if (memcmp(p, n, l) != 0)
1890                                         continue;
1891
1892                                 need_prefix = true;
1893                                 break;
1894                         }
1895                 }
1896         }
1897
1898         if (need_prefix)
1899                 return strappend("_", p);
1900
1901         return strdup(p);
1902 }
1903
/* Undoes cg_escape(): skips a single leading "_" if there is one.
 *
 * The return value of this function (unlike cg_escape()) doesn't need free(), as it points
 * into the string passed in! */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (*p == '_' ? p + 1 : p);
}
1915
1916 #define CONTROLLER_VALID                        \
1917         DIGITS LETTERS                          \
1918         "_"
1919
1920 bool cg_controller_is_valid(const char *p) {
1921         const char *t, *s;
1922
1923         if (!p)
1924                 return false;
1925
1926         if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1927                 return true;
1928
1929         s = startswith(p, "name=");
1930         if (s)
1931                 p = s;
1932
1933         if (IN_SET(*p, 0, '_'))
1934                 return false;
1935
1936         for (t = p; *t; t++)
1937                 if (!strchr(CONTROLLER_VALID, *t))
1938                         return false;
1939
1940         if (t - p > FILENAME_MAX)
1941                 return false;
1942
1943         return true;
1944 }
1945
1946 int cg_slice_to_path(const char *unit, char **ret) {
1947         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1948         const char *dash;
1949         int r;
1950
1951         assert(unit);
1952         assert(ret);
1953
1954         if (streq(unit, SPECIAL_ROOT_SLICE)) {
1955                 char *x;
1956
1957                 x = strdup("");
1958                 if (!x)
1959                         return -ENOMEM;
1960                 *ret = x;
1961                 return 0;
1962         }
1963
1964         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1965                 return -EINVAL;
1966
1967         if (!endswith(unit, ".slice"))
1968                 return -EINVAL;
1969
1970         r = unit_name_to_prefix(unit, &p);
1971         if (r < 0)
1972                 return r;
1973
1974         dash = strchr(p, '-');
1975
1976         /* Don't allow initial dashes */
1977         if (dash == p)
1978                 return -EINVAL;
1979
1980         while (dash) {
1981                 _cleanup_free_ char *escaped = NULL;
1982                 char n[dash - p + sizeof(".slice")];
1983
1984                 /* Don't allow trailing or double dashes */
1985                 if (IN_SET(dash[1], 0, '-'))
1986                         return -EINVAL;
1987
1988                 strcpy(stpncpy(n, p, dash - p), ".slice");
1989                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1990                         return -EINVAL;
1991
1992                 escaped = cg_escape(n);
1993                 if (!escaped)
1994                         return -ENOMEM;
1995
1996                 if (!strextend(&s, escaped, "/", NULL))
1997                         return -ENOMEM;
1998
1999                 dash = strchr(dash+1, '-');
2000         }
2001
2002         e = cg_escape(unit);
2003         if (!e)
2004                 return -ENOMEM;
2005
2006         if (!strextend(&s, e, NULL))
2007                 return -ENOMEM;
2008
2009         *ret = s;
2010         s = NULL;
2011
2012         return 0;
2013 }
2014
2015 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2016         _cleanup_free_ char *p = NULL;
2017         int r;
2018
2019         r = cg_get_path(controller, path, attribute, &p);
2020         if (r < 0)
2021                 return r;
2022
2023         return write_string_file(p, value, 0);
2024 }
2025
2026 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2027         _cleanup_free_ char *p = NULL;
2028         int r;
2029
2030         r = cg_get_path(controller, path, attribute, &p);
2031         if (r < 0)
2032                 return r;
2033
2034         return read_one_line_file(p, ret);
2035 }
2036
2037 int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
2038         _cleanup_free_ char *filename = NULL, *content = NULL;
2039         char *line, *p;
2040         int i, r;
2041
2042         for (i = 0; keys[i]; i++)
2043                 values[i] = NULL;
2044
2045         r = cg_get_path(controller, path, attribute, &filename);
2046         if (r < 0)
2047                 return r;
2048
2049         r = read_full_file(filename, &content, NULL);
2050         if (r < 0)
2051                 return r;
2052
2053         p = content;
2054         while ((line = strsep(&p, "\n"))) {
2055                 char *key;
2056
2057                 key = strsep(&line, " ");
2058
2059                 for (i = 0; keys[i]; i++) {
2060                         if (streq(key, keys[i])) {
2061                                 values[i] = strdup(line);
2062                                 break;
2063                         }
2064                 }
2065         }
2066
2067         for (i = 0; keys[i]; i++) {
2068                 if (!values[i]) {
2069                         for (i = 0; keys[i]; i++) {
2070                                 values[i] = mfree(values[i]);
2071                         }
2072                         return -ENOENT;
2073                 }
2074         }
2075
2076         return 0;
2077 }
2078
2079 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2080         CGroupController c;
2081         int r;
2082
2083         /* This one will create a cgroup in our private tree, but also
2084          * duplicate it in the trees specified in mask, and remove it
2085          * in all others */
2086
2087         /* First create the cgroup in our own hierarchy. */
2088         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2089         if (r < 0)
2090                 return r;
2091
2092         /* If we are in the unified hierarchy, we are done now */
2093         r = cg_all_unified();
2094         if (r < 0)
2095                 return r;
2096         if (r > 0)
2097                 return 0;
2098
2099         /* Otherwise, do the same in the other hierarchies */
2100         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2101                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2102                 const char *n;
2103
2104                 n = cgroup_controller_to_string(c);
2105
2106                 if (mask & bit)
2107                         (void) cg_create(n, path);
2108                 else if (supported & bit)
2109                         (void) cg_trim(n, path, true);
2110         }
2111
2112         return 0;
2113 }
2114
2115 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2116         CGroupController c;
2117         int r;
2118
2119         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2120         if (r < 0)
2121                 return r;
2122
2123         r = cg_all_unified();
2124         if (r < 0)
2125                 return r;
2126         if (r > 0)
2127                 return 0;
2128
2129         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2130                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2131                 const char *p = NULL;
2132
2133                 if (!(supported & bit))
2134                         continue;
2135
2136                 if (path_callback)
2137                         p = path_callback(bit, userdata);
2138
2139                 if (!p)
2140                         p = path;
2141
2142                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2143         }
2144
2145         return 0;
2146 }
2147
2148 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2149         Iterator i;
2150         void *pidp;
2151         int r = 0;
2152
2153         SET_FOREACH(pidp, pids, i) {
2154                 pid_t pid = PTR_TO_PID(pidp);
2155                 int q;
2156
2157                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2158                 if (q < 0 && r >= 0)
2159                         r = q;
2160         }
2161
2162         return r;
2163 }
2164
2165 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2166         CGroupController c;
2167         int r = 0, q;
2168
2169         if (!path_equal(from, to))  {
2170                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2171                 if (r < 0)
2172                         return r;
2173         }
2174
2175         q = cg_all_unified();
2176         if (q < 0)
2177                 return q;
2178         if (q > 0)
2179                 return r;
2180
2181         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2182                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2183                 const char *p = NULL;
2184
2185                 if (!(supported & bit))
2186                         continue;
2187
2188                 if (to_callback)
2189                         p = to_callback(bit, userdata);
2190
2191                 if (!p)
2192                         p = to;
2193
2194                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2195         }
2196
2197         return 0;
2198 }
2199
2200 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2201         CGroupController c;
2202         int r, q;
2203
2204         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2205         if (r < 0)
2206                 return r;
2207
2208         q = cg_all_unified();
2209         if (q < 0)
2210                 return q;
2211         if (q > 0)
2212                 return r;
2213
2214         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2215                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2216
2217                 if (!(supported & bit))
2218                         continue;
2219
2220                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2221         }
2222
2223         return 0;
2224 }
2225
2226 int cg_mask_to_string(CGroupMask mask, char **ret) {
2227         _cleanup_free_ char *s = NULL;
2228         size_t n = 0, allocated = 0;
2229         bool space = false;
2230         CGroupController c;
2231
2232         assert(ret);
2233
2234         if (mask == 0) {
2235                 *ret = NULL;
2236                 return 0;
2237         }
2238
2239         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2240                 const char *k;
2241                 size_t l;
2242
2243                 if (!(mask & CGROUP_CONTROLLER_TO_MASK(c)))
2244                         continue;
2245
2246                 k = cgroup_controller_to_string(c);
2247                 l = strlen(k);
2248
2249                 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2250                         return -ENOMEM;
2251
2252                 if (space)
2253                         s[n] = ' ';
2254                 memcpy(s + n + space, k, l);
2255                 n += space + l;
2256
2257                 space = true;
2258         }
2259
2260         assert(s);
2261
2262         s[n] = 0;
2263         *ret = s;
2264         s = NULL;
2265
2266         return 0;
2267 }
2268
2269 int cg_mask_from_string(const char *value, CGroupMask *mask) {
2270         assert(mask);
2271         assert(value);
2272
2273         for (;;) {
2274                 _cleanup_free_ char *n = NULL;
2275                 CGroupController v;
2276                 int r;
2277
2278                 r = extract_first_word(&value, &n, NULL, 0);
2279                 if (r < 0)
2280                         return r;
2281                 if (r == 0)
2282                         break;
2283
2284                 v = cgroup_controller_from_string(n);
2285                 if (v < 0)
2286                         continue;
2287
2288                 *mask |= CGROUP_CONTROLLER_TO_MASK(v);
2289         }
2290         return 0;
2291 }
2292
2293 int cg_mask_supported(CGroupMask *ret) {
2294         CGroupMask mask = 0;
2295         int r;
2296
2297         /* Determines the mask of supported cgroup controllers. Only
2298          * includes controllers we can make sense of and that are
2299          * actually accessible. */
2300
2301         r = cg_all_unified();
2302         if (r < 0)
2303                 return r;
2304         if (r > 0) {
2305                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2306
2307                 /* In the unified hierarchy we can read the supported
2308                  * and accessible controllers from a the top-level
2309                  * cgroup attribute */
2310
2311                 r = cg_get_root_path(&root);
2312                 if (r < 0)
2313                         return r;
2314
2315                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2316                 if (r < 0)
2317                         return r;
2318
2319                 r = read_one_line_file(path, &controllers);
2320                 if (r < 0)
2321                         return r;
2322
2323                 r = cg_mask_from_string(controllers, &mask);
2324                 if (r < 0)
2325                         return r;
2326
2327                 /* Currently, we support the cpu, memory, io and pids
2328                  * controller in the unified hierarchy, mask
2329                  * everything else off. */
2330                 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2331
2332         } else {
2333                 CGroupController c;
2334
2335                 /* In the legacy hierarchy, we check whether which
2336                  * hierarchies are mounted. */
2337
2338                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2339                         const char *n;
2340
2341                         n = cgroup_controller_to_string(c);
2342                         if (controller_is_accessible(n) >= 0)
2343                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2344                 }
2345         }
2346
2347         *ret = mask;
2348         return 0;
2349 }
2350
2351 int cg_kernel_controllers(Set **ret) {
2352         _cleanup_set_free_free_ Set *controllers = NULL;
2353         _cleanup_fclose_ FILE *f = NULL;
2354         int r;
2355
2356         assert(ret);
2357
2358         /* Determines the full list of kernel-known controllers. Might
2359          * include controllers we don't actually support, arbitrary
2360          * named hierarchies and controllers that aren't currently
2361          * accessible (because not mounted). */
2362
2363         controllers = set_new(&string_hash_ops);
2364         if (!controllers)
2365                 return -ENOMEM;
2366
2367         f = fopen("/proc/cgroups", "re");
2368         if (!f) {
2369                 if (errno == ENOENT) {
2370                         *ret = NULL;
2371                         return 0;
2372                 }
2373
2374                 return -errno;
2375         }
2376
2377         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
2378
2379         /* Ignore the header line */
2380         (void) read_line(f, (size_t) -1, NULL);
2381
2382         for (;;) {
2383                 char *controller;
2384                 int enabled = 0;
2385
2386                 errno = 0;
2387                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2388
2389                         if (feof(f))
2390                                 break;
2391
2392                         if (ferror(f) && errno > 0)
2393                                 return -errno;
2394
2395                         return -EBADMSG;
2396                 }
2397
2398                 if (!enabled) {
2399                         free(controller);
2400                         continue;
2401                 }
2402
2403                 if (!cg_controller_is_valid(controller)) {
2404                         free(controller);
2405                         return -EBADMSG;
2406                 }
2407
2408                 r = set_consume(controllers, controller);
2409                 if (r < 0)
2410                         return r;
2411         }
2412
2413         *ret = controllers;
2414         controllers = NULL;
2415
2416         return 0;
2417 }
2418
/* Per-thread cache of the hierarchy layout detected by cg_unified_update(). It stays at
 * CGROUP_UNIFIED_UNKNOWN until a detection succeeds and is reset to that value by cg_unified_flush(). */
2419 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2420
2421 /* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd.  This
2422  * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
2423  * /sys/fs/cgroup/systemd.  From v233 and on, the hybrid mode mounts v2 on /sys/fs/cgroup/unified and maintains
2424  * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
2425  *
2426  * To keep live upgrade working, we detect and support v232 layout.  When v232 layout is detected, to keep cgroup v2
2427  * process management but disable the compat dual layout, we return %true on
2428  * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
2429  */
2430 static thread_local bool unified_systemd_v232;
2431
2432 static int cg_unified_update(void) {
2433
2434         struct statfs fs;
2435
2436         /* Checks if we support the unified hierarchy. Returns an
2437          * error when the cgroup hierarchies aren't mounted yet or we
2438          * have any other trouble determining if the unified hierarchy
2439          * is supported. */
2440
2441         if (unified_cache >= CGROUP_UNIFIED_NONE)
2442                 return 0;
2443
2444         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2445                 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\" failed: %m");
2446
2447         if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2448                 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
2449                 unified_cache = CGROUP_UNIFIED_ALL;
2450         } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2451                 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
2452                     F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2453                         log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2454                         unified_cache = CGROUP_UNIFIED_SYSTEMD;
2455                         unified_systemd_v232 = false;
2456                 } else {
2457                         if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2458                                 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
2459
2460                         if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2461                                 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2462                                 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2463                                 unified_systemd_v232 = true;
2464                         } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2465                                 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2466                                 unified_cache = CGROUP_UNIFIED_NONE;
2467                         } else {
2468                                 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
2469                                           (unsigned long long) fs.f_type);
2470                                 unified_cache = CGROUP_UNIFIED_NONE;
2471                         }
2472                 }
2473         } else {
2474                 log_debug("Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2475                           (unsigned long long) fs.f_type);
2476                 return -ENOMEDIUM;
2477         }
2478
2479         return 0;
2480 }
2481
2482 int cg_unified_controller(const char *controller) {
2483         int r;
2484
2485         r = cg_unified_update();
2486         if (r < 0)
2487                 return r;
2488
2489         if (unified_cache == CGROUP_UNIFIED_NONE)
2490                 return false;
2491
2492         if (unified_cache >= CGROUP_UNIFIED_ALL)
2493                 return true;
2494
2495         return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
2496 }
2497
2498 int cg_all_unified(void) {
2499         int r;
2500
2501         r = cg_unified_update();
2502         if (r < 0)
2503                 return r;
2504
2505         return unified_cache >= CGROUP_UNIFIED_ALL;
2506 }
2507
2508 int cg_hybrid_unified(void) {
2509         int r;
2510
2511         r = cg_unified_update();
2512         if (r < 0)
2513                 return r;
2514
2515         return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2516 }
2517
2518 int cg_unified_flush(void) {
2519         unified_cache = CGROUP_UNIFIED_UNKNOWN;
2520
2521         return cg_unified_update();
2522 }
2523
2524 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2525         _cleanup_fclose_ FILE *f = NULL;
2526         _cleanup_free_ char *fs = NULL;
2527         CGroupController c;
2528         int r;
2529
2530         assert(p);
2531
2532         if (supported == 0)
2533                 return 0;
2534
2535         r = cg_all_unified();
2536         if (r < 0)
2537                 return r;
2538         if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
2539                 return 0;
2540
2541         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2542         if (r < 0)
2543                 return r;
2544
2545         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2546                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2547                 const char *n;
2548
2549                 if (!(supported & bit))
2550                         continue;
2551
2552                 n = cgroup_controller_to_string(c);
2553                 {
2554                         char s[1 + strlen(n) + 1];
2555
2556                         s[0] = mask & bit ? '+' : '-';
2557                         strcpy(s + 1, n);
2558
2559                         if (!f) {
2560                                 f = fopen(fs, "we");
2561                                 if (!f) {
2562                                         log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
2563                                         break;
2564                                 }
2565                         }
2566
2567                         r = write_string_stream(f, s, 0);
2568                         if (r < 0)
2569                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2570                 }
2571         }
2572
2573         return 0;
2574 }
2575
2576 bool cg_is_unified_wanted(void) {
2577         static thread_local int wanted = -1;
2578         int r;
2579         bool b;
2580         const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
2581
2582         /* If we have a cached value, return that. */
2583         if (wanted >= 0)
2584                 return wanted;
2585
2586         /* If the hierarchy is already mounted, then follow whatever
2587          * was chosen for it. */
2588         if (cg_unified_flush() >= 0)
2589                 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
2590
2591         /* Otherwise, let's see what the kernel command line has to say.
2592          * Since checking is expensive, cache a non-error result. */
2593         r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
2594
2595         return (wanted = r > 0 ? b : is_default);
2596 }
2597
2598 bool cg_is_legacy_wanted(void) {
2599         static thread_local int wanted = -1;
2600
2601         /* If we have a cached value, return that. */
2602         if (wanted >= 0)
2603                 return wanted;
2604
2605         /* Check if we have cgroups2 already mounted. */
2606         if (cg_unified_flush() >= 0 &&
2607             unified_cache == CGROUP_UNIFIED_ALL)
2608                 return (wanted = false);
2609
2610         /* Otherwise, assume that at least partial legacy is wanted,
2611          * since cgroups2 should already be mounted at this point. */
2612         return (wanted = true);
2613 }
2614
2615 bool cg_is_hybrid_wanted(void) {
2616         static thread_local int wanted = -1;
2617         int r;
2618         bool b;
2619         const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2620         /* We default to true if the default is "hybrid", obviously,
2621          * but also when the default is "unified", because if we get
2622          * called, it means that unified hierarchy was not mounted. */
2623
2624         /* If we have a cached value, return that. */
2625         if (wanted >= 0)
2626                 return wanted;
2627
2628         /* If the hierarchy is already mounted, then follow whatever
2629          * was chosen for it. */
2630         if (cg_unified_flush() >= 0 &&
2631             unified_cache == CGROUP_UNIFIED_ALL)
2632                 return (wanted = false);
2633
2634         /* Otherwise, let's see what the kernel command line has to say.
2635          * Since checking is expensive, cache a non-error result. */
2636         r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
2637
2638         /* The meaning of the kernel option is reversed wrt. to the return value
2639          * of this function, hence the negation. */
2640         return (wanted = r > 0 ? !b : is_default);
2641 }
2642
2643 int cg_weight_parse(const char *s, uint64_t *ret) {
2644         uint64_t u;
2645         int r;
2646
2647         if (isempty(s)) {
2648                 *ret = CGROUP_WEIGHT_INVALID;
2649                 return 0;
2650         }
2651
2652         r = safe_atou64(s, &u);
2653         if (r < 0)
2654                 return r;
2655
2656         if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2657                 return -ERANGE;
2658
2659         *ret = u;
2660         return 0;
2661 }
2662
/* Default value per IO limit type, indexed by CGroupIOLimitType: every limit defaults to
 * CGROUP_LIMIT_MAX. */
2663 const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2664         [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
2665         [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
2666         [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
2667         [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
2668 };
2669
/* String name per IO limit type, indexed by CGroupIOLimitType. */
2670 static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2671         [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
2672         [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
2673         [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
2674         [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
2675 };
2676
/* NOTE(review): presumably generates cgroup_io_limit_type_to_string()/_from_string() on top of the
 * table above -- confirm against the macro definition in string-table.h. */
2677 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2678
2679 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2680         uint64_t u;
2681         int r;
2682
2683         if (isempty(s)) {
2684                 *ret = CGROUP_CPU_SHARES_INVALID;
2685                 return 0;
2686         }
2687
2688         r = safe_atou64(s, &u);
2689         if (r < 0)
2690                 return r;
2691
2692         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2693                 return -ERANGE;
2694
2695         *ret = u;
2696         return 0;
2697 }
2698
2699 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2700         uint64_t u;
2701         int r;
2702
2703         if (isempty(s)) {
2704                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2705                 return 0;
2706         }
2707
2708         r = safe_atou64(s, &u);
2709         if (r < 0)
2710                 return r;
2711
2712         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2713                 return -ERANGE;
2714
2715         *ret = u;
2716         return 0;
2717 }
2718
2719 bool is_cgroup_fs(const struct statfs *s) {
2720         return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2721                is_fs_type(s, CGROUP2_SUPER_MAGIC);
2722 }
2723
/* Returns true if 'fd' refers to a file on a cgroup or cgroup2 file system, as determined by
 * is_cgroup_fs(). If the file system cannot be queried the answer is false: the return type is bool,
 * so a negative errno value cannot be passed to the caller (it would silently turn into "true"). */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2732
2733 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2734         [CGROUP_CONTROLLER_CPU] = "cpu",
2735         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2736         [CGROUP_CONTROLLER_IO] = "io",
2737         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2738         [CGROUP_CONTROLLER_MEMORY] = "memory",
2739         [CGROUP_CONTROLLER_DEVICES] = "devices",
2740         [CGROUP_CONTROLLER_PIDS] = "pids",
2741 };
2742
2743 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);