src/basic/cgroup-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <dirent.h>
  22 #include <errno.h>
  23 #include <ftw.h>
  24 #include <limits.h>
  25 #include <signal.h>
  26 #include <stddef.h>
  27 #include <stdio_ext.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #include <sys/stat.h>
  31 #include <sys/statfs.h>
  32 #include <sys/types.h>
  33 #include <sys/xattr.h>
  34 #include <unistd.h>
  35
  36 #include "alloc-util.h"
  37 #include "cgroup-util.h"
  38 #include "def.h"
  39 #include "dirent-util.h"
  40 #include "extract-word.h"
  41 #include "fd-util.h"
  42 #include "fileio.h"
  43 #include "format-util.h"
  44 #include "fs-util.h"
  45 #include "log.h"
  46 #include "login-util.h"
  47 #include "macro.h"
  48 #include "missing.h"
  49 #include "mkdir.h"
  50 #include "parse-util.h"
  51 #include "path-util.h"
  52 #include "proc-cmdline.h"
  53 #include "process-util.h"
  54 #include "set.h"
  55 #include "special.h"
  56 #include "stat-util.h"
  57 #include "stdio-util.h"
  58 #include "string-table.h"
  59 #include "string-util.h"
  60 #include "strv.h"
  61 #include "unit-name.h"
  62 #include "user-util.h"
  63
  64 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  65         _cleanup_free_ char *fs = NULL;
  66         FILE *f;
  67         int r;
  68
  69         assert(_f);
  70
  71         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  72         if (r < 0)
  73                 return r;
  74
  75         f = fopen(fs, "re");
  76         if (!f)
  77                 return -errno;
  78
  79         *_f = f;
  80         return 0;
  81 }
  82
  83 int cg_read_pid(FILE *f, pid_t *_pid) {
  84         unsigned long ul;
  85
  86         /* Note that the cgroup.procs might contain duplicates! See
  87          * cgroups.txt for details. */
  88
  89         assert(f);
  90         assert(_pid);
  91
  92         errno = 0;
  93         if (fscanf(f, "%lu", &ul) != 1) {
  94
  95                 if (feof(f))
  96                         return 0;
  97
  98                 return errno > 0 ? -errno : -EIO;
  99         }
 100
 101         if (ul <= 0)
 102                 return -EIO;
 103
 104         *_pid = (pid_t) ul;
 105         return 1;
 106 }
 107
 108 int cg_read_event(
 109                 const char *controller,
 110                 const char *path,
 111                 const char *event,
 112                 char **val) {
 113
 114         _cleanup_free_ char *events = NULL, *content = NULL;
 115         char *p, *line;
 116         int r;
 117
 118         r = cg_get_path(controller, path, "cgroup.events", &events);
 119         if (r < 0)
 120                 return r;
 121
 122         r = read_full_file(events, &content, NULL);
 123         if (r < 0)
 124                 return r;
 125
 126         p = content;
 127         while ((line = strsep(&p, "\n"))) {
 128                 char *key;
 129
 130                 key = strsep(&line, " ");
 131                 if (!key || !line)
 132                         return -EINVAL;
 133
 134                 if (strcmp(key, event))
 135                         continue;
 136
 137                 *val = strdup(line);
 138                 return 0;
 139         }
 140
 141         return -ENOENT;
 142 }
 143
 144 bool cg_ns_supported(void) {
 145         static thread_local int enabled = -1;
 146
 147         if (enabled >= 0)
 148                 return enabled;
 149
 150         if (access("/proc/self/ns/cgroup", F_OK) == 0)
 151                 enabled = 1;
 152         else
 153                 enabled = 0;
 154
 155         return enabled;
 156 }
 157
 158 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
 159         _cleanup_free_ char *fs = NULL;
 160         int r;
 161         DIR *d;
 162
 163         assert(_d);
 164
 165         /* This is not recursive! */
 166
 167         r = cg_get_path(controller, path, NULL, &fs);
 168         if (r < 0)
 169                 return r;
 170
 171         d = opendir(fs);
 172         if (!d)
 173                 return -errno;
 174
 175         *_d = d;
 176         return 0;
 177 }
 178
 179 int cg_read_subgroup(DIR *d, char **fn) {
 180         struct dirent *de;
 181
 182         assert(d);
 183         assert(fn);
 184
 185         FOREACH_DIRENT_ALL(de, d, return -errno) {
 186                 char *b;
 187
 188                 if (de->d_type != DT_DIR)
 189                         continue;
 190
 191                 if (dot_or_dot_dot(de->d_name))
 192                         continue;
 193
 194                 b = strdup(de->d_name);
 195                 if (!b)
 196                         return -ENOMEM;
 197
 198                 *fn = b;
 199                 return 1;
 200         }
 201
 202         return 0;
 203 }
 204
 205 int cg_rmdir(const char *controller, const char *path) {
 206         _cleanup_free_ char *p = NULL;
 207         int r;
 208
 209         r = cg_get_path(controller, path, NULL, &p);
 210         if (r < 0)
 211                 return r;
 212
 213         r = rmdir(p);
 214         if (r < 0 && errno != ENOENT)
 215                 return -errno;
 216
 217         r = cg_hybrid_unified();
 218         if (r < 0)
 219                 return r;
 220         if (r == 0)
 221                 return 0;
 222
 223         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 224                 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
 225                 if (r < 0)
 226                         log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
 227         }
 228
 229         return 0;
 230 }
 231
 232 int cg_kill(
 233                 const char *controller,
 234                 const char *path,
 235                 int sig,
 236                 CGroupFlags flags,
 237                 Set *s,
 238                 cg_kill_log_func_t log_kill,
 239                 void *userdata) {
 240
 241         _cleanup_set_free_ Set *allocated_set = NULL;
 242         bool done = false;
 243         int r, ret = 0;
 244         pid_t my_pid;
 245
 246         assert(sig >= 0);
 247
 248          /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
 249           * SIGCONT on SIGKILL. */
 250         if (IN_SET(sig, SIGCONT, SIGKILL))
 251                 flags &= ~CGROUP_SIGCONT;
 252
 253         /* This goes through the tasks list and kills them all. This
 254          * is repeated until no further processes are added to the
 255          * tasks list, to properly handle forking processes */
 256
 257         if (!s) {
 258                 s = allocated_set = set_new(NULL);
 259                 if (!s)
 260                         return -ENOMEM;
 261         }
 262
 263         my_pid = getpid_cached();
 264
 265         do {
 266                 _cleanup_fclose_ FILE *f = NULL;
 267                 pid_t pid = 0;
 268                 done = true;
 269
 270                 r = cg_enumerate_processes(controller, path, &f);
 271                 if (r < 0) {
 272                         if (ret >= 0 && r != -ENOENT)
 273                                 return r;
 274
 275                         return ret;
 276                 }
 277
 278                 while ((r = cg_read_pid(f, &pid)) > 0) {
 279
 280                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 281                                 continue;
 282
 283                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 284                                 continue;
 285
 286                         if (log_kill)
 287                                 log_kill(pid, sig, userdata);
 288
 289                         /* If we haven't killed this process yet, kill
 290                          * it */
 291                         if (kill(pid, sig) < 0) {
 292                                 if (ret >= 0 && errno != ESRCH)
 293                                         ret = -errno;
 294                         } else {
 295                                 if (flags & CGROUP_SIGCONT)
 296                                         (void) kill(pid, SIGCONT);
 297
 298                                 if (ret == 0)
 299                                         ret = 1;
 300                         }
 301
 302                         done = false;
 303
 304                         r = set_put(s, PID_TO_PTR(pid));
 305                         if (r < 0) {
 306                                 if (ret >= 0)
 307                                         return r;
 308
 309                                 return ret;
 310                         }
 311                 }
 312
 313                 if (r < 0) {
 314                         if (ret >= 0)
 315                                 return r;
 316
 317                         return ret;
 318                 }
 319
 320                 /* To avoid racing against processes which fork
 321                  * quicker than we can kill them we repeat this until
 322                  * no new pids need to be killed. */
 323
 324         } while (!done);
 325
 326         return ret;
 327 }
 328
 329 int cg_kill_recursive(
 330                 const char *controller,
 331                 const char *path,
 332                 int sig,
 333                 CGroupFlags flags,
 334                 Set *s,
 335                 cg_kill_log_func_t log_kill,
 336                 void *userdata) {
 337
 338         _cleanup_set_free_ Set *allocated_set = NULL;
 339         _cleanup_closedir_ DIR *d = NULL;
 340         int r, ret;
 341         char *fn;
 342
 343         assert(path);
 344         assert(sig >= 0);
 345
 346         if (!s) {
 347                 s = allocated_set = set_new(NULL);
 348                 if (!s)
 349                         return -ENOMEM;
 350         }
 351
 352         ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
 353
 354         r = cg_enumerate_subgroups(controller, path, &d);
 355         if (r < 0) {
 356                 if (ret >= 0 && r != -ENOENT)
 357                         return r;
 358
 359                 return ret;
 360         }
 361
 362         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 363                 _cleanup_free_ char *p = NULL;
 364
 365                 p = strjoin(path, "/", fn);
 366                 free(fn);
 367                 if (!p)
 368                         return -ENOMEM;
 369
 370                 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
 371                 if (r != 0 && ret >= 0)
 372                         ret = r;
 373         }
 374         if (ret >= 0 && r < 0)
 375                 ret = r;
 376
 377         if (flags & CGROUP_REMOVE) {
 378                 r = cg_rmdir(controller, path);
 379                 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
 380                         return r;
 381         }
 382
 383         return ret;
 384 }
 385
 386 int cg_migrate(
 387                 const char *cfrom,
 388                 const char *pfrom,
 389                 const char *cto,
 390                 const char *pto,
 391                 CGroupFlags flags) {
 392
 393         bool done = false;
 394         _cleanup_set_free_ Set *s = NULL;
 395         int r, ret = 0;
 396         pid_t my_pid;
 397
 398         assert(cfrom);
 399         assert(pfrom);
 400         assert(cto);
 401         assert(pto);
 402
 403         s = set_new(NULL);
 404         if (!s)
 405                 return -ENOMEM;
 406
 407         my_pid = getpid_cached();
 408
 409         do {
 410                 _cleanup_fclose_ FILE *f = NULL;
 411                 pid_t pid = 0;
 412                 done = true;
 413
 414                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 415                 if (r < 0) {
 416                         if (ret >= 0 && r != -ENOENT)
 417                                 return r;
 418
 419                         return ret;
 420                 }
 421
 422                 while ((r = cg_read_pid(f, &pid)) > 0) {
 423
 424                         /* This might do weird stuff if we aren't a
 425                          * single-threaded program. However, we
 426                          * luckily know we are not */
 427                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 428                                 continue;
 429
 430                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 431                                 continue;
 432
 433                         /* Ignore kernel threads. Since they can only
 434                          * exist in the root cgroup, we only check for
 435                          * them there. */
 436                         if (cfrom &&
 437                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 438                             is_kernel_thread(pid) > 0)
 439                                 continue;
 440
 441                         r = cg_attach(cto, pto, pid);
 442                         if (r < 0) {
 443                                 if (ret >= 0 && r != -ESRCH)
 444                                         ret = r;
 445                         } else if (ret == 0)
 446                                 ret = 1;
 447
 448                         done = false;
 449
 450                         r = set_put(s, PID_TO_PTR(pid));
 451                         if (r < 0) {
 452                                 if (ret >= 0)
 453                                         return r;
 454
 455                                 return ret;
 456                         }
 457                 }
 458
 459                 if (r < 0) {
 460                         if (ret >= 0)
 461                                 return r;
 462
 463                         return ret;
 464                 }
 465         } while (!done);
 466
 467         return ret;
 468 }
 469
 470 int cg_migrate_recursive(
 471                 const char *cfrom,
 472                 const char *pfrom,
 473                 const char *cto,
 474                 const char *pto,
 475                 CGroupFlags flags) {
 476
 477         _cleanup_closedir_ DIR *d = NULL;
 478         int r, ret = 0;
 479         char *fn;
 480
 481         assert(cfrom);
 482         assert(pfrom);
 483         assert(cto);
 484         assert(pto);
 485
 486         ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
 487
 488         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 489         if (r < 0) {
 490                 if (ret >= 0 && r != -ENOENT)
 491                         return r;
 492
 493                 return ret;
 494         }
 495
 496         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 497                 _cleanup_free_ char *p = NULL;
 498
 499                 p = strjoin(pfrom, "/", fn);
 500                 free(fn);
 501                 if (!p)
 502                         return -ENOMEM;
 503
 504                 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
 505                 if (r != 0 && ret >= 0)
 506                         ret = r;
 507         }
 508
 509         if (r < 0 && ret >= 0)
 510                 ret = r;
 511
 512         if (flags & CGROUP_REMOVE) {
 513                 r = cg_rmdir(cfrom, pfrom);
 514                 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
 515                         return r;
 516         }
 517
 518         return ret;
 519 }
 520
 521 int cg_migrate_recursive_fallback(
 522                 const char *cfrom,
 523                 const char *pfrom,
 524                 const char *cto,
 525                 const char *pto,
 526                 CGroupFlags flags) {
 527
 528         int r;
 529
 530         assert(cfrom);
 531         assert(pfrom);
 532         assert(cto);
 533         assert(pto);
 534
 535         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
 536         if (r < 0) {
 537                 char prefix[strlen(pto) + 1];
 538
 539                 /* This didn't work? Then let's try all prefixes of the destination */
 540
 541                 PATH_FOREACH_PREFIX(prefix, pto) {
 542                         int q;
 543
 544                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
 545                         if (q >= 0)
 546                                 return q;
 547                 }
 548         }
 549
 550         return r;
 551 }
 552
 553 static const char *controller_to_dirname(const char *controller) {
 554         const char *e;
 555
 556         assert(controller);
 557
 558         /* Converts a controller name to the directory name below
 559          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 560          * just cuts off the name= prefixed used for named
 561          * hierarchies, if it is specified. */
 562
 563         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 564                 if (cg_hybrid_unified() > 0)
 565                         controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
 566                 else
 567                         controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
 568         }
 569
 570         e = startswith(controller, "name=");
 571         if (e)
 572                 return e;
 573
 574         return controller;
 575 }
 576
 577 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 578         const char *dn;
 579         char *t = NULL;
 580
 581         assert(fs);
 582         assert(controller);
 583
 584         dn = controller_to_dirname(controller);
 585
 586         if (isempty(path) && isempty(suffix))
 587                 t = strappend("/sys/fs/cgroup/", dn);
 588         else if (isempty(path))
 589                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
 590         else if (isempty(suffix))
 591                 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
 592         else
 593                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
 594         if (!t)
 595                 return -ENOMEM;
 596
 597         *fs = t;
 598         return 0;
 599 }
 600
 601 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 602         char *t;
 603
 604         assert(fs);
 605
 606         if (isempty(path) && isempty(suffix))
 607                 t = strdup("/sys/fs/cgroup");
 608         else if (isempty(path))
 609                 t = strappend("/sys/fs/cgroup/", suffix);
 610         else if (isempty(suffix))
 611                 t = strappend("/sys/fs/cgroup/", path);
 612         else
 613                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix);
 614         if (!t)
 615                 return -ENOMEM;
 616
 617         *fs = t;
 618         return 0;
 619 }
 620
 621 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 622         int r;
 623
 624         assert(fs);
 625
 626         if (!controller) {
 627                 char *t;
 628
 629                 /* If no controller is specified, we return the path
 630                  * *below* the controllers, without any prefix. */
 631
 632                 if (!path && !suffix)
 633                         return -EINVAL;
 634
 635                 if (!suffix)
 636                         t = strdup(path);
 637                 else if (!path)
 638                         t = strdup(suffix);
 639                 else
 640                         t = strjoin(path, "/", suffix);
 641                 if (!t)
 642                         return -ENOMEM;
 643
 644                 *fs = path_kill_slashes(t);
 645                 return 0;
 646         }
 647
 648         if (!cg_controller_is_valid(controller))
 649                 return -EINVAL;
 650
 651         r = cg_all_unified();
 652         if (r < 0)
 653                 return r;
 654         if (r > 0)
 655                 r = join_path_unified(path, suffix, fs);
 656         else
 657                 r = join_path_legacy(controller, path, suffix, fs);
 658         if (r < 0)
 659                 return r;
 660
 661         path_kill_slashes(*fs);
 662         return 0;
 663 }
 664
 665 static int controller_is_accessible(const char *controller) {
 666         int r;
 667
 668         assert(controller);
 669
 670         /* Checks whether a specific controller is accessible,
 671          * i.e. its hierarchy mounted. In the unified hierarchy all
 672          * controllers are considered accessible, except for the named
 673          * hierarchies */
 674
 675         if (!cg_controller_is_valid(controller))
 676                 return -EINVAL;
 677
 678         r = cg_all_unified();
 679         if (r < 0)
 680                 return r;
 681         if (r > 0) {
 682                 /* We don't support named hierarchies if we are using
 683                  * the unified hierarchy. */
 684
 685                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 686                         return 0;
 687
 688                 if (startswith(controller, "name="))
 689                         return -EOPNOTSUPP;
 690
 691         } else {
 692                 const char *cc, *dn;
 693
 694                 dn = controller_to_dirname(controller);
 695                 cc = strjoina("/sys/fs/cgroup/", dn);
 696
 697                 if (laccess(cc, F_OK) < 0)
 698                         return -errno;
 699         }
 700
 701         return 0;
 702 }
 703
 704 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 705         int r;
 706
 707         assert(controller);
 708         assert(fs);
 709
 710         /* Check if the specified controller is actually accessible */
 711         r = controller_is_accessible(controller);
 712         if (r < 0)
 713                 return r;
 714
 715         return cg_get_path(controller, path, suffix, fs);
 716 }
 717
 718 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 719         assert(path);
 720         assert(sb);
 721         assert(ftwbuf);
 722
 723         if (typeflag != FTW_DP)
 724                 return 0;
 725
 726         if (ftwbuf->level < 1)
 727                 return 0;
 728
 729         (void) rmdir(path);
 730         return 0;
 731 }
 732
 733 int cg_trim(const char *controller, const char *path, bool delete_root) {
 734         _cleanup_free_ char *fs = NULL;
 735         int r = 0, q;
 736
 737         assert(path);
 738
 739         r = cg_get_path(controller, path, NULL, &fs);
 740         if (r < 0)
 741                 return r;
 742
 743         errno = 0;
 744         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 745                 if (errno == ENOENT)
 746                         r = 0;
 747                 else if (errno > 0)
 748                         r = -errno;
 749                 else
 750                         r = -EIO;
 751         }
 752
 753         if (delete_root) {
 754                 if (rmdir(fs) < 0 && errno != ENOENT)
 755                         return -errno;
 756         }
 757
 758         q = cg_hybrid_unified();
 759         if (q < 0)
 760                 return q;
 761         if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 762                 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
 763                 if (q < 0)
 764                         log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
 765         }
 766
 767         return r;
 768 }
 769
 770 int cg_create(const char *controller, const char *path) {
 771         _cleanup_free_ char *fs = NULL;
 772         int r;
 773
 774         r = cg_get_path_and_check(controller, path, NULL, &fs);
 775         if (r < 0)
 776                 return r;
 777
 778         r = mkdir_parents(fs, 0755);
 779         if (r < 0)
 780                 return r;
 781
 782         r = mkdir_errno_wrapper(fs, 0755);
 783         if (r == -EEXIST)
 784                 return 0;
 785         if (r < 0)
 786                 return r;
 787
 788         r = cg_hybrid_unified();
 789         if (r < 0)
 790                 return r;
 791
 792         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 793                 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
 794                 if (r < 0)
 795                         log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
 796         }
 797
 798         return 1;
 799 }
 800
 801 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 802         int r, q;
 803
 804         assert(pid >= 0);
 805
 806         r = cg_create(controller, path);
 807         if (r < 0)
 808                 return r;
 809
 810         q = cg_attach(controller, path, pid);
 811         if (q < 0)
 812                 return q;
 813
 814         /* This does not remove the cgroup on failure */
 815         return r;
 816 }
 817
 818 int cg_attach(const char *controller, const char *path, pid_t pid) {
 819         _cleanup_free_ char *fs = NULL;
 820         char c[DECIMAL_STR_MAX(pid_t) + 2];
 821         int r;
 822
 823         assert(path);
 824         assert(pid >= 0);
 825
 826         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 827         if (r < 0)
 828                 return r;
 829
 830         if (pid == 0)
 831                 pid = getpid_cached();
 832
 833         xsprintf(c, PID_FMT "\n", pid);
 834
 835         r = write_string_file(fs, c, 0);
 836         if (r < 0)
 837                 return r;
 838
 839         r = cg_hybrid_unified();
 840         if (r < 0)
 841                 return r;
 842
 843         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 844                 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
 845                 if (r < 0)
 846                         log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
 847         }
 848
 849         return 0;
 850 }
 851
 852 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 853         int r;
 854
 855         assert(controller);
 856         assert(path);
 857         assert(pid >= 0);
 858
 859         r = cg_attach(controller, path, pid);
 860         if (r < 0) {
 861                 char prefix[strlen(path) + 1];
 862
 863                 /* This didn't work? Then let's try all prefixes of
 864                  * the destination */
 865
 866                 PATH_FOREACH_PREFIX(prefix, path) {
 867                         int q;
 868
 869                         q = cg_attach(controller, prefix, pid);
 870                         if (q >= 0)
 871                                 return q;
 872                 }
 873         }
 874
 875         return r;
 876 }
 877
 878 int cg_set_access(
 879                 const char *controller,
 880                 const char *path,
 881                 uid_t uid,
 882                 gid_t gid) {
 883
 884         struct Attribute {
 885                 const char *name;
 886                 bool fatal;
 887         };
 888
 889         /* cgroupsv1, aka legacy/non-unified */
 890         static const struct Attribute legacy_attributes[] = {
 891                 { "cgroup.procs",           true  },
 892                 { "tasks",                  false },
 893                 { "cgroup.clone_children",  false },
 894                 {},
 895         };
 896
 897         /* cgroupsv2, aka unified */
 898         static const struct Attribute unified_attributes[] = {
 899                 { "cgroup.procs",           true  },
 900                 { "cgroup.subtree_control", true  },
 901                 { "cgroup.threads",         false },
 902                 {},
 903         };
 904
 905         static const struct Attribute* const attributes[] = {
 906                 [false] = legacy_attributes,
 907                 [true]  = unified_attributes,
 908         };
 909
 910         _cleanup_free_ char *fs = NULL;
 911         const struct Attribute *i;
 912         int r, unified;
 913
 914         assert(path);
 915
 916         if (uid == UID_INVALID && gid == GID_INVALID)
 917                 return 0;
 918
 919         unified = cg_unified_controller(controller);
 920         if (unified < 0)
 921                 return unified;
 922
 923         /* Configure access to the cgroup itself */
 924         r = cg_get_path(controller, path, NULL, &fs);
 925         if (r < 0)
 926                 return r;
 927
 928         r = chmod_and_chown(fs, 0755, uid, gid);
 929         if (r < 0)
 930                 return r;
 931
 932         /* Configure access to the cgroup's attributes */
 933         for (i = attributes[unified]; i->name; i++) {
 934                 fs = mfree(fs);
 935
 936                 r = cg_get_path(controller, path, i->name, &fs);
 937                 if (r < 0)
 938                         return r;
 939
 940                 r = chmod_and_chown(fs, 0644, uid, gid);
 941                 if (r < 0) {
 942                         if (i->fatal)
 943                                 return r;
 944
 945                         log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
 946                 }
 947         }
 948
 949         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 950                 r = cg_hybrid_unified();
 951                 if (r < 0)
 952                         return r;
 953                 if (r > 0) {
 954                         /* Always propagate access mode from unified to legacy controller */
 955                         r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
 956                         if (r < 0)
 957                                 log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
 958                 }
 959         }
 960
 961         return 0;
 962 }
 963
 964 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
 965         _cleanup_free_ char *fs = NULL;
 966         int r;
 967
 968         assert(path);
 969         assert(name);
 970         assert(value || size <= 0);
 971
 972         r = cg_get_path(controller, path, NULL, &fs);
 973         if (r < 0)
 974                 return r;
 975
 976         if (setxattr(fs, name, value, size, flags) < 0)
 977                 return -errno;
 978
 979         return 0;
 980 }
 981
 982 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
 983         _cleanup_free_ char *fs = NULL;
 984         ssize_t n;
 985         int r;
 986
 987         assert(path);
 988         assert(name);
 989
 990         r = cg_get_path(controller, path, NULL, &fs);
 991         if (r < 0)
 992                 return r;
 993
 994         n = getxattr(fs, name, value, size);
 995         if (n < 0)
 996                 return -errno;
 997
 998         return (int) n;
 999 }
1000
1001 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
1002         _cleanup_fclose_ FILE *f = NULL;
1003         char line[LINE_MAX];
1004         const char *fs, *controller_str;
1005         size_t cs = 0;
1006         int unified;
1007
1008         assert(path);
1009         assert(pid >= 0);
1010
1011         if (controller) {
1012                 if (!cg_controller_is_valid(controller))
1013                         return -EINVAL;
1014         } else
1015                 controller = SYSTEMD_CGROUP_CONTROLLER;
1016
1017         unified = cg_unified_controller(controller);
1018         if (unified < 0)
1019                 return unified;
1020         if (unified == 0) {
1021                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1022                         controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1023                 else
1024                         controller_str = controller;
1025
1026                 cs = strlen(controller_str);
1027         }
1028
1029         fs = procfs_file_alloca(pid, "cgroup");
1030         f = fopen(fs, "re");
1031         if (!f)
1032                 return errno == ENOENT ? -ESRCH : -errno;
1033
1034         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
1035
1036         FOREACH_LINE(line, f, return -errno) {
1037                 char *e, *p;
1038
1039                 truncate_nl(line);
1040
1041                 if (unified) {
1042                         e = startswith(line, "0:");
1043                         if (!e)
1044                                 continue;
1045
1046                         e = strchr(e, ':');
1047                         if (!e)
1048                                 continue;
1049                 } else {
1050                         char *l;
1051                         size_t k;
1052                         const char *word, *state;
1053                         bool found = false;
1054
1055                         l = strchr(line, ':');
1056                         if (!l)
1057                                 continue;
1058
1059                         l++;
1060                         e = strchr(l, ':');
1061                         if (!e)
1062                                 continue;
1063
1064                         *e = 0;
1065                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
1066                                 if (k == cs && memcmp(word, controller_str, cs) == 0) {
1067                                         found = true;
1068                                         break;
1069                                 }
1070                         }
1071
1072                         if (!found)
1073                                 continue;
1074                 }
1075
1076                 p = strdup(e + 1);
1077                 if (!p)
1078                         return -ENOMEM;
1079
1080                 /* Truncate suffix indicating the process is a zombie */
1081                 e = endswith(p, " (deleted)");
1082                 if (e)
1083                         *e = 0;
1084
1085                 *path = p;
1086                 return 0;
1087         }
1088
1089         return -ENODATA;
1090 }
1091
1092 int cg_install_release_agent(const char *controller, const char *agent) {
1093         _cleanup_free_ char *fs = NULL, *contents = NULL;
1094         const char *sc;
1095         int r;
1096
1097         assert(agent);
1098
1099         r = cg_unified_controller(controller);
1100         if (r < 0)
1101                 return r;
1102         if (r > 0) /* doesn't apply to unified hierarchy */
1103                 return -EOPNOTSUPP;
1104
1105         r = cg_get_path(controller, NULL, "release_agent", &fs);
1106         if (r < 0)
1107                 return r;
1108
1109         r = read_one_line_file(fs, &contents);
1110         if (r < 0)
1111                 return r;
1112
1113         sc = strstrip(contents);
1114         if (isempty(sc)) {
1115                 r = write_string_file(fs, agent, 0);
1116                 if (r < 0)
1117                         return r;
1118         } else if (!path_equal(sc, agent))
1119                 return -EEXIST;
1120
1121         fs = mfree(fs);
1122         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1123         if (r < 0)
1124                 return r;
1125
1126         contents = mfree(contents);
1127         r = read_one_line_file(fs, &contents);
1128         if (r < 0)
1129                 return r;
1130
1131         sc = strstrip(contents);
1132         if (streq(sc, "0")) {
1133                 r = write_string_file(fs, "1", 0);
1134                 if (r < 0)
1135                         return r;
1136
1137                 return 1;
1138         }
1139
1140         if (!streq(sc, "1"))
1141                 return -EIO;
1142
1143         return 0;
1144 }
1145
1146 int cg_uninstall_release_agent(const char *controller) {
1147         _cleanup_free_ char *fs = NULL;
1148         int r;
1149
1150         r = cg_unified_controller(controller);
1151         if (r < 0)
1152                 return r;
1153         if (r > 0) /* Doesn't apply to unified hierarchy */
1154                 return -EOPNOTSUPP;
1155
1156         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1157         if (r < 0)
1158                 return r;
1159
1160         r = write_string_file(fs, "0", 0);
1161         if (r < 0)
1162                 return r;
1163
1164         fs = mfree(fs);
1165
1166         r = cg_get_path(controller, NULL, "release_agent", &fs);
1167         if (r < 0)
1168                 return r;
1169
1170         r = write_string_file(fs, "", 0);
1171         if (r < 0)
1172                 return r;
1173
1174         return 0;
1175 }
1176
1177 int cg_is_empty(const char *controller, const char *path) {
1178         _cleanup_fclose_ FILE *f = NULL;
1179         pid_t pid;
1180         int r;
1181
1182         assert(path);
1183
1184         r = cg_enumerate_processes(controller, path, &f);
1185         if (r == -ENOENT)
1186                 return 1;
1187         if (r < 0)
1188                 return r;
1189
1190         r = cg_read_pid(f, &pid);
1191         if (r < 0)
1192                 return r;
1193
1194         return r == 0;
1195 }
1196
1197 int cg_is_empty_recursive(const char *controller, const char *path) {
1198         int r;
1199
1200         assert(path);
1201
1202         /* The root cgroup is always populated */
1203         if (controller && (isempty(path) || path_equal(path, "/")))
1204                 return false;
1205
1206         r = cg_unified_controller(controller);
1207         if (r < 0)
1208                 return r;
1209         if (r > 0) {
1210                 _cleanup_free_ char *t = NULL;
1211
1212                 /* On the unified hierarchy we can check empty state
1213                  * via the "populated" attribute of "cgroup.events". */
1214
1215                 r = cg_read_event(controller, path, "populated", &t);
1216                 if (r < 0)
1217                         return r;
1218
1219                 return streq(t, "0");
1220         } else {
1221                 _cleanup_closedir_ DIR *d = NULL;
1222                 char *fn;
1223
1224                 r = cg_is_empty(controller, path);
1225                 if (r <= 0)
1226                         return r;
1227
1228                 r = cg_enumerate_subgroups(controller, path, &d);
1229                 if (r == -ENOENT)
1230                         return 1;
1231                 if (r < 0)
1232                         return r;
1233
1234                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1235                         _cleanup_free_ char *p = NULL;
1236
1237                         p = strjoin(path, "/", fn);
1238                         free(fn);
1239                         if (!p)
1240                                 return -ENOMEM;
1241
1242                         r = cg_is_empty_recursive(controller, p);
1243                         if (r <= 0)
1244                                 return r;
1245                 }
1246                 if (r < 0)
1247                         return r;
1248
1249                 return true;
1250         }
1251 }
1252
/* Splits a cgroup specification into its controller and path parts. Three forms are handled:
 *
 *   "/some/path"       → *controller = NULL, *path = copy of the path
 *   "controller"       → *controller = copy of the name, *path = NULL
 *   "controller:/path" → both set; an empty part after the ':' results in *path = NULL
 *
 * Paths must pass path_is_normalized() (and path_is_absolute() in the third form) and are run through
 * path_kill_slashes(); controller names must pass cg_controller_is_valid(). Either output parameter may
 * be NULL if the caller is not interested. Returns 0 on success, -EINVAL on malformed input and -ENOMEM
 * on allocation failure. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctl, *rest = NULL;
        const char *colon;

        assert(spec);

        /* Form 1: just a path */
        if (spec[0] == '/') {
                if (!path_is_normalized(spec))
                        return -EINVAL;

                if (path) {
                        char *copy;

                        copy = strdup(spec);
                        if (!copy)
                                return -ENOMEM;

                        *path = path_kill_slashes(copy);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');

        /* Form 2: just a controller */
        if (!colon) {
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        char *copy;

                        copy = strdup(spec);
                        if (!copy)
                                return -ENOMEM;

                        *controller = copy;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Form 3: controller and path, separated by a colon */
        ctl = strndup(spec, colon - spec);
        if (!ctl)
                return -ENOMEM;
        if (!cg_controller_is_valid(ctl)) {
                free(ctl);
                return -EINVAL;
        }

        if (!isempty(colon + 1)) {
                rest = strdup(colon + 1);
                if (!rest) {
                        free(ctl);
                        return -ENOMEM;
                }

                if (!path_is_normalized(rest) ||
                    !path_is_absolute(rest)) {
                        free(ctl);
                        free(rest);
                        return -EINVAL;
                }

                path_kill_slashes(rest);
        }

        /* Hand over what the caller asked for, release the rest */
        if (!controller)
                free(ctl);
        else
                *controller = ctl;

        if (!path)
                free(rest);
        else
                *path = rest;

        return 0;
}
1335
1336 int cg_mangle_path(const char *path, char **result) {
1337         _cleanup_free_ char *c = NULL, *p = NULL;
1338         char *t;
1339         int r;
1340
1341         assert(path);
1342         assert(result);
1343
1344         /* First, check if it already is a filesystem path */
1345         if (path_startswith(path, "/sys/fs/cgroup")) {
1346
1347                 t = strdup(path);
1348                 if (!t)
1349                         return -ENOMEM;
1350
1351                 *result = path_kill_slashes(t);
1352                 return 0;
1353         }
1354
1355         /* Otherwise, treat it as cg spec */
1356         r = cg_split_spec(path, &c, &p);
1357         if (r < 0)
1358                 return r;
1359
1360         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1361 }
1362
1363 int cg_get_root_path(char **path) {
1364         char *p, *e;
1365         int r;
1366
1367         assert(path);
1368
1369         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1370         if (r < 0)
1371                 return r;
1372
1373         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1374         if (!e)
1375                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1376         if (!e)
1377                 e = endswith(p, "/system"); /* even more legacy */
1378         if (e)
1379                 *e = 0;
1380
1381         *path = p;
1382         return 0;
1383 }
1384
1385 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1386         _cleanup_free_ char *rt = NULL;
1387         char *p;
1388         int r;
1389
1390         assert(cgroup);
1391         assert(shifted);
1392
1393         if (!root) {
1394                 /* If the root was specified let's use that, otherwise
1395                  * let's determine it from PID 1 */
1396
1397                 r = cg_get_root_path(&rt);
1398                 if (r < 0)
1399                         return r;
1400
1401                 root = rt;
1402         }
1403
1404         p = path_startswith(cgroup, root);
1405         if (p && p > cgroup)
1406                 *shifted = p - 1;
1407         else
1408                 *shifted = cgroup;
1409
1410         return 0;
1411 }
1412
1413 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1414         _cleanup_free_ char *raw = NULL;
1415         const char *c;
1416         int r;
1417
1418         assert(pid >= 0);
1419         assert(cgroup);
1420
1421         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1422         if (r < 0)
1423                 return r;
1424
1425         r = cg_shift_path(raw, root, &c);
1426         if (r < 0)
1427                 return r;
1428
1429         if (c == raw) {
1430                 *cgroup = raw;
1431                 raw = NULL;
1432         } else {
1433                 char *n;
1434
1435                 n = strdup(c);
1436                 if (!n)
1437                         return -ENOMEM;
1438
1439                 *cgroup = n;
1440         }
1441
1442         return 0;
1443 }
1444
1445 int cg_path_decode_unit(const char *cgroup, char **unit) {
1446         char *c, *s;
1447         size_t n;
1448
1449         assert(cgroup);
1450         assert(unit);
1451
1452         n = strcspn(cgroup, "/");
1453         if (n < 3)
1454                 return -ENXIO;
1455
1456         c = strndupa(cgroup, n);
1457         c = cg_unescape(c);
1458
1459         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1460                 return -ENXIO;
1461
1462         s = strdup(c);
1463         if (!s)
1464                 return -ENOMEM;
1465
1466         *unit = s;
1467         return 0;
1468 }
1469
1470 static bool valid_slice_name(const char *p, size_t n) {
1471
1472         if (!p)
1473                 return false;
1474
1475         if (n < STRLEN("x.slice"))
1476                 return false;
1477
1478         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1479                 char buf[n+1], *c;
1480
1481                 memcpy(buf, p, n);
1482                 buf[n] = 0;
1483
1484                 c = cg_unescape(buf);
1485
1486                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1487         }
1488
1489         return false;
1490 }
1491
/* Skips over all leading path components of 'p' that valid_slice_name() accepts, together with the
 * slashes in front of them. Returns a pointer to the first component that is not a slice name (which may
 * be the terminating NUL byte). */
static const char *skip_slices(const char *p) {
        const char *cursor;

        assert(p);

        cursor = p;
        for (;;) {
                const char *component;
                size_t len;

                component = cursor + strspn(cursor, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                cursor = component + len;
        }
}
1509
/* Extracts the unit name from a cgroup path: all leading slice components are skipped with
 * skip_slices() and the component that follows is decoded with cg_path_decode_unit(). On success a newly
 * allocated string is stored in *ret and 0 is returned. Returns -ENXIO if the decoded name ends in
 * ".slice", or the error from cg_path_decode_unit(). */
int cg_path_get_unit(const char *path, char **ret) {
        char *name;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &name);
        if (r < 0)
                return r;

        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        /* We skipped over the slices, don't accept any now */
        free(name);
        return -ENXIO;
}
1533
1534 int cg_pid_get_unit(pid_t pid, char **unit) {
1535         _cleanup_free_ char *cgroup = NULL;
1536         int r;
1537
1538         assert(unit);
1539
1540         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1541         if (r < 0)
1542                 return r;
1543
1544         return cg_path_get_unit(cgroup, unit);
1545 }
1546
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Leading slashes of 'p' are skipped first. Returns a pointer to whatever follows the
 * "session-<id>.scope" component (slashes after it are skipped, too), or NULL if isempty(p), if the
 * first component does not have that form, or if session_id_valid() rejects the id.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the id between prefix and suffix, plus the trailing NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with NULL rather than false */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1583
/**
 * Skip user@*.service, but require it to be there.
 *
 * Leading slashes of 'p' are skipped first. Returns a pointer to whatever follows the
 * "user@<uid>.service" component (slashes after it are skipped, too), or NULL if isempty(p), if the first
 * component does not have that form, or if parse_uid() rejects the part between "user@" and ".service".
 */
static const char *skip_user_manager(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + n - 8, ".service", 8) != 0)
                return NULL;

        {
                /* Room for the UID between prefix and suffix, plus the trailing NUL */
                char uid_str[n - 5 - 8 + 1];

                memcpy(uid_str, p + 5, n - 5 - 8);
                uid_str[n - 5 - 8] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;
        }

        p += n;
        p += strspn(p, "/");

        return p;
}
1621
/* Skips everything in 'path' that precedes the units of a user: first any slices, and then either the
 * user manager component (skip_user_manager()) or, failing that, the session scope component
 * (skip_session()). Returns NULL if neither of the two is there. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Skip the user manager, if it's in the path now, or alternatively the user session */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1638
/* Extracts the user unit name from a cgroup path: the prefix recognized by skip_user_prefix() is
 * skipped, and the remainder is parsed with cg_path_get_unit(). Returns -ENXIO if the prefix is missing,
 * otherwise whatever cg_path_get_unit() returns. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *remainder;

        assert(path);
        assert(ret);

        remainder = skip_user_prefix(path);

        /* From here on it looks pretty much the same as for a system unit, hence the same parser is used */
        return remainder ? cg_path_get_unit(remainder, ret) : -ENXIO;
}
1654
1655 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1656         _cleanup_free_ char *cgroup = NULL;
1657         int r;
1658
1659         assert(unit);
1660
1661         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1662         if (r < 0)
1663                 return r;
1664
1665         return cg_path_get_user_unit(cgroup, unit);
1666 }
1667
1668 int cg_path_get_machine_name(const char *path, char **machine) {
1669         _cleanup_free_ char *u = NULL;
1670         const char *sl;
1671         int r;
1672
1673         r = cg_path_get_unit(path, &u);
1674         if (r < 0)
1675                 return r;
1676
1677         sl = strjoina("/run/systemd/machines/unit:", u);
1678         return readlink_malloc(sl, machine);
1679 }
1680
1681 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1682         _cleanup_free_ char *cgroup = NULL;
1683         int r;
1684
1685         assert(machine);
1686
1687         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1688         if (r < 0)
1689                 return r;
1690
1691         return cg_path_get_machine_name(cgroup, machine);
1692 }
1693
1694 int cg_path_get_session(const char *path, char **session) {
1695         _cleanup_free_ char *unit = NULL;
1696         char *start, *end;
1697         int r;
1698
1699         assert(path);
1700
1701         r = cg_path_get_unit(path, &unit);
1702         if (r < 0)
1703                 return r;
1704
1705         start = startswith(unit, "session-");
1706         if (!start)
1707                 return -ENXIO;
1708         end = endswith(start, ".scope");
1709         if (!end)
1710                 return -ENXIO;
1711
1712         *end = 0;
1713         if (!session_id_valid(start))
1714                 return -ENXIO;
1715
1716         if (session) {
1717                 char *rr;
1718
1719                 rr = strdup(start);
1720                 if (!rr)
1721                         return -ENOMEM;
1722
1723                 *session = rr;
1724         }
1725
1726         return 0;
1727 }
1728
1729 int cg_pid_get_session(pid_t pid, char **session) {
1730         _cleanup_free_ char *cgroup = NULL;
1731         int r;
1732
1733         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1734         if (r < 0)
1735                 return r;
1736
1737         return cg_path_get_session(cgroup, session);
1738 }
1739
1740 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1741         _cleanup_free_ char *slice = NULL;
1742         char *start, *end;
1743         int r;
1744
1745         assert(path);
1746
1747         r = cg_path_get_slice(path, &slice);
1748         if (r < 0)
1749                 return r;
1750
1751         start = startswith(slice, "user-");
1752         if (!start)
1753                 return -ENXIO;
1754         end = endswith(start, ".slice");
1755         if (!end)
1756                 return -ENXIO;
1757
1758         *end = 0;
1759         if (parse_uid(start, uid) < 0)
1760                 return -ENXIO;
1761
1762         return 0;
1763 }
1764
1765 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1766         _cleanup_free_ char *cgroup = NULL;
1767         int r;
1768
1769         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1770         if (r < 0)
1771                 return r;
1772
1773         return cg_path_get_owner_uid(cgroup, uid);
1774 }
1775
1776 int cg_path_get_slice(const char *p, char **slice) {
1777         const char *e = NULL;
1778
1779         assert(p);
1780         assert(slice);
1781
1782         /* Finds the right-most slice unit from the beginning, but
1783          * stops before we come to the first non-slice unit. */
1784
1785         for (;;) {
1786                 size_t n;
1787
1788                 p += strspn(p, "/");
1789
1790                 n = strcspn(p, "/");
1791                 if (!valid_slice_name(p, n)) {
1792
1793                         if (!e) {
1794                                 char *s;
1795
1796                                 s = strdup(SPECIAL_ROOT_SLICE);
1797                                 if (!s)
1798                                         return -ENOMEM;
1799
1800                                 *slice = s;
1801                                 return 0;
1802                         }
1803
1804                         return cg_path_decode_unit(e, slice);
1805                 }
1806
1807                 e = p;
1808                 p += n;
1809         }
1810 }
1811
1812 int cg_pid_get_slice(pid_t pid, char **slice) {
1813         _cleanup_free_ char *cgroup = NULL;
1814         int r;
1815
1816         assert(slice);
1817
1818         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1819         if (r < 0)
1820                 return r;
1821
1822         return cg_path_get_slice(cgroup, slice);
1823 }
1824
/* Extracts the user slice from a cgroup path: the prefix recognized by skip_user_prefix() is skipped,
 * and the remainder is parsed with cg_path_get_slice(). Returns -ENXIO if the prefix is missing,
 * otherwise whatever cg_path_get_slice() returns. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *remainder;

        assert(p);
        assert(slice);

        remainder = skip_user_prefix(p);

        /* From here on it looks pretty much the same as for a system slice, hence the same parser is used */
        return remainder ? cg_path_get_slice(remainder, slice) : -ENXIO;
}
1838
1839 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1840         _cleanup_free_ char *cgroup = NULL;
1841         int r;
1842
1843         assert(slice);
1844
1845         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1846         if (r < 0)
1847                 return r;
1848
1849         return cg_path_get_user_slice(cgroup, slice);
1850 }
1851
1852 char *cg_escape(const char *p) {
1853         bool need_prefix = false;
1854
1855         /* This implements very minimal escaping for names to be used
1856          * as file names in the cgroup tree: any name which might
1857          * conflict with a kernel name or is prefixed with '_' is
1858          * prefixed with a '_'. That way, when reading cgroup names it
1859          * is sufficient to remove a single prefixing underscore if
1860          * there is one. */
1861
1862         /* The return value of this function (unlike cg_unescape())
1863          * needs free()! */
1864
1865         if (IN_SET(p[0], 0, '_', '.') ||
1866             streq(p, "notify_on_release") ||
1867             streq(p, "release_agent") ||
1868             streq(p, "tasks") ||
1869             startswith(p, "cgroup."))
1870                 need_prefix = true;
1871         else {
1872                 const char *dot;
1873
1874                 dot = strrchr(p, '.');
1875                 if (dot) {
1876                         CGroupController c;
1877                         size_t l = dot - p;
1878
1879                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1880                                 const char *n;
1881
1882                                 n = cgroup_controller_to_string(c);
1883
1884                                 if (l != strlen(n))
1885                                         continue;
1886
1887                                 if (memcmp(p, n, l) != 0)
1888                                         continue;
1889
1890                                 need_prefix = true;
1891                                 break;
1892                         }
1893                 }
1894         }
1895
1896         if (need_prefix)
1897                 return strappend("_", p);
1898
1899         return strdup(p);
1900 }
1901
/* Undoes cg_escape(): if 'p' starts with '_', a pointer to the character after it is returned, otherwise
 * 'p' itself.
 *
 * The return value of this function (unlike cg_escape()) doesn't need free(), as it points into 'p'! */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1913
1914 #define CONTROLLER_VALID                        \
1915         DIGITS LETTERS                          \
1916         "_"
1917
1918 bool cg_controller_is_valid(const char *p) {
1919         const char *t, *s;
1920
1921         if (!p)
1922                 return false;
1923
1924         if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1925                 return true;
1926
1927         s = startswith(p, "name=");
1928         if (s)
1929                 p = s;
1930
1931         if (IN_SET(*p, 0, '_'))
1932                 return false;
1933
1934         for (t = p; *t; t++)
1935                 if (!strchr(CONTROLLER_VALID, *t))
1936                         return false;
1937
1938         if (t - p > FILENAME_MAX)
1939                 return false;
1940
1941         return true;
1942 }
1943
1944 int cg_slice_to_path(const char *unit, char **ret) {
1945         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1946         const char *dash;
1947         int r;
1948
1949         assert(unit);
1950         assert(ret);
1951
1952         if (streq(unit, SPECIAL_ROOT_SLICE)) {
1953                 char *x;
1954
1955                 x = strdup("");
1956                 if (!x)
1957                         return -ENOMEM;
1958                 *ret = x;
1959                 return 0;
1960         }
1961
1962         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1963                 return -EINVAL;
1964
1965         if (!endswith(unit, ".slice"))
1966                 return -EINVAL;
1967
1968         r = unit_name_to_prefix(unit, &p);
1969         if (r < 0)
1970                 return r;
1971
1972         dash = strchr(p, '-');
1973
1974         /* Don't allow initial dashes */
1975         if (dash == p)
1976                 return -EINVAL;
1977
1978         while (dash) {
1979                 _cleanup_free_ char *escaped = NULL;
1980                 char n[dash - p + sizeof(".slice")];
1981
1982                 /* Don't allow trailing or double dashes */
1983                 if (IN_SET(dash[1], 0, '-'))
1984                         return -EINVAL;
1985
1986                 strcpy(stpncpy(n, p, dash - p), ".slice");
1987                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1988                         return -EINVAL;
1989
1990                 escaped = cg_escape(n);
1991                 if (!escaped)
1992                         return -ENOMEM;
1993
1994                 if (!strextend(&s, escaped, "/", NULL))
1995                         return -ENOMEM;
1996
1997                 dash = strchr(dash+1, '-');
1998         }
1999
2000         e = cg_escape(unit);
2001         if (!e)
2002                 return -ENOMEM;
2003
2004         if (!strextend(&s, e, NULL))
2005                 return -ENOMEM;
2006
2007         *ret = s;
2008         s = NULL;
2009
2010         return 0;
2011 }
2012
2013 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2014         _cleanup_free_ char *p = NULL;
2015         int r;
2016
2017         r = cg_get_path(controller, path, attribute, &p);
2018         if (r < 0)
2019                 return r;
2020
2021         return write_string_file(p, value, 0);
2022 }
2023
2024 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2025         _cleanup_free_ char *p = NULL;
2026         int r;
2027
2028         r = cg_get_path(controller, path, attribute, &p);
2029         if (r < 0)
2030                 return r;
2031
2032         return read_one_line_file(p, ret);
2033 }
2034
2035 int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
2036         _cleanup_free_ char *filename = NULL, *content = NULL;
2037         char *line, *p;
2038         int i, r;
2039
2040         for (i = 0; keys[i]; i++)
2041                 values[i] = NULL;
2042
2043         r = cg_get_path(controller, path, attribute, &filename);
2044         if (r < 0)
2045                 return r;
2046
2047         r = read_full_file(filename, &content, NULL);
2048         if (r < 0)
2049                 return r;
2050
2051         p = content;
2052         while ((line = strsep(&p, "\n"))) {
2053                 char *key;
2054
2055                 key = strsep(&line, " ");
2056
2057                 for (i = 0; keys[i]; i++) {
2058                         if (streq(key, keys[i])) {
2059                                 values[i] = strdup(line);
2060                                 break;
2061                         }
2062                 }
2063         }
2064
2065         for (i = 0; keys[i]; i++) {
2066                 if (!values[i]) {
2067                         for (i = 0; keys[i]; i++) {
2068                                 values[i] = mfree(values[i]);
2069                         }
2070                         return -ENOENT;
2071                 }
2072         }
2073
2074         return 0;
2075 }
2076
2077 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2078         CGroupController c;
2079         int r;
2080
2081         /* This one will create a cgroup in our private tree, but also
2082          * duplicate it in the trees specified in mask, and remove it
2083          * in all others */
2084
2085         /* First create the cgroup in our own hierarchy. */
2086         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2087         if (r < 0)
2088                 return r;
2089
2090         /* If we are in the unified hierarchy, we are done now */
2091         r = cg_all_unified();
2092         if (r < 0)
2093                 return r;
2094         if (r > 0)
2095                 return 0;
2096
2097         /* Otherwise, do the same in the other hierarchies */
2098         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2099                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2100                 const char *n;
2101
2102                 n = cgroup_controller_to_string(c);
2103
2104                 if (mask & bit)
2105                         (void) cg_create(n, path);
2106                 else if (supported & bit)
2107                         (void) cg_trim(n, path, true);
2108         }
2109
2110         return 0;
2111 }
2112
2113 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2114         CGroupController c;
2115         int r;
2116
2117         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2118         if (r < 0)
2119                 return r;
2120
2121         r = cg_all_unified();
2122         if (r < 0)
2123                 return r;
2124         if (r > 0)
2125                 return 0;
2126
2127         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2128                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2129                 const char *p = NULL;
2130
2131                 if (!(supported & bit))
2132                         continue;
2133
2134                 if (path_callback)
2135                         p = path_callback(bit, userdata);
2136
2137                 if (!p)
2138                         p = path;
2139
2140                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2141         }
2142
2143         return 0;
2144 }
2145
2146 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2147         Iterator i;
2148         void *pidp;
2149         int r = 0;
2150
2151         SET_FOREACH(pidp, pids, i) {
2152                 pid_t pid = PTR_TO_PID(pidp);
2153                 int q;
2154
2155                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2156                 if (q < 0 && r >= 0)
2157                         r = q;
2158         }
2159
2160         return r;
2161 }
2162
2163 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2164         CGroupController c;
2165         int r = 0, q;
2166
2167         if (!path_equal(from, to))  {
2168                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2169                 if (r < 0)
2170                         return r;
2171         }
2172
2173         q = cg_all_unified();
2174         if (q < 0)
2175                 return q;
2176         if (q > 0)
2177                 return r;
2178
2179         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2180                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2181                 const char *p = NULL;
2182
2183                 if (!(supported & bit))
2184                         continue;
2185
2186                 if (to_callback)
2187                         p = to_callback(bit, userdata);
2188
2189                 if (!p)
2190                         p = to;
2191
2192                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2193         }
2194
2195         return 0;
2196 }
2197
2198 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2199         CGroupController c;
2200         int r, q;
2201
2202         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2203         if (r < 0)
2204                 return r;
2205
2206         q = cg_all_unified();
2207         if (q < 0)
2208                 return q;
2209         if (q > 0)
2210                 return r;
2211
2212         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2213                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2214
2215                 if (!(supported & bit))
2216                         continue;
2217
2218                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2219         }
2220
2221         return 0;
2222 }
2223
2224 int cg_mask_to_string(CGroupMask mask, char **ret) {
2225         _cleanup_free_ char *s = NULL;
2226         size_t n = 0, allocated = 0;
2227         bool space = false;
2228         CGroupController c;
2229
2230         assert(ret);
2231
2232         if (mask == 0) {
2233                 *ret = NULL;
2234                 return 0;
2235         }
2236
2237         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2238                 const char *k;
2239                 size_t l;
2240
2241                 if (!(mask & CGROUP_CONTROLLER_TO_MASK(c)))
2242                         continue;
2243
2244                 k = cgroup_controller_to_string(c);
2245                 l = strlen(k);
2246
2247                 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2248                         return -ENOMEM;
2249
2250                 if (space)
2251                         s[n] = ' ';
2252                 memcpy(s + n + space, k, l);
2253                 n += space + l;
2254
2255                 space = true;
2256         }
2257
2258         assert(s);
2259
2260         s[n] = 0;
2261         *ret = s;
2262         s = NULL;
2263
2264         return 0;
2265 }
2266
2267 int cg_mask_from_string(const char *value, CGroupMask *mask) {
2268         assert(mask);
2269         assert(value);
2270
2271         for (;;) {
2272                 _cleanup_free_ char *n = NULL;
2273                 CGroupController v;
2274                 int r;
2275
2276                 r = extract_first_word(&value, &n, NULL, 0);
2277                 if (r < 0)
2278                         return r;
2279                 if (r == 0)
2280                         break;
2281
2282                 v = cgroup_controller_from_string(n);
2283                 if (v < 0)
2284                         continue;
2285
2286                 *mask |= CGROUP_CONTROLLER_TO_MASK(v);
2287         }
2288         return 0;
2289 }
2290
2291 int cg_mask_supported(CGroupMask *ret) {
2292         CGroupMask mask = 0;
2293         int r;
2294
2295         /* Determines the mask of supported cgroup controllers. Only
2296          * includes controllers we can make sense of and that are
2297          * actually accessible. */
2298
2299         r = cg_all_unified();
2300         if (r < 0)
2301                 return r;
2302         if (r > 0) {
2303                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2304
2305                 /* In the unified hierarchy we can read the supported
2306                  * and accessible controllers from a the top-level
2307                  * cgroup attribute */
2308
2309                 r = cg_get_root_path(&root);
2310                 if (r < 0)
2311                         return r;
2312
2313                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2314                 if (r < 0)
2315                         return r;
2316
2317                 r = read_one_line_file(path, &controllers);
2318                 if (r < 0)
2319                         return r;
2320
2321                 r = cg_mask_from_string(controllers, &mask);
2322                 if (r < 0)
2323                         return r;
2324
2325                 /* Currently, we support the cpu, memory, io and pids
2326                  * controller in the unified hierarchy, mask
2327                  * everything else off. */
2328                 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2329
2330         } else {
2331                 CGroupController c;
2332
2333                 /* In the legacy hierarchy, we check whether which
2334                  * hierarchies are mounted. */
2335
2336                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2337                         const char *n;
2338
2339                         n = cgroup_controller_to_string(c);
2340                         if (controller_is_accessible(n) >= 0)
2341                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2342                 }
2343         }
2344
2345         *ret = mask;
2346         return 0;
2347 }
2348
2349 int cg_kernel_controllers(Set **ret) {
2350         _cleanup_set_free_free_ Set *controllers = NULL;
2351         _cleanup_fclose_ FILE *f = NULL;
2352         int r;
2353
2354         assert(ret);
2355
2356         /* Determines the full list of kernel-known controllers. Might
2357          * include controllers we don't actually support, arbitrary
2358          * named hierarchies and controllers that aren't currently
2359          * accessible (because not mounted). */
2360
2361         controllers = set_new(&string_hash_ops);
2362         if (!controllers)
2363                 return -ENOMEM;
2364
2365         f = fopen("/proc/cgroups", "re");
2366         if (!f) {
2367                 if (errno == ENOENT) {
2368                         *ret = NULL;
2369                         return 0;
2370                 }
2371
2372                 return -errno;
2373         }
2374
2375         (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
2376
2377         /* Ignore the header line */
2378         (void) read_line(f, (size_t) -1, NULL);
2379
2380         for (;;) {
2381                 char *controller;
2382                 int enabled = 0;
2383
2384                 errno = 0;
2385                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2386
2387                         if (feof(f))
2388                                 break;
2389
2390                         if (ferror(f) && errno > 0)
2391                                 return -errno;
2392
2393                         return -EBADMSG;
2394                 }
2395
2396                 if (!enabled) {
2397                         free(controller);
2398                         continue;
2399                 }
2400
2401                 if (!cg_controller_is_valid(controller)) {
2402                         free(controller);
2403                         return -EBADMSG;
2404                 }
2405
2406                 r = set_consume(controllers, controller);
2407                 if (r < 0)
2408                         return r;
2409         }
2410
2411         *ret = controllers;
2412         controllers = NULL;
2413
2414         return 0;
2415 }
2416
2417 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2418
2419 /* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd.  This
2420  * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
 * /sys/fs/cgroup/systemd.  From v233 and on, the hybrid mode mounts v2 on /sys/fs/cgroup/unified and maintains
2422  * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
2423  *
2424  * To keep live upgrade working, we detect and support v232 layout.  When v232 layout is detected, to keep cgroup v2
2425  * process management but disable the compat dual layout, we return %true on
2426  * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
2427  */
2428 static thread_local bool unified_systemd_v232;
2429
2430 static int cg_unified_update(void) {
2431
2432         struct statfs fs;
2433
2434         /* Checks if we support the unified hierarchy. Returns an
2435          * error when the cgroup hierarchies aren't mounted yet or we
2436          * have any other trouble determining if the unified hierarchy
2437          * is supported. */
2438
2439         if (unified_cache >= CGROUP_UNIFIED_NONE)
2440                 return 0;
2441
2442         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2443                 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\" failed: %m");
2444
2445         if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2446                 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
2447                 unified_cache = CGROUP_UNIFIED_ALL;
2448         } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2449                 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
2450                     F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2451                         log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2452                         unified_cache = CGROUP_UNIFIED_SYSTEMD;
2453                         unified_systemd_v232 = false;
2454                 } else {
2455                         if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2456                                 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
2457
2458                         if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2459                                 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2460                                 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2461                                 unified_systemd_v232 = true;
2462                         } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2463                                 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2464                                 unified_cache = CGROUP_UNIFIED_NONE;
2465                         } else {
2466                                 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
2467                                           (unsigned long long) fs.f_type);
2468                                 unified_cache = CGROUP_UNIFIED_NONE;
2469                         }
2470                 }
2471         } else {
2472                 log_debug("Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2473                           (unsigned long long) fs.f_type);
2474                 return -ENOMEDIUM;
2475         }
2476
2477         return 0;
2478 }
2479
2480 int cg_unified_controller(const char *controller) {
2481         int r;
2482
2483         r = cg_unified_update();
2484         if (r < 0)
2485                 return r;
2486
2487         if (unified_cache == CGROUP_UNIFIED_NONE)
2488                 return false;
2489
2490         if (unified_cache >= CGROUP_UNIFIED_ALL)
2491                 return true;
2492
2493         return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
2494 }
2495
2496 int cg_all_unified(void) {
2497         int r;
2498
2499         r = cg_unified_update();
2500         if (r < 0)
2501                 return r;
2502
2503         return unified_cache >= CGROUP_UNIFIED_ALL;
2504 }
2505
2506 int cg_hybrid_unified(void) {
2507         int r;
2508
2509         r = cg_unified_update();
2510         if (r < 0)
2511                 return r;
2512
2513         return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2514 }
2515
2516 int cg_unified_flush(void) {
2517         unified_cache = CGROUP_UNIFIED_UNKNOWN;
2518
2519         return cg_unified_update();
2520 }
2521
2522 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2523         _cleanup_fclose_ FILE *f = NULL;
2524         _cleanup_free_ char *fs = NULL;
2525         CGroupController c;
2526         int r;
2527
2528         assert(p);
2529
2530         if (supported == 0)
2531                 return 0;
2532
2533         r = cg_all_unified();
2534         if (r < 0)
2535                 return r;
2536         if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
2537                 return 0;
2538
2539         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2540         if (r < 0)
2541                 return r;
2542
2543         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2544                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2545                 const char *n;
2546
2547                 if (!(supported & bit))
2548                         continue;
2549
2550                 n = cgroup_controller_to_string(c);
2551                 {
2552                         char s[1 + strlen(n) + 1];
2553
2554                         s[0] = mask & bit ? '+' : '-';
2555                         strcpy(s + 1, n);
2556
2557                         if (!f) {
2558                                 f = fopen(fs, "we");
2559                                 if (!f) {
2560                                         log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
2561                                         break;
2562                                 }
2563                         }
2564
2565                         r = write_string_stream(f, s, 0);
2566                         if (r < 0)
2567                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2568                 }
2569         }
2570
2571         return 0;
2572 }
2573
2574 bool cg_is_unified_wanted(void) {
2575         static thread_local int wanted = -1;
2576         int r;
2577         bool b;
2578         const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
2579
2580         /* If we have a cached value, return that. */
2581         if (wanted >= 0)
2582                 return wanted;
2583
2584         /* If the hierarchy is already mounted, then follow whatever
2585          * was chosen for it. */
2586         if (cg_unified_flush() >= 0)
2587                 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
2588
2589         /* Otherwise, let's see what the kernel command line has to say.
2590          * Since checking is expensive, cache a non-error result. */
2591         r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
2592
2593         return (wanted = r > 0 ? b : is_default);
2594 }
2595
2596 bool cg_is_legacy_wanted(void) {
2597         static thread_local int wanted = -1;
2598
2599         /* If we have a cached value, return that. */
2600         if (wanted >= 0)
2601                 return wanted;
2602
2603         /* Check if we have cgroups2 already mounted. */
2604         if (cg_unified_flush() >= 0 &&
2605             unified_cache == CGROUP_UNIFIED_ALL)
2606                 return (wanted = false);
2607
2608         /* Otherwise, assume that at least partial legacy is wanted,
2609          * since cgroups2 should already be mounted at this point. */
2610         return (wanted = true);
2611 }
2612
2613 bool cg_is_hybrid_wanted(void) {
2614         static thread_local int wanted = -1;
2615         int r;
2616         bool b;
2617         const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2618         /* We default to true if the default is "hybrid", obviously,
2619          * but also when the default is "unified", because if we get
2620          * called, it means that unified hierarchy was not mounted. */
2621
2622         /* If we have a cached value, return that. */
2623         if (wanted >= 0)
2624                 return wanted;
2625
2626         /* If the hierarchy is already mounted, then follow whatever
2627          * was chosen for it. */
2628         if (cg_unified_flush() >= 0 &&
2629             unified_cache == CGROUP_UNIFIED_ALL)
2630                 return (wanted = false);
2631
2632         /* Otherwise, let's see what the kernel command line has to say.
2633          * Since checking is expensive, cache a non-error result. */
2634         r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
2635
2636         /* The meaning of the kernel option is reversed wrt. to the return value
2637          * of this function, hence the negation. */
2638         return (wanted = r > 0 ? !b : is_default);
2639 }
2640
2641 int cg_weight_parse(const char *s, uint64_t *ret) {
2642         uint64_t u;
2643         int r;
2644
2645         if (isempty(s)) {
2646                 *ret = CGROUP_WEIGHT_INVALID;
2647                 return 0;
2648         }
2649
2650         r = safe_atou64(s, &u);
2651         if (r < 0)
2652                 return r;
2653
2654         if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2655                 return -ERANGE;
2656
2657         *ret = u;
2658         return 0;
2659 }
2660
2661 const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2662         [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
2663         [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
2664         [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
2665         [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
2666 };
2667
2668 static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2669         [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
2670         [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
2671         [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
2672         [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
2673 };
2674
2675 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2676
2677 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2678         uint64_t u;
2679         int r;
2680
2681         if (isempty(s)) {
2682                 *ret = CGROUP_CPU_SHARES_INVALID;
2683                 return 0;
2684         }
2685
2686         r = safe_atou64(s, &u);
2687         if (r < 0)
2688                 return r;
2689
2690         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2691                 return -ERANGE;
2692
2693         *ret = u;
2694         return 0;
2695 }
2696
2697 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2698         uint64_t u;
2699         int r;
2700
2701         if (isempty(s)) {
2702                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2703                 return 0;
2704         }
2705
2706         r = safe_atou64(s, &u);
2707         if (r < 0)
2708                 return r;
2709
2710         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2711                 return -ERANGE;
2712
2713         *ret = u;
2714         return 0;
2715 }
2716
2717 bool is_cgroup_fs(const struct statfs *s) {
2718         return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2719                is_fs_type(s, CGROUP2_SUPER_MAGIC);
2720 }
2721
/* Returns true if the file descriptor refers to something located on a cgroup file system (see
 * is_cgroup_fs()).
 *
 * As the return type is boolean there is no way to report an error: if fstatfs() fails we cannot
 * tell what kind of file system this is and hence return false. (Returning -errno here, as was done
 * previously, is implicitly converted to true, thus claiming that every fd we cannot query lives
 * on a cgroup file system.) */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2730
2731 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2732         [CGROUP_CONTROLLER_CPU] = "cpu",
2733         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2734         [CGROUP_CONTROLLER_IO] = "io",
2735         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2736         [CGROUP_CONTROLLER_MEMORY] = "memory",
2737         [CGROUP_CONTROLLER_DEVICES] = "devices",
2738         [CGROUP_CONTROLLER_PIDS] = "pids",
2739 };
2740
2741 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);