src/basic/cgroup-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <dirent.h>
  22 #include <errno.h>
  23 #include <ftw.h>
  24 #include <limits.h>
  25 #include <signal.h>
  26 #include <stddef.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29 #include <sys/stat.h>
  30 #include <sys/statfs.h>
  31 #include <sys/types.h>
  32 #include <sys/xattr.h>
  33 #include <unistd.h>
  34
  35 #include "alloc-util.h"
  36 #include "cgroup-util.h"
  37 #include "def.h"
  38 #include "dirent-util.h"
  39 #include "extract-word.h"
  40 #include "fd-util.h"
  41 #include "fileio.h"
  42 #include "format-util.h"
  43 #include "fs-util.h"
  44 #include "log.h"
  45 #include "login-util.h"
  46 #include "macro.h"
  47 #include "missing.h"
  48 #include "mkdir.h"
  49 #include "parse-util.h"
  50 #include "path-util.h"
  51 #include "proc-cmdline.h"
  52 #include "process-util.h"
  53 #include "set.h"
  54 #include "special.h"
  55 #include "stat-util.h"
  56 #include "stdio-util.h"
  57 #include "string-table.h"
  58 #include "string-util.h"
  59 #include "strv.h"
  60 #include "unit-name.h"
  61 #include "user-util.h"
  62
  63 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  64         _cleanup_free_ char *fs = NULL;
  65         FILE *f;
  66         int r;
  67
  68         assert(_f);
  69
  70         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  71         if (r < 0)
  72                 return r;
  73
  74         f = fopen(fs, "re");
  75         if (!f)
  76                 return -errno;
  77
  78         *_f = f;
  79         return 0;
  80 }
  81
  82 int cg_read_pid(FILE *f, pid_t *_pid) {
  83         unsigned long ul;
  84
  85         /* Note that the cgroup.procs might contain duplicates! See
  86          * cgroups.txt for details. */
  87
  88         assert(f);
  89         assert(_pid);
  90
  91         errno = 0;
  92         if (fscanf(f, "%lu", &ul) != 1) {
  93
  94                 if (feof(f))
  95                         return 0;
  96
  97                 return errno > 0 ? -errno : -EIO;
  98         }
  99
 100         if (ul <= 0)
 101                 return -EIO;
 102
 103         *_pid = (pid_t) ul;
 104         return 1;
 105 }
 106
 107 int cg_read_event(
 108                 const char *controller,
 109                 const char *path,
 110                 const char *event,
 111                 char **val) {
 112
 113         _cleanup_free_ char *events = NULL, *content = NULL;
 114         char *p, *line;
 115         int r;
 116
 117         r = cg_get_path(controller, path, "cgroup.events", &events);
 118         if (r < 0)
 119                 return r;
 120
 121         r = read_full_file(events, &content, NULL);
 122         if (r < 0)
 123                 return r;
 124
 125         p = content;
 126         while ((line = strsep(&p, "\n"))) {
 127                 char *key;
 128
 129                 key = strsep(&line, " ");
 130                 if (!key || !line)
 131                         return -EINVAL;
 132
 133                 if (strcmp(key, event))
 134                         continue;
 135
 136                 *val = strdup(line);
 137                 return 0;
 138         }
 139
 140         return -ENOENT;
 141 }
 142
 143 bool cg_ns_supported(void) {
 144         static thread_local int enabled = -1;
 145
 146         if (enabled >= 0)
 147                 return enabled;
 148
 149         if (access("/proc/self/ns/cgroup", F_OK) == 0)
 150                 enabled = 1;
 151         else
 152                 enabled = 0;
 153
 154         return enabled;
 155 }
 156
 157 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
 158         _cleanup_free_ char *fs = NULL;
 159         int r;
 160         DIR *d;
 161
 162         assert(_d);
 163
 164         /* This is not recursive! */
 165
 166         r = cg_get_path(controller, path, NULL, &fs);
 167         if (r < 0)
 168                 return r;
 169
 170         d = opendir(fs);
 171         if (!d)
 172                 return -errno;
 173
 174         *_d = d;
 175         return 0;
 176 }
 177
 178 int cg_read_subgroup(DIR *d, char **fn) {
 179         struct dirent *de;
 180
 181         assert(d);
 182         assert(fn);
 183
 184         FOREACH_DIRENT_ALL(de, d, return -errno) {
 185                 char *b;
 186
 187                 if (de->d_type != DT_DIR)
 188                         continue;
 189
 190                 if (dot_or_dot_dot(de->d_name))
 191                         continue;
 192
 193                 b = strdup(de->d_name);
 194                 if (!b)
 195                         return -ENOMEM;
 196
 197                 *fn = b;
 198                 return 1;
 199         }
 200
 201         return 0;
 202 }
 203
 204 int cg_rmdir(const char *controller, const char *path) {
 205         _cleanup_free_ char *p = NULL;
 206         int r;
 207
 208         r = cg_get_path(controller, path, NULL, &p);
 209         if (r < 0)
 210                 return r;
 211
 212         r = rmdir(p);
 213         if (r < 0 && errno != ENOENT)
 214                 return -errno;
 215
 216         r = cg_hybrid_unified();
 217         if (r < 0)
 218                 return r;
 219         if (r == 0)
 220                 return 0;
 221
 222         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 223                 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
 224                 if (r < 0)
 225                         log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
 226         }
 227
 228         return 0;
 229 }
 230
 231 int cg_kill(
 232                 const char *controller,
 233                 const char *path,
 234                 int sig,
 235                 CGroupFlags flags,
 236                 Set *s,
 237                 cg_kill_log_func_t log_kill,
 238                 void *userdata) {
 239
 240         _cleanup_set_free_ Set *allocated_set = NULL;
 241         bool done = false;
 242         int r, ret = 0;
 243         pid_t my_pid;
 244
 245         assert(sig >= 0);
 246
 247          /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
 248           * SIGCONT on SIGKILL. */
 249         if (IN_SET(sig, SIGCONT, SIGKILL))
 250                 flags &= ~CGROUP_SIGCONT;
 251
 252         /* This goes through the tasks list and kills them all. This
 253          * is repeated until no further processes are added to the
 254          * tasks list, to properly handle forking processes */
 255
 256         if (!s) {
 257                 s = allocated_set = set_new(NULL);
 258                 if (!s)
 259                         return -ENOMEM;
 260         }
 261
 262         my_pid = getpid_cached();
 263
 264         do {
 265                 _cleanup_fclose_ FILE *f = NULL;
 266                 pid_t pid = 0;
 267                 done = true;
 268
 269                 r = cg_enumerate_processes(controller, path, &f);
 270                 if (r < 0) {
 271                         if (ret >= 0 && r != -ENOENT)
 272                                 return r;
 273
 274                         return ret;
 275                 }
 276
 277                 while ((r = cg_read_pid(f, &pid)) > 0) {
 278
 279                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 280                                 continue;
 281
 282                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 283                                 continue;
 284
 285                         if (log_kill)
 286                                 log_kill(pid, sig, userdata);
 287
 288                         /* If we haven't killed this process yet, kill
 289                          * it */
 290                         if (kill(pid, sig) < 0) {
 291                                 if (ret >= 0 && errno != ESRCH)
 292                                         ret = -errno;
 293                         } else {
 294                                 if (flags & CGROUP_SIGCONT)
 295                                         (void) kill(pid, SIGCONT);
 296
 297                                 if (ret == 0)
 298                                         ret = 1;
 299                         }
 300
 301                         done = false;
 302
 303                         r = set_put(s, PID_TO_PTR(pid));
 304                         if (r < 0) {
 305                                 if (ret >= 0)
 306                                         return r;
 307
 308                                 return ret;
 309                         }
 310                 }
 311
 312                 if (r < 0) {
 313                         if (ret >= 0)
 314                                 return r;
 315
 316                         return ret;
 317                 }
 318
 319                 /* To avoid racing against processes which fork
 320                  * quicker than we can kill them we repeat this until
 321                  * no new pids need to be killed. */
 322
 323         } while (!done);
 324
 325         return ret;
 326 }
 327
 328 int cg_kill_recursive(
 329                 const char *controller,
 330                 const char *path,
 331                 int sig,
 332                 CGroupFlags flags,
 333                 Set *s,
 334                 cg_kill_log_func_t log_kill,
 335                 void *userdata) {
 336
 337         _cleanup_set_free_ Set *allocated_set = NULL;
 338         _cleanup_closedir_ DIR *d = NULL;
 339         int r, ret;
 340         char *fn;
 341
 342         assert(path);
 343         assert(sig >= 0);
 344
 345         if (!s) {
 346                 s = allocated_set = set_new(NULL);
 347                 if (!s)
 348                         return -ENOMEM;
 349         }
 350
 351         ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
 352
 353         r = cg_enumerate_subgroups(controller, path, &d);
 354         if (r < 0) {
 355                 if (ret >= 0 && r != -ENOENT)
 356                         return r;
 357
 358                 return ret;
 359         }
 360
 361         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 362                 _cleanup_free_ char *p = NULL;
 363
 364                 p = strjoin(path, "/", fn);
 365                 free(fn);
 366                 if (!p)
 367                         return -ENOMEM;
 368
 369                 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
 370                 if (r != 0 && ret >= 0)
 371                         ret = r;
 372         }
 373         if (ret >= 0 && r < 0)
 374                 ret = r;
 375
 376         if (flags & CGROUP_REMOVE) {
 377                 r = cg_rmdir(controller, path);
 378                 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
 379                         return r;
 380         }
 381
 382         return ret;
 383 }
 384
 385 int cg_migrate(
 386                 const char *cfrom,
 387                 const char *pfrom,
 388                 const char *cto,
 389                 const char *pto,
 390                 CGroupFlags flags) {
 391
 392         bool done = false;
 393         _cleanup_set_free_ Set *s = NULL;
 394         int r, ret = 0;
 395         pid_t my_pid;
 396
 397         assert(cfrom);
 398         assert(pfrom);
 399         assert(cto);
 400         assert(pto);
 401
 402         s = set_new(NULL);
 403         if (!s)
 404                 return -ENOMEM;
 405
 406         my_pid = getpid_cached();
 407
 408         do {
 409                 _cleanup_fclose_ FILE *f = NULL;
 410                 pid_t pid = 0;
 411                 done = true;
 412
 413                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 414                 if (r < 0) {
 415                         if (ret >= 0 && r != -ENOENT)
 416                                 return r;
 417
 418                         return ret;
 419                 }
 420
 421                 while ((r = cg_read_pid(f, &pid)) > 0) {
 422
 423                         /* This might do weird stuff if we aren't a
 424                          * single-threaded program. However, we
 425                          * luckily know we are not */
 426                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 427                                 continue;
 428
 429                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 430                                 continue;
 431
 432                         /* Ignore kernel threads. Since they can only
 433                          * exist in the root cgroup, we only check for
 434                          * them there. */
 435                         if (cfrom &&
 436                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 437                             is_kernel_thread(pid) > 0)
 438                                 continue;
 439
 440                         r = cg_attach(cto, pto, pid);
 441                         if (r < 0) {
 442                                 if (ret >= 0 && r != -ESRCH)
 443                                         ret = r;
 444                         } else if (ret == 0)
 445                                 ret = 1;
 446
 447                         done = false;
 448
 449                         r = set_put(s, PID_TO_PTR(pid));
 450                         if (r < 0) {
 451                                 if (ret >= 0)
 452                                         return r;
 453
 454                                 return ret;
 455                         }
 456                 }
 457
 458                 if (r < 0) {
 459                         if (ret >= 0)
 460                                 return r;
 461
 462                         return ret;
 463                 }
 464         } while (!done);
 465
 466         return ret;
 467 }
 468
 469 int cg_migrate_recursive(
 470                 const char *cfrom,
 471                 const char *pfrom,
 472                 const char *cto,
 473                 const char *pto,
 474                 CGroupFlags flags) {
 475
 476         _cleanup_closedir_ DIR *d = NULL;
 477         int r, ret = 0;
 478         char *fn;
 479
 480         assert(cfrom);
 481         assert(pfrom);
 482         assert(cto);
 483         assert(pto);
 484
 485         ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
 486
 487         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 488         if (r < 0) {
 489                 if (ret >= 0 && r != -ENOENT)
 490                         return r;
 491
 492                 return ret;
 493         }
 494
 495         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 496                 _cleanup_free_ char *p = NULL;
 497
 498                 p = strjoin(pfrom, "/", fn);
 499                 free(fn);
 500                 if (!p)
 501                         return -ENOMEM;
 502
 503                 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
 504                 if (r != 0 && ret >= 0)
 505                         ret = r;
 506         }
 507
 508         if (r < 0 && ret >= 0)
 509                 ret = r;
 510
 511         if (flags & CGROUP_REMOVE) {
 512                 r = cg_rmdir(cfrom, pfrom);
 513                 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
 514                         return r;
 515         }
 516
 517         return ret;
 518 }
 519
 520 int cg_migrate_recursive_fallback(
 521                 const char *cfrom,
 522                 const char *pfrom,
 523                 const char *cto,
 524                 const char *pto,
 525                 CGroupFlags flags) {
 526
 527         int r;
 528
 529         assert(cfrom);
 530         assert(pfrom);
 531         assert(cto);
 532         assert(pto);
 533
 534         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
 535         if (r < 0) {
 536                 char prefix[strlen(pto) + 1];
 537
 538                 /* This didn't work? Then let's try all prefixes of the destination */
 539
 540                 PATH_FOREACH_PREFIX(prefix, pto) {
 541                         int q;
 542
 543                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
 544                         if (q >= 0)
 545                                 return q;
 546                 }
 547         }
 548
 549         return r;
 550 }
 551
 552 static const char *controller_to_dirname(const char *controller) {
 553         const char *e;
 554
 555         assert(controller);
 556
 557         /* Converts a controller name to the directory name below
 558          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 559          * just cuts off the name= prefixed used for named
 560          * hierarchies, if it is specified. */
 561
 562         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 563                 if (cg_hybrid_unified() > 0)
 564                         controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
 565                 else
 566                         controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
 567         }
 568
 569         e = startswith(controller, "name=");
 570         if (e)
 571                 return e;
 572
 573         return controller;
 574 }
 575
 576 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 577         const char *dn;
 578         char *t = NULL;
 579
 580         assert(fs);
 581         assert(controller);
 582
 583         dn = controller_to_dirname(controller);
 584
 585         if (isempty(path) && isempty(suffix))
 586                 t = strappend("/sys/fs/cgroup/", dn);
 587         else if (isempty(path))
 588                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
 589         else if (isempty(suffix))
 590                 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
 591         else
 592                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
 593         if (!t)
 594                 return -ENOMEM;
 595
 596         *fs = t;
 597         return 0;
 598 }
 599
 600 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 601         char *t;
 602
 603         assert(fs);
 604
 605         if (isempty(path) && isempty(suffix))
 606                 t = strdup("/sys/fs/cgroup");
 607         else if (isempty(path))
 608                 t = strappend("/sys/fs/cgroup/", suffix);
 609         else if (isempty(suffix))
 610                 t = strappend("/sys/fs/cgroup/", path);
 611         else
 612                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix);
 613         if (!t)
 614                 return -ENOMEM;
 615
 616         *fs = t;
 617         return 0;
 618 }
 619
 620 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 621         int r;
 622
 623         assert(fs);
 624
 625         if (!controller) {
 626                 char *t;
 627
 628                 /* If no controller is specified, we return the path
 629                  * *below* the controllers, without any prefix. */
 630
 631                 if (!path && !suffix)
 632                         return -EINVAL;
 633
 634                 if (!suffix)
 635                         t = strdup(path);
 636                 else if (!path)
 637                         t = strdup(suffix);
 638                 else
 639                         t = strjoin(path, "/", suffix);
 640                 if (!t)
 641                         return -ENOMEM;
 642
 643                 *fs = path_kill_slashes(t);
 644                 return 0;
 645         }
 646
 647         if (!cg_controller_is_valid(controller))
 648                 return -EINVAL;
 649
 650         r = cg_all_unified();
 651         if (r < 0)
 652                 return r;
 653         if (r > 0)
 654                 r = join_path_unified(path, suffix, fs);
 655         else
 656                 r = join_path_legacy(controller, path, suffix, fs);
 657         if (r < 0)
 658                 return r;
 659
 660         path_kill_slashes(*fs);
 661         return 0;
 662 }
 663
 664 static int controller_is_accessible(const char *controller) {
 665         int r;
 666
 667         assert(controller);
 668
 669         /* Checks whether a specific controller is accessible,
 670          * i.e. its hierarchy mounted. In the unified hierarchy all
 671          * controllers are considered accessible, except for the named
 672          * hierarchies */
 673
 674         if (!cg_controller_is_valid(controller))
 675                 return -EINVAL;
 676
 677         r = cg_all_unified();
 678         if (r < 0)
 679                 return r;
 680         if (r > 0) {
 681                 /* We don't support named hierarchies if we are using
 682                  * the unified hierarchy. */
 683
 684                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 685                         return 0;
 686
 687                 if (startswith(controller, "name="))
 688                         return -EOPNOTSUPP;
 689
 690         } else {
 691                 const char *cc, *dn;
 692
 693                 dn = controller_to_dirname(controller);
 694                 cc = strjoina("/sys/fs/cgroup/", dn);
 695
 696                 if (laccess(cc, F_OK) < 0)
 697                         return -errno;
 698         }
 699
 700         return 0;
 701 }
 702
 703 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 704         int r;
 705
 706         assert(controller);
 707         assert(fs);
 708
 709         /* Check if the specified controller is actually accessible */
 710         r = controller_is_accessible(controller);
 711         if (r < 0)
 712                 return r;
 713
 714         return cg_get_path(controller, path, suffix, fs);
 715 }
 716
 717 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 718         assert(path);
 719         assert(sb);
 720         assert(ftwbuf);
 721
 722         if (typeflag != FTW_DP)
 723                 return 0;
 724
 725         if (ftwbuf->level < 1)
 726                 return 0;
 727
 728         (void) rmdir(path);
 729         return 0;
 730 }
 731
 732 int cg_trim(const char *controller, const char *path, bool delete_root) {
 733         _cleanup_free_ char *fs = NULL;
 734         int r = 0, q;
 735
 736         assert(path);
 737
 738         r = cg_get_path(controller, path, NULL, &fs);
 739         if (r < 0)
 740                 return r;
 741
 742         errno = 0;
 743         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 744                 if (errno == ENOENT)
 745                         r = 0;
 746                 else if (errno > 0)
 747                         r = -errno;
 748                 else
 749                         r = -EIO;
 750         }
 751
 752         if (delete_root) {
 753                 if (rmdir(fs) < 0 && errno != ENOENT)
 754                         return -errno;
 755         }
 756
 757         q = cg_hybrid_unified();
 758         if (q < 0)
 759                 return q;
 760         if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 761                 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
 762                 if (q < 0)
 763                         log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
 764         }
 765
 766         return r;
 767 }
 768
 769 int cg_create(const char *controller, const char *path) {
 770         _cleanup_free_ char *fs = NULL;
 771         int r;
 772
 773         r = cg_get_path_and_check(controller, path, NULL, &fs);
 774         if (r < 0)
 775                 return r;
 776
 777         r = mkdir_parents(fs, 0755);
 778         if (r < 0)
 779                 return r;
 780
 781         if (mkdir(fs, 0755) < 0) {
 782
 783                 if (errno == EEXIST)
 784                         return 0;
 785
 786                 return -errno;
 787         }
 788
 789         r = cg_hybrid_unified();
 790         if (r < 0)
 791                 return r;
 792
 793         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 794                 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
 795                 if (r < 0)
 796                         log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
 797         }
 798
 799         return 1;
 800 }
 801
 802 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 803         int r, q;
 804
 805         assert(pid >= 0);
 806
 807         r = cg_create(controller, path);
 808         if (r < 0)
 809                 return r;
 810
 811         q = cg_attach(controller, path, pid);
 812         if (q < 0)
 813                 return q;
 814
 815         /* This does not remove the cgroup on failure */
 816         return r;
 817 }
 818
 819 int cg_attach(const char *controller, const char *path, pid_t pid) {
 820         _cleanup_free_ char *fs = NULL;
 821         char c[DECIMAL_STR_MAX(pid_t) + 2];
 822         int r;
 823
 824         assert(path);
 825         assert(pid >= 0);
 826
 827         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 828         if (r < 0)
 829                 return r;
 830
 831         if (pid == 0)
 832                 pid = getpid_cached();
 833
 834         xsprintf(c, PID_FMT "\n", pid);
 835
 836         r = write_string_file(fs, c, 0);
 837         if (r < 0)
 838                 return r;
 839
 840         r = cg_hybrid_unified();
 841         if (r < 0)
 842                 return r;
 843
 844         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 845                 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
 846                 if (r < 0)
 847                         log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
 848         }
 849
 850         return 0;
 851 }
 852
 853 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 854         int r;
 855
 856         assert(controller);
 857         assert(path);
 858         assert(pid >= 0);
 859
 860         r = cg_attach(controller, path, pid);
 861         if (r < 0) {
 862                 char prefix[strlen(path) + 1];
 863
 864                 /* This didn't work? Then let's try all prefixes of
 865                  * the destination */
 866
 867                 PATH_FOREACH_PREFIX(prefix, path) {
 868                         int q;
 869
 870                         q = cg_attach(controller, prefix, pid);
 871                         if (q >= 0)
 872                                 return q;
 873                 }
 874         }
 875
 876         return r;
 877 }
 878
 879 int cg_set_group_access(
 880                 const char *controller,
 881                 const char *path,
 882                 mode_t mode,
 883                 uid_t uid,
 884                 gid_t gid) {
 885
 886         _cleanup_free_ char *fs = NULL;
 887         int r;
 888
 889         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 890                 return 0;
 891
 892         if (mode != MODE_INVALID)
 893                 mode &= 0777;
 894
 895         r = cg_get_path(controller, path, NULL, &fs);
 896         if (r < 0)
 897                 return r;
 898
 899         r = chmod_and_chown(fs, mode, uid, gid);
 900         if (r < 0)
 901                 return r;
 902
 903         r = cg_hybrid_unified();
 904         if (r < 0)
 905                 return r;
 906         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 907                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
 908                 if (r < 0)
 909                         log_debug_errno(r, "Failed to set group access on compatibility systemd cgroup %s, ignoring: %m", path);
 910         }
 911
 912         return 0;
 913 }
 914
 915 int cg_set_task_access(
 916                 const char *controller,
 917                 const char *path,
 918                 mode_t mode,
 919                 uid_t uid,
 920                 gid_t gid) {
 921
 922         _cleanup_free_ char *fs = NULL;
 923         int r;
 924
 925         assert(path);
 926
 927         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 928                 return 0;
 929
 930         if (mode != MODE_INVALID)
 931                 mode &= 0666;
 932
 933         /* For both the legacy and unified hierarchies, "cgroup.procs" is the main entry point for PIDs */
 934         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 935         if (r < 0)
 936                 return r;
 937
 938         r = chmod_and_chown(fs, mode, uid, gid);
 939         if (r < 0)
 940                 return r;
 941
 942         r = cg_unified_controller(controller);
 943         if (r < 0)
 944                 return r;
 945         if (r == 0) {
 946                 const char *fn;
 947
 948                 /* Compatibility: on cgroupsv1 always keep values for the legacy files "tasks" and
 949                  * "cgroup.clone_children" in sync with "cgroup.procs". Since this is legacy stuff, we don't care if
 950                  * this fails. */
 951
 952                 FOREACH_STRING(fn,
 953                                "tasks",
 954                                "cgroup.clone_children") {
 955
 956                         fs = mfree(fs);
 957
 958                         r = cg_get_path(controller, path, fn, &fs);
 959                         if (r < 0)
 960                                 log_debug_errno(r, "Failed to get path for %s of %s, ignoring: %m", fn, path);
 961
 962                         r = chmod_and_chown(fs, mode, uid, gid);
 963                         if (r < 0)
 964                                 log_debug_errno(r, "Failed to to change ownership/access mode for %s of %s, ignoring: %m", fn, path);
 965                 }
 966         } else {
 967                 /* On the unified controller, we want to permit subtree controllers too. */
 968
 969                 fs = mfree(fs);
 970                 r = cg_get_path(controller, path, "cgroup.subtree_control", &fs);
 971                 if (r < 0)
 972                         return r;
 973
 974                 r = chmod_and_chown(fs, mode, uid, gid);
 975                 if (r < 0)
 976                         return r;
 977         }
 978
 979         r = cg_hybrid_unified();
 980         if (r < 0)
 981                 return r;
 982         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 983                 /* Always propagate access mode from unified to legacy controller */
 984
 985                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
 986                 if (r < 0)
 987                         log_debug_errno(r, "Failed to set task access on compatibility systemd cgroup %s, ignoring: %m", path);
 988         }
 989
 990         return 0;
 991 }
 992
 993 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
 994         _cleanup_free_ char *fs = NULL;
 995         int r;
 996
 997         assert(path);
 998         assert(name);
 999         assert(value || size <= 0);
1000
1001         r = cg_get_path(controller, path, NULL, &fs);
1002         if (r < 0)
1003                 return r;
1004
1005         if (setxattr(fs, name, value, size, flags) < 0)
1006                 return -errno;
1007
1008         return 0;
1009 }
1010
1011 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
1012         _cleanup_free_ char *fs = NULL;
1013         ssize_t n;
1014         int r;
1015
1016         assert(path);
1017         assert(name);
1018
1019         r = cg_get_path(controller, path, NULL, &fs);
1020         if (r < 0)
1021                 return r;
1022
1023         n = getxattr(fs, name, value, size);
1024         if (n < 0)
1025                 return -errno;
1026
1027         return (int) n;
1028 }
1029
/* Looks up the cgroup path of process 'pid' for 'controller' by scanning the per-process "cgroup" file whose
 * name is built by procfs_file_alloca(). A NULL 'controller' selects SYSTEMD_CGROUP_CONTROLLER.
 *
 * On success a newly strdup()ed string is stored in *path (the caller owns it) and 0 is returned. Errors:
 * -EINVAL if 'controller' is not valid, -ESRCH if the file does not exist, -errno for other open/read
 * failures, -ENOMEM on allocation failure, -ENODATA if no line matched, or the negative result of
 * cg_unified_controller(). */
int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
        _cleanup_fclose_ FILE *f = NULL;
        char line[LINE_MAX];
        const char *fs, *controller_str;
        size_t cs = 0;
        int unified;

        assert(path);
        assert(pid >= 0);

        if (controller) {
                if (!cg_controller_is_valid(controller))
                        return -EINVAL;
        } else
                controller = SYSTEMD_CGROUP_CONTROLLER;

        unified = cg_unified_controller(controller);
        if (unified < 0)
                return unified;
        if (unified == 0) {
                /* Not unified: work out the controller name to look for in the file, and its length.
                 * controller_str and cs are only consulted in the non-unified branch of the loop below. */
                if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
                        controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
                else
                        controller_str = controller;

                cs = strlen(controller_str);
        }

        fs = procfs_file_alloca(pid, "cgroup");
        f = fopen(fs, "re");
        if (!f)
                /* A missing file is reported as "no such process" */
                return errno == ENOENT ? -ESRCH : -errno;

        FOREACH_LINE(line, f, return -errno) {
                char *e, *p;

                truncate_nl(line);

                if (unified) {
                        /* Only the line starting with "0:" is of interest; 'e' ends up on the next
                         * colon after that prefix, so that e + 1 is the path. */
                        e = startswith(line, "0:");
                        if (!e)
                                continue;

                        e = strchr(e, ':');
                        if (!e)
                                continue;
                } else {
                        char *l;
                        size_t k;
                        const char *word, *state;
                        bool found = false;

                        /* 'l' is set to just after the first colon and 'e' to the second colon; the text
                         * in between is treated as a comma-separated list of controller names. */
                        l = strchr(line, ':');
                        if (!l)
                                continue;

                        l++;
                        e = strchr(l, ':');
                        if (!e)
                                continue;

                        /* Terminate the list in place so that it can be iterated as a string of its own */
                        *e = 0;
                        FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
                                /* Words are not NUL-terminated, hence compare length and bytes */
                                if (k == cs && memcmp(word, controller_str, cs) == 0) {
                                        found = true;
                                        break;
                                }
                        }

                        if (!found)
                                continue;
                }

                /* Everything after the colon 'e' points to is the cgroup path */
                p = strdup(e + 1);
                if (!p)
                        return -ENOMEM;

                *path = p;
                return 0;
        }

        /* Reached the end of the file without finding a matching line */
        return -ENODATA;
}
1113
1114 int cg_install_release_agent(const char *controller, const char *agent) {
1115         _cleanup_free_ char *fs = NULL, *contents = NULL;
1116         const char *sc;
1117         int r;
1118
1119         assert(agent);
1120
1121         r = cg_unified_controller(controller);
1122         if (r < 0)
1123                 return r;
1124         if (r > 0) /* doesn't apply to unified hierarchy */
1125                 return -EOPNOTSUPP;
1126
1127         r = cg_get_path(controller, NULL, "release_agent", &fs);
1128         if (r < 0)
1129                 return r;
1130
1131         r = read_one_line_file(fs, &contents);
1132         if (r < 0)
1133                 return r;
1134
1135         sc = strstrip(contents);
1136         if (isempty(sc)) {
1137                 r = write_string_file(fs, agent, 0);
1138                 if (r < 0)
1139                         return r;
1140         } else if (!path_equal(sc, agent))
1141                 return -EEXIST;
1142
1143         fs = mfree(fs);
1144         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1145         if (r < 0)
1146                 return r;
1147
1148         contents = mfree(contents);
1149         r = read_one_line_file(fs, &contents);
1150         if (r < 0)
1151                 return r;
1152
1153         sc = strstrip(contents);
1154         if (streq(sc, "0")) {
1155                 r = write_string_file(fs, "1", 0);
1156                 if (r < 0)
1157                         return r;
1158
1159                 return 1;
1160         }
1161
1162         if (!streq(sc, "1"))
1163                 return -EIO;
1164
1165         return 0;
1166 }
1167
1168 int cg_uninstall_release_agent(const char *controller) {
1169         _cleanup_free_ char *fs = NULL;
1170         int r;
1171
1172         r = cg_unified_controller(controller);
1173         if (r < 0)
1174                 return r;
1175         if (r > 0) /* Doesn't apply to unified hierarchy */
1176                 return -EOPNOTSUPP;
1177
1178         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1179         if (r < 0)
1180                 return r;
1181
1182         r = write_string_file(fs, "0", 0);
1183         if (r < 0)
1184                 return r;
1185
1186         fs = mfree(fs);
1187
1188         r = cg_get_path(controller, NULL, "release_agent", &fs);
1189         if (r < 0)
1190                 return r;
1191
1192         r = write_string_file(fs, "", 0);
1193         if (r < 0)
1194                 return r;
1195
1196         return 0;
1197 }
1198
1199 int cg_is_empty(const char *controller, const char *path) {
1200         _cleanup_fclose_ FILE *f = NULL;
1201         pid_t pid;
1202         int r;
1203
1204         assert(path);
1205
1206         r = cg_enumerate_processes(controller, path, &f);
1207         if (r == -ENOENT)
1208                 return 1;
1209         if (r < 0)
1210                 return r;
1211
1212         r = cg_read_pid(f, &pid);
1213         if (r < 0)
1214                 return r;
1215
1216         return r == 0;
1217 }
1218
1219 int cg_is_empty_recursive(const char *controller, const char *path) {
1220         int r;
1221
1222         assert(path);
1223
1224         /* The root cgroup is always populated */
1225         if (controller && (isempty(path) || path_equal(path, "/")))
1226                 return false;
1227
1228         r = cg_unified_controller(controller);
1229         if (r < 0)
1230                 return r;
1231         if (r > 0) {
1232                 _cleanup_free_ char *t = NULL;
1233
1234                 /* On the unified hierarchy we can check empty state
1235                  * via the "populated" attribute of "cgroup.events". */
1236
1237                 r = cg_read_event(controller, path, "populated", &t);
1238                 if (r < 0)
1239                         return r;
1240
1241                 return streq(t, "0");
1242         } else {
1243                 _cleanup_closedir_ DIR *d = NULL;
1244                 char *fn;
1245
1246                 r = cg_is_empty(controller, path);
1247                 if (r <= 0)
1248                         return r;
1249
1250                 r = cg_enumerate_subgroups(controller, path, &d);
1251                 if (r == -ENOENT)
1252                         return 1;
1253                 if (r < 0)
1254                         return r;
1255
1256                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1257                         _cleanup_free_ char *p = NULL;
1258
1259                         p = strjoin(path, "/", fn);
1260                         free(fn);
1261                         if (!p)
1262                                 return -ENOMEM;
1263
1264                         r = cg_is_empty_recursive(controller, p);
1265                         if (r <= 0)
1266                                 return r;
1267                 }
1268                 if (r < 0)
1269                         return r;
1270
1271                 return true;
1272         }
1273 }
1274
1275 int cg_split_spec(const char *spec, char **controller, char **path) {
1276         char *t = NULL, *u = NULL;
1277         const char *e;
1278
1279         assert(spec);
1280
1281         if (*spec == '/') {
1282                 if (!path_is_safe(spec))
1283                         return -EINVAL;
1284
1285                 if (path) {
1286                         t = strdup(spec);
1287                         if (!t)
1288                                 return -ENOMEM;
1289
1290                         *path = path_kill_slashes(t);
1291                 }
1292
1293                 if (controller)
1294                         *controller = NULL;
1295
1296                 return 0;
1297         }
1298
1299         e = strchr(spec, ':');
1300         if (!e) {
1301                 if (!cg_controller_is_valid(spec))
1302                         return -EINVAL;
1303
1304                 if (controller) {
1305                         t = strdup(spec);
1306                         if (!t)
1307                                 return -ENOMEM;
1308
1309                         *controller = t;
1310                 }
1311
1312                 if (path)
1313                         *path = NULL;
1314
1315                 return 0;
1316         }
1317
1318         t = strndup(spec, e-spec);
1319         if (!t)
1320                 return -ENOMEM;
1321         if (!cg_controller_is_valid(t)) {
1322                 free(t);
1323                 return -EINVAL;
1324         }
1325
1326         if (isempty(e+1))
1327                 u = NULL;
1328         else {
1329                 u = strdup(e+1);
1330                 if (!u) {
1331                         free(t);
1332                         return -ENOMEM;
1333                 }
1334
1335                 if (!path_is_safe(u) ||
1336                     !path_is_absolute(u)) {
1337                         free(t);
1338                         free(u);
1339                         return -EINVAL;
1340                 }
1341
1342                 path_kill_slashes(u);
1343         }
1344
1345         if (controller)
1346                 *controller = t;
1347         else
1348                 free(t);
1349
1350         if (path)
1351                 *path = u;
1352         else
1353                 free(u);
1354
1355         return 0;
1356 }
1357
1358 int cg_mangle_path(const char *path, char **result) {
1359         _cleanup_free_ char *c = NULL, *p = NULL;
1360         char *t;
1361         int r;
1362
1363         assert(path);
1364         assert(result);
1365
1366         /* First, check if it already is a filesystem path */
1367         if (path_startswith(path, "/sys/fs/cgroup")) {
1368
1369                 t = strdup(path);
1370                 if (!t)
1371                         return -ENOMEM;
1372
1373                 *result = path_kill_slashes(t);
1374                 return 0;
1375         }
1376
1377         /* Otherwise, treat it as cg spec */
1378         r = cg_split_spec(path, &c, &p);
1379         if (r < 0)
1380                 return r;
1381
1382         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1383 }
1384
1385 int cg_get_root_path(char **path) {
1386         char *p, *e;
1387         int r;
1388
1389         assert(path);
1390
1391         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1392         if (r < 0)
1393                 return r;
1394
1395         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1396         if (!e)
1397                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1398         if (!e)
1399                 e = endswith(p, "/system"); /* even more legacy */
1400         if (e)
1401                 *e = 0;
1402
1403         *path = p;
1404         return 0;
1405 }
1406
1407 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1408         _cleanup_free_ char *rt = NULL;
1409         char *p;
1410         int r;
1411
1412         assert(cgroup);
1413         assert(shifted);
1414
1415         if (!root) {
1416                 /* If the root was specified let's use that, otherwise
1417                  * let's determine it from PID 1 */
1418
1419                 r = cg_get_root_path(&rt);
1420                 if (r < 0)
1421                         return r;
1422
1423                 root = rt;
1424         }
1425
1426         p = path_startswith(cgroup, root);
1427         if (p && p > cgroup)
1428                 *shifted = p - 1;
1429         else
1430                 *shifted = cgroup;
1431
1432         return 0;
1433 }
1434
1435 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1436         _cleanup_free_ char *raw = NULL;
1437         const char *c;
1438         int r;
1439
1440         assert(pid >= 0);
1441         assert(cgroup);
1442
1443         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1444         if (r < 0)
1445                 return r;
1446
1447         r = cg_shift_path(raw, root, &c);
1448         if (r < 0)
1449                 return r;
1450
1451         if (c == raw) {
1452                 *cgroup = raw;
1453                 raw = NULL;
1454         } else {
1455                 char *n;
1456
1457                 n = strdup(c);
1458                 if (!n)
1459                         return -ENOMEM;
1460
1461                 *cgroup = n;
1462         }
1463
1464         return 0;
1465 }
1466
1467 int cg_path_decode_unit(const char *cgroup, char **unit) {
1468         char *c, *s;
1469         size_t n;
1470
1471         assert(cgroup);
1472         assert(unit);
1473
1474         n = strcspn(cgroup, "/");
1475         if (n < 3)
1476                 return -ENXIO;
1477
1478         c = strndupa(cgroup, n);
1479         c = cg_unescape(c);
1480
1481         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1482                 return -ENXIO;
1483
1484         s = strdup(c);
1485         if (!s)
1486                 return -ENOMEM;
1487
1488         *unit = s;
1489         return 0;
1490 }
1491
1492 static bool valid_slice_name(const char *p, size_t n) {
1493
1494         if (!p)
1495                 return false;
1496
1497         if (n < strlen("x.slice"))
1498                 return false;
1499
1500         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1501                 char buf[n+1], *c;
1502
1503                 memcpy(buf, p, n);
1504                 buf[n] = 0;
1505
1506                 c = cg_unescape(buf);
1507
1508                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1509         }
1510
1511         return false;
1512 }
1513
/* Skips over all leading slice components of 'p' (and the slashes around them). Returns a pointer to the
 * first component that valid_slice_name() does not accept, with leading slashes already skipped. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (p += strspn(p, "/");; p += strspn(p, "/")) {
                size_t len;

                len = strcspn(p, "/");
                if (!valid_slice_name(p, len))
                        return p;

                p += len;
        }
}
1531
1532 int cg_path_get_unit(const char *path, char **ret) {
1533         const char *e;
1534         char *unit;
1535         int r;
1536
1537         assert(path);
1538         assert(ret);
1539
1540         e = skip_slices(path);
1541
1542         r = cg_path_decode_unit(e, &unit);
1543         if (r < 0)
1544                 return r;
1545
1546         /* We skipped over the slices, don't accept any now */
1547         if (endswith(unit, ".slice")) {
1548                 free(unit);
1549                 return -ENXIO;
1550         }
1551
1552         *ret = unit;
1553         return 0;
1554 }
1555
1556 int cg_pid_get_unit(pid_t pid, char **unit) {
1557         _cleanup_free_ char *cgroup = NULL;
1558         int r;
1559
1560         assert(unit);
1561
1562         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1563         if (r < 0)
1564                 return r;
1565
1566         return cg_path_get_unit(cgroup, unit);
1567 }
1568
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the "session-<id>.scope" component (with any slashes after it skipped
 * too), or NULL if there is nothing to parse or the first component is not such a scope with an id that
 * session_id_valid() accepts.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the session id between the 8 byte prefix and the 6 byte suffix, plus NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with NULL, not with a boolean. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1605
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the "user@<uid>.service" component (with any slashes after it skipped
 * too), or NULL if there is nothing to parse or the first component is not such a service with something
 * parse_uid() accepts between "user@" and ".service".
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) == 0 && memcmp(p + len - 8, ".service", 8) == 0) {
                /* What remains between the 5 byte prefix and the 8 byte suffix is the UID */
                size_t uid_len = len - 5 - 8;
                char uid_str[uid_len + 1];

                memcpy(uid_str, p + 5, uid_len);
                uid_str[uid_len] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;

                p += len;
                return p + strspn(p, "/");
        }

        return NULL;
}
1643
/* Skips everything in 'path' that leads up to a user unit: any leading slices, followed by either the user
 * manager service or, failing that, a session scope. Returns a pointer to what follows, or NULL if neither
 * of the two is there. */
static const char *skip_user_prefix(const char *path) {
        const char *rest;

        assert(path);

        /* First, get rid of the slices, if there are any */
        rest = skip_slices(path);

        /* Then, try the user manager, and fall back to the user session if that is not in the path */
        return skip_user_manager(rest) ?: skip_session(rest);
}
1660
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if the path does not carry the prefix
 * skip_user_prefix() looks for, otherwise the result of cg_path_get_unit() on the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the prefix things look pretty much the same as for a system unit, hence the same parser
         * is used from there on. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1676
1677 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1678         _cleanup_free_ char *cgroup = NULL;
1679         int r;
1680
1681         assert(unit);
1682
1683         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1684         if (r < 0)
1685                 return r;
1686
1687         return cg_path_get_user_unit(cgroup, unit);
1688 }
1689
1690 int cg_path_get_machine_name(const char *path, char **machine) {
1691         _cleanup_free_ char *u = NULL;
1692         const char *sl;
1693         int r;
1694
1695         r = cg_path_get_unit(path, &u);
1696         if (r < 0)
1697                 return r;
1698
1699         sl = strjoina("/run/systemd/machines/unit:", u);
1700         return readlink_malloc(sl, machine);
1701 }
1702
1703 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1704         _cleanup_free_ char *cgroup = NULL;
1705         int r;
1706
1707         assert(machine);
1708
1709         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1710         if (r < 0)
1711                 return r;
1712
1713         return cg_path_get_machine_name(cgroup, machine);
1714 }
1715
1716 int cg_path_get_session(const char *path, char **session) {
1717         _cleanup_free_ char *unit = NULL;
1718         char *start, *end;
1719         int r;
1720
1721         assert(path);
1722
1723         r = cg_path_get_unit(path, &unit);
1724         if (r < 0)
1725                 return r;
1726
1727         start = startswith(unit, "session-");
1728         if (!start)
1729                 return -ENXIO;
1730         end = endswith(start, ".scope");
1731         if (!end)
1732                 return -ENXIO;
1733
1734         *end = 0;
1735         if (!session_id_valid(start))
1736                 return -ENXIO;
1737
1738         if (session) {
1739                 char *rr;
1740
1741                 rr = strdup(start);
1742                 if (!rr)
1743                         return -ENOMEM;
1744
1745                 *session = rr;
1746         }
1747
1748         return 0;
1749 }
1750
1751 int cg_pid_get_session(pid_t pid, char **session) {
1752         _cleanup_free_ char *cgroup = NULL;
1753         int r;
1754
1755         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1756         if (r < 0)
1757                 return r;
1758
1759         return cg_path_get_session(cgroup, session);
1760 }
1761
1762 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1763         _cleanup_free_ char *slice = NULL;
1764         char *start, *end;
1765         int r;
1766
1767         assert(path);
1768
1769         r = cg_path_get_slice(path, &slice);
1770         if (r < 0)
1771                 return r;
1772
1773         start = startswith(slice, "user-");
1774         if (!start)
1775                 return -ENXIO;
1776         end = endswith(start, ".slice");
1777         if (!end)
1778                 return -ENXIO;
1779
1780         *end = 0;
1781         if (parse_uid(start, uid) < 0)
1782                 return -ENXIO;
1783
1784         return 0;
1785 }
1786
1787 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1788         _cleanup_free_ char *cgroup = NULL;
1789         int r;
1790
1791         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1792         if (r < 0)
1793                 return r;
1794
1795         return cg_path_get_owner_uid(cgroup, uid);
1796 }
1797
1798 int cg_path_get_slice(const char *p, char **slice) {
1799         const char *e = NULL;
1800
1801         assert(p);
1802         assert(slice);
1803
1804         /* Finds the right-most slice unit from the beginning, but
1805          * stops before we come to the first non-slice unit. */
1806
1807         for (;;) {
1808                 size_t n;
1809
1810                 p += strspn(p, "/");
1811
1812                 n = strcspn(p, "/");
1813                 if (!valid_slice_name(p, n)) {
1814
1815                         if (!e) {
1816                                 char *s;
1817
1818                                 s = strdup(SPECIAL_ROOT_SLICE);
1819                                 if (!s)
1820                                         return -ENOMEM;
1821
1822                                 *slice = s;
1823                                 return 0;
1824                         }
1825
1826                         return cg_path_decode_unit(e, slice);
1827                 }
1828
1829                 e = p;
1830                 p += n;
1831         }
1832 }
1833
1834 int cg_pid_get_slice(pid_t pid, char **slice) {
1835         _cleanup_free_ char *cgroup = NULL;
1836         int r;
1837
1838         assert(slice);
1839
1840         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1841         if (r < 0)
1842                 return r;
1843
1844         return cg_path_get_slice(cgroup, slice);
1845 }
1846
/* Determines the user slice a cgroup path belongs to. Returns -ENXIO if the path does not carry the prefix
 * skip_user_prefix() looks for, otherwise the result of cg_path_get_slice() on the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the prefix things look pretty much the same as for a system slice, hence the same parser
         * is used from there on. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1860
1861 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1862         _cleanup_free_ char *cgroup = NULL;
1863         int r;
1864
1865         assert(slice);
1866
1867         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1868         if (r < 0)
1869                 return r;
1870
1871         return cg_path_get_user_slice(cgroup, slice);
1872 }
1873
1874 char *cg_escape(const char *p) {
1875         bool need_prefix = false;
1876
1877         /* This implements very minimal escaping for names to be used
1878          * as file names in the cgroup tree: any name which might
1879          * conflict with a kernel name or is prefixed with '_' is
1880          * prefixed with a '_'. That way, when reading cgroup names it
1881          * is sufficient to remove a single prefixing underscore if
1882          * there is one. */
1883
1884         /* The return value of this function (unlike cg_unescape())
1885          * needs free()! */
1886
1887         if (IN_SET(p[0], 0, '_', '.') ||
1888             streq(p, "notify_on_release") ||
1889             streq(p, "release_agent") ||
1890             streq(p, "tasks") ||
1891             startswith(p, "cgroup."))
1892                 need_prefix = true;
1893         else {
1894                 const char *dot;
1895
1896                 dot = strrchr(p, '.');
1897                 if (dot) {
1898                         CGroupController c;
1899                         size_t l = dot - p;
1900
1901                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1902                                 const char *n;
1903
1904                                 n = cgroup_controller_to_string(c);
1905
1906                                 if (l != strlen(n))
1907                                         continue;
1908
1909                                 if (memcmp(p, n, l) != 0)
1910                                         continue;
1911
1912                                 need_prefix = true;
1913                                 break;
1914                         }
1915                 }
1916         }
1917
1918         if (need_prefix)
1919                 return strappend("_", p);
1920
1921         return strdup(p);
1922 }
1923
1924 char *cg_unescape(const char *p) {
1925         assert(p);
1926
1927         /* The return value of this function (unlike cg_escape())
1928          * doesn't need free()! */
1929
1930         if (p[0] == '_')
1931                 return (char*) p+1;
1932
1933         return (char*) p;
1934 }
1935
1936 #define CONTROLLER_VALID                        \
1937         DIGITS LETTERS                          \
1938         "_"
1939
1940 bool cg_controller_is_valid(const char *p) {
1941         const char *t, *s;
1942
1943         if (!p)
1944                 return false;
1945
1946         if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1947                 return true;
1948
1949         s = startswith(p, "name=");
1950         if (s)
1951                 p = s;
1952
1953         if (IN_SET(*p, 0, '_'))
1954                 return false;
1955
1956         for (t = p; *t; t++)
1957                 if (!strchr(CONTROLLER_VALID, *t))
1958                         return false;
1959
1960         if (t - p > FILENAME_MAX)
1961                 return false;
1962
1963         return true;
1964 }
1965
/* Converts the name of a slice unit into the cgroup path of that slice. Each dash-terminated prefix of the
 * name becomes a "<prefix>.slice" component of its own, followed by the full unit name as last component;
 * every component is passed through cg_escape(). SPECIAL_ROOT_SLICE maps to the empty string.
 *
 * Stores a newly allocated string in *ret. Returns 0 on success, -EINVAL if 'unit' or one of the derived
 * parent names is not a valid plain unit name, if 'unit' does not end in ".slice" or if its prefix has a
 * leading, trailing or double dash, -ENOMEM on allocation failure, or the negative result of
 * unit_name_to_prefix(). */
int cg_slice_to_path(const char *unit, char **ret) {
        _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
        const char *dash;
        int r;

        assert(unit);
        assert(ret);

        if (streq(unit, SPECIAL_ROOT_SLICE)) {
                char *x;

                x = strdup("");
                if (!x)
                        return -ENOMEM;
                *ret = x;
                return 0;
        }

        if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
                return -EINVAL;

        if (!endswith(unit, ".slice"))
                return -EINVAL;

        r = unit_name_to_prefix(unit, &p);
        if (r < 0)
                return r;

        dash = strchr(p, '-');

        /* Don't allow initial dashes */
        if (dash == p)
                return -EINVAL;

        /* One iteration per dash: emit the component for everything in front of it */
        while (dash) {
                _cleanup_free_ char *escaped = NULL;
                /* sizeof() counts the trailing NUL, hence this is room for prefix + ".slice" + NUL */
                char n[dash - p + sizeof(".slice")];

                /* Don't allow trailing or double dashes */
                if (IN_SET(dash[1], 0, '-'))
                        return -EINVAL;

                /* stpncpy() returns the end of what it copied, which is where the suffix goes */
                strcpy(stpncpy(n, p, dash - p), ".slice");
                if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
                        return -EINVAL;

                escaped = cg_escape(n);
                if (!escaped)
                        return -ENOMEM;

                if (!strextend(&s, escaped, "/", NULL))
                        return -ENOMEM;

                dash = strchr(dash+1, '-');
        }

        /* The last component is the unit name itself */
        e = cg_escape(unit);
        if (!e)
                return -ENOMEM;

        if (!strextend(&s, e, NULL))
                return -ENOMEM;

        /* Pass ownership to the caller, and make sure the cleanup handler leaves the string alone */
        *ret = s;
        s = NULL;

        return 0;
}
2034
2035 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2036         _cleanup_free_ char *p = NULL;
2037         int r;
2038
2039         r = cg_get_path(controller, path, attribute, &p);
2040         if (r < 0)
2041                 return r;
2042
2043         return write_string_file(p, value, 0);
2044 }
2045
2046 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2047         _cleanup_free_ char *p = NULL;
2048         int r;
2049
2050         r = cg_get_path(controller, path, attribute, &p);
2051         if (r < 0)
2052                 return r;
2053
2054         return read_one_line_file(p, ret);
2055 }
2056
2057 int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
2058         _cleanup_free_ char *filename = NULL, *content = NULL;
2059         char *line, *p;
2060         int i, r;
2061
2062         for (i = 0; keys[i]; i++)
2063                 values[i] = NULL;
2064
2065         r = cg_get_path(controller, path, attribute, &filename);
2066         if (r < 0)
2067                 return r;
2068
2069         r = read_full_file(filename, &content, NULL);
2070         if (r < 0)
2071                 return r;
2072
2073         p = content;
2074         while ((line = strsep(&p, "\n"))) {
2075                 char *key;
2076
2077                 key = strsep(&line, " ");
2078
2079                 for (i = 0; keys[i]; i++) {
2080                         if (streq(key, keys[i])) {
2081                                 values[i] = strdup(line);
2082                                 break;
2083                         }
2084                 }
2085         }
2086
2087         for (i = 0; keys[i]; i++) {
2088                 if (!values[i]) {
2089                         for (i = 0; keys[i]; i++) {
2090                                 free(values[i]);
2091                                 values[i] = NULL;
2092                         }
2093                         return -ENOENT;
2094                 }
2095         }
2096
2097         return 0;
2098 }
2099
2100 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2101         CGroupController c;
2102         int r;
2103
2104         /* This one will create a cgroup in our private tree, but also
2105          * duplicate it in the trees specified in mask, and remove it
2106          * in all others */
2107
2108         /* First create the cgroup in our own hierarchy. */
2109         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2110         if (r < 0)
2111                 return r;
2112
2113         /* If we are in the unified hierarchy, we are done now */
2114         r = cg_all_unified();
2115         if (r < 0)
2116                 return r;
2117         if (r > 0)
2118                 return 0;
2119
2120         /* Otherwise, do the same in the other hierarchies */
2121         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2122                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2123                 const char *n;
2124
2125                 n = cgroup_controller_to_string(c);
2126
2127                 if (mask & bit)
2128                         (void) cg_create(n, path);
2129                 else if (supported & bit)
2130                         (void) cg_trim(n, path, true);
2131         }
2132
2133         return 0;
2134 }
2135
2136 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2137         CGroupController c;
2138         int r;
2139
2140         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2141         if (r < 0)
2142                 return r;
2143
2144         r = cg_all_unified();
2145         if (r < 0)
2146                 return r;
2147         if (r > 0)
2148                 return 0;
2149
2150         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2151                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2152                 const char *p = NULL;
2153
2154                 if (!(supported & bit))
2155                         continue;
2156
2157                 if (path_callback)
2158                         p = path_callback(bit, userdata);
2159
2160                 if (!p)
2161                         p = path;
2162
2163                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2164         }
2165
2166         return 0;
2167 }
2168
2169 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2170         Iterator i;
2171         void *pidp;
2172         int r = 0;
2173
2174         SET_FOREACH(pidp, pids, i) {
2175                 pid_t pid = PTR_TO_PID(pidp);
2176                 int q;
2177
2178                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2179                 if (q < 0 && r >= 0)
2180                         r = q;
2181         }
2182
2183         return r;
2184 }
2185
2186 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2187         CGroupController c;
2188         int r = 0, q;
2189
2190         if (!path_equal(from, to))  {
2191                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2192                 if (r < 0)
2193                         return r;
2194         }
2195
2196         q = cg_all_unified();
2197         if (q < 0)
2198                 return q;
2199         if (q > 0)
2200                 return r;
2201
2202         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2203                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2204                 const char *p = NULL;
2205
2206                 if (!(supported & bit))
2207                         continue;
2208
2209                 if (to_callback)
2210                         p = to_callback(bit, userdata);
2211
2212                 if (!p)
2213                         p = to;
2214
2215                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2216         }
2217
2218         return 0;
2219 }
2220
2221 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2222         CGroupController c;
2223         int r, q;
2224
2225         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2226         if (r < 0)
2227                 return r;
2228
2229         q = cg_all_unified();
2230         if (q < 0)
2231                 return q;
2232         if (q > 0)
2233                 return r;
2234
2235         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2236                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2237
2238                 if (!(supported & bit))
2239                         continue;
2240
2241                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2242         }
2243
2244         return 0;
2245 }
2246
2247 int cg_mask_to_string(CGroupMask mask, char **ret) {
2248         _cleanup_free_ char *s = NULL;
2249         size_t n = 0, allocated = 0;
2250         bool space = false;
2251         CGroupController c;
2252
2253         assert(ret);
2254
2255         if (mask == 0) {
2256                 *ret = NULL;
2257                 return 0;
2258         }
2259
2260         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2261                 const char *k;
2262                 size_t l;
2263
2264                 if (!(mask & CGROUP_CONTROLLER_TO_MASK(c)))
2265                         continue;
2266
2267                 k = cgroup_controller_to_string(c);
2268                 l = strlen(k);
2269
2270                 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
2271                         return -ENOMEM;
2272
2273                 if (space)
2274                         s[n] = ' ';
2275                 memcpy(s + n + space, k, l);
2276                 n += space + l;
2277
2278                 space = true;
2279         }
2280
2281         assert(s);
2282
2283         s[n] = 0;
2284         *ret = s;
2285         s = NULL;
2286
2287         return 0;
2288 }
2289
2290 int cg_mask_from_string(const char *value, CGroupMask *mask) {
2291         assert(mask);
2292         assert(value);
2293
2294         for (;;) {
2295                 _cleanup_free_ char *n = NULL;
2296                 CGroupController v;
2297                 int r;
2298
2299                 r = extract_first_word(&value, &n, NULL, 0);
2300                 if (r < 0)
2301                         return r;
2302                 if (r == 0)
2303                         break;
2304
2305                 v = cgroup_controller_from_string(n);
2306                 if (v < 0)
2307                         continue;
2308
2309                 *mask |= CGROUP_CONTROLLER_TO_MASK(v);
2310         }
2311         return 0;
2312 }
2313
2314 int cg_mask_supported(CGroupMask *ret) {
2315         CGroupMask mask = 0;
2316         int r;
2317
2318         /* Determines the mask of supported cgroup controllers. Only
2319          * includes controllers we can make sense of and that are
2320          * actually accessible. */
2321
2322         r = cg_all_unified();
2323         if (r < 0)
2324                 return r;
2325         if (r > 0) {
2326                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2327
2328                 /* In the unified hierarchy we can read the supported
2329                  * and accessible controllers from a the top-level
2330                  * cgroup attribute */
2331
2332                 r = cg_get_root_path(&root);
2333                 if (r < 0)
2334                         return r;
2335
2336                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2337                 if (r < 0)
2338                         return r;
2339
2340                 r = read_one_line_file(path, &controllers);
2341                 if (r < 0)
2342                         return r;
2343
2344                 r = cg_mask_from_string(controllers, &mask);
2345                 if (r < 0)
2346                         return r;
2347
2348                 /* Currently, we support the cpu, memory, io and pids
2349                  * controller in the unified hierarchy, mask
2350                  * everything else off. */
2351                 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2352
2353         } else {
2354                 CGroupController c;
2355
2356                 /* In the legacy hierarchy, we check whether which
2357                  * hierarchies are mounted. */
2358
2359                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2360                         const char *n;
2361
2362                         n = cgroup_controller_to_string(c);
2363                         if (controller_is_accessible(n) >= 0)
2364                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2365                 }
2366         }
2367
2368         *ret = mask;
2369         return 0;
2370 }
2371
2372 int cg_kernel_controllers(Set *controllers) {
2373         _cleanup_fclose_ FILE *f = NULL;
2374         int r;
2375
2376         assert(controllers);
2377
2378         /* Determines the full list of kernel-known controllers. Might
2379          * include controllers we don't actually support, arbitrary
2380          * named hierarchies and controllers that aren't currently
2381          * accessible (because not mounted). */
2382
2383         f = fopen("/proc/cgroups", "re");
2384         if (!f) {
2385                 if (errno == ENOENT)
2386                         return 0;
2387                 return -errno;
2388         }
2389
2390         /* Ignore the header line */
2391         (void) read_line(f, (size_t) -1, NULL);
2392
2393         for (;;) {
2394                 char *controller;
2395                 int enabled = 0;
2396
2397                 errno = 0;
2398                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2399
2400                         if (feof(f))
2401                                 break;
2402
2403                         if (ferror(f) && errno > 0)
2404                                 return -errno;
2405
2406                         return -EBADMSG;
2407                 }
2408
2409                 if (!enabled) {
2410                         free(controller);
2411                         continue;
2412                 }
2413
2414                 if (!cg_controller_is_valid(controller)) {
2415                         free(controller);
2416                         return -EBADMSG;
2417                 }
2418
2419                 r = set_consume(controllers, controller);
2420                 if (r < 0)
2421                         return r;
2422         }
2423
2424         return 0;
2425 }
2426
/* Per-thread cache of the hierarchy layout detected by cg_unified_update(). It starts out as
 * CGROUP_UNIFIED_UNKNOWN and is reset to that value by cg_unified_flush(). */
static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2428
2429 /* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd.  This
2430  * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
 * /sys/fs/cgroup/systemd.  From v233 and on, the hybrid mode mounts v2 on /sys/fs/cgroup/unified and maintains
2432  * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
2433  *
2434  * To keep live upgrade working, we detect and support v232 layout.  When v232 layout is detected, to keep cgroup v2
2435  * process management but disable the compat dual layout, we return %true on
2436  * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
2437  */
2438 static thread_local bool unified_systemd_v232;
2439
2440 static int cg_unified_update(void) {
2441
2442         struct statfs fs;
2443
2444         /* Checks if we support the unified hierarchy. Returns an
2445          * error when the cgroup hierarchies aren't mounted yet or we
2446          * have any other trouble determining if the unified hierarchy
2447          * is supported. */
2448
2449         if (unified_cache >= CGROUP_UNIFIED_NONE)
2450                 return 0;
2451
2452         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2453                 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\" failed: %m");
2454
2455         if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2456                 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
2457                 unified_cache = CGROUP_UNIFIED_ALL;
2458         } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2459                 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
2460                     F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2461                         log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
2462                         unified_cache = CGROUP_UNIFIED_SYSTEMD;
2463                         unified_systemd_v232 = false;
2464                 } else {
2465                         if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2466                                 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
2467
2468                         if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2469                                 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
2470                                 unified_cache = CGROUP_UNIFIED_SYSTEMD;
2471                                 unified_systemd_v232 = true;
2472                         } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
2473                                 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
2474                                 unified_cache = CGROUP_UNIFIED_NONE;
2475                         } else {
2476                                 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
2477                                           (unsigned long long) fs.f_type);
2478                                 unified_cache = CGROUP_UNIFIED_NONE;
2479                         }
2480                 }
2481         } else {
2482                 log_debug("Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2483                           (unsigned long long) fs.f_type);
2484                 return -ENOMEDIUM;
2485         }
2486
2487         return 0;
2488 }
2489
2490 int cg_unified_controller(const char *controller) {
2491         int r;
2492
2493         r = cg_unified_update();
2494         if (r < 0)
2495                 return r;
2496
2497         if (unified_cache == CGROUP_UNIFIED_NONE)
2498                 return false;
2499
2500         if (unified_cache >= CGROUP_UNIFIED_ALL)
2501                 return true;
2502
2503         return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
2504 }
2505
2506 int cg_all_unified(void) {
2507         int r;
2508
2509         r = cg_unified_update();
2510         if (r < 0)
2511                 return r;
2512
2513         return unified_cache >= CGROUP_UNIFIED_ALL;
2514 }
2515
2516 int cg_hybrid_unified(void) {
2517         int r;
2518
2519         r = cg_unified_update();
2520         if (r < 0)
2521                 return r;
2522
2523         return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2524 }
2525
2526 int cg_unified_flush(void) {
2527         unified_cache = CGROUP_UNIFIED_UNKNOWN;
2528
2529         return cg_unified_update();
2530 }
2531
2532 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2533         _cleanup_free_ char *fs = NULL;
2534         CGroupController c;
2535         int r;
2536
2537         assert(p);
2538
2539         if (supported == 0)
2540                 return 0;
2541
2542         r = cg_all_unified();
2543         if (r < 0)
2544                 return r;
2545         if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
2546                 return 0;
2547
2548         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2549         if (r < 0)
2550                 return r;
2551
2552         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2553                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2554                 const char *n;
2555
2556                 if (!(supported & bit))
2557                         continue;
2558
2559                 n = cgroup_controller_to_string(c);
2560                 {
2561                         char s[1 + strlen(n) + 1];
2562
2563                         s[0] = mask & bit ? '+' : '-';
2564                         strcpy(s + 1, n);
2565
2566                         r = write_string_file(fs, s, 0);
2567                         if (r < 0)
2568                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2569                 }
2570         }
2571
2572         return 0;
2573 }
2574
2575 bool cg_is_unified_wanted(void) {
2576         static thread_local int wanted = -1;
2577         int r;
2578         bool b;
2579         const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
2580
2581         /* If we have a cached value, return that. */
2582         if (wanted >= 0)
2583                 return wanted;
2584
2585         /* If the hierarchy is already mounted, then follow whatever
2586          * was chosen for it. */
2587         if (cg_unified_flush() >= 0)
2588                 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
2589
2590         /* Otherwise, let's see what the kernel command line has to say.
2591          * Since checking is expensive, cache a non-error result. */
2592         r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
2593
2594         return (wanted = r > 0 ? b : is_default);
2595 }
2596
2597 bool cg_is_legacy_wanted(void) {
2598         static thread_local int wanted = -1;
2599
2600         /* If we have a cached value, return that. */
2601         if (wanted >= 0)
2602                 return wanted;
2603
2604         /* Check if we have cgroups2 already mounted. */
2605         if (cg_unified_flush() >= 0 &&
2606             unified_cache == CGROUP_UNIFIED_ALL)
2607                 return (wanted = false);
2608
2609         /* Otherwise, assume that at least partial legacy is wanted,
2610          * since cgroups2 should already be mounted at this point. */
2611         return (wanted = true);
2612 }
2613
2614 bool cg_is_hybrid_wanted(void) {
2615         static thread_local int wanted = -1;
2616         int r;
2617         bool b;
2618         const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2619         /* We default to true if the default is "hybrid", obviously,
2620          * but also when the default is "unified", because if we get
2621          * called, it means that unified hierarchy was not mounted. */
2622
2623         /* If we have a cached value, return that. */
2624         if (wanted >= 0)
2625                 return wanted;
2626
2627         /* If the hierarchy is already mounted, then follow whatever
2628          * was chosen for it. */
2629         if (cg_unified_flush() >= 0 &&
2630             unified_cache == CGROUP_UNIFIED_ALL)
2631                 return (wanted = false);
2632
2633         /* Otherwise, let's see what the kernel command line has to say.
2634          * Since checking is expensive, cache a non-error result. */
2635         r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
2636
2637         /* The meaning of the kernel option is reversed wrt. to the return value
2638          * of this function, hence the negation. */
2639         return (wanted = r > 0 ? !b : is_default);
2640 }
2641
2642 int cg_weight_parse(const char *s, uint64_t *ret) {
2643         uint64_t u;
2644         int r;
2645
2646         if (isempty(s)) {
2647                 *ret = CGROUP_WEIGHT_INVALID;
2648                 return 0;
2649         }
2650
2651         r = safe_atou64(s, &u);
2652         if (r < 0)
2653                 return r;
2654
2655         if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2656                 return -ERANGE;
2657
2658         *ret = u;
2659         return 0;
2660 }
2661
/* Default value of each IO limit type, indexed by CGroupIOLimitType. Every limit defaults to
 * CGROUP_LIMIT_MAX. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
};
2668
/* String name of each IO limit type, indexed by CGroupIOLimitType. */
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
        [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
        [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
        [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
};

/* Generates the string lookup helpers for the table above. */
DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2677
2678 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2679         uint64_t u;
2680         int r;
2681
2682         if (isempty(s)) {
2683                 *ret = CGROUP_CPU_SHARES_INVALID;
2684                 return 0;
2685         }
2686
2687         r = safe_atou64(s, &u);
2688         if (r < 0)
2689                 return r;
2690
2691         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2692                 return -ERANGE;
2693
2694         *ret = u;
2695         return 0;
2696 }
2697
2698 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2699         uint64_t u;
2700         int r;
2701
2702         if (isempty(s)) {
2703                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2704                 return 0;
2705         }
2706
2707         r = safe_atou64(s, &u);
2708         if (r < 0)
2709                 return r;
2710
2711         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2712                 return -ERANGE;
2713
2714         *ret = u;
2715         return 0;
2716 }
2717
2718 bool is_cgroup_fs(const struct statfs *s) {
2719         return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2720                is_fs_type(s, CGROUP2_SUPER_MAGIC);
2721 }
2722
/* Returns true if the file descriptor "fd" refers to something on a cgroup file system (see
 * is_cgroup_fs()), and false if it does not or if fstatfs() fails.
 *
 * The return type is bool, so an error code cannot be passed through: the previous
 * "return -errno" was converted to true and made a failed fstatfs() look like a positive answer.
 * When false is returned because of a failure, errno is left as set by fstatfs(). */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2731
2732 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2733         [CGROUP_CONTROLLER_CPU] = "cpu",
2734         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2735         [CGROUP_CONTROLLER_IO] = "io",
2736         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2737         [CGROUP_CONTROLLER_MEMORY] = "memory",
2738         [CGROUP_CONTROLLER_DEVICES] = "devices",
2739         [CGROUP_CONTROLLER_PIDS] = "pids",
2740 };
2741
2742 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);