src/basic/cgroup-util.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <dirent.h>
  21 #include <errno.h>
  22 #include <ftw.h>
  23 #include <limits.h>
  24 #include <signal.h>
  25 #include <stddef.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <sys/stat.h>
  29 #include <sys/statfs.h>
  30 #include <sys/types.h>
  31 #include <sys/xattr.h>
  32 #include <unistd.h>
  33
  34 #include "alloc-util.h"
  35 #include "cgroup-util.h"
  36 #include "def.h"
  37 #include "dirent-util.h"
  38 #include "extract-word.h"
  39 #include "fd-util.h"
  40 #include "fileio.h"
  41 #include "format-util.h"
  42 #include "fs-util.h"
  43 #include "log.h"
  44 #include "login-util.h"
  45 #include "macro.h"
  46 #include "missing.h"
  47 #include "mkdir.h"
  48 #include "parse-util.h"
  49 #include "path-util.h"
  50 #include "proc-cmdline.h"
  51 #include "process-util.h"
  52 #include "set.h"
  53 #include "special.h"
  54 #include "stat-util.h"
  55 #include "stdio-util.h"
  56 #include "string-table.h"
  57 #include "string-util.h"
  58 #include "strv.h"
  59 #include "unit-name.h"
  60 #include "user-util.h"
  61
  62 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  63         _cleanup_free_ char *fs = NULL;
  64         FILE *f;
  65         int r;
  66
  67         assert(_f);
  68
  69         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  70         if (r < 0)
  71                 return r;
  72
  73         f = fopen(fs, "re");
  74         if (!f)
  75                 return -errno;
  76
  77         *_f = f;
  78         return 0;
  79 }
  80
  81 int cg_read_pid(FILE *f, pid_t *_pid) {
  82         unsigned long ul;
  83
  84         /* Note that the cgroup.procs might contain duplicates! See
  85          * cgroups.txt for details. */
  86
  87         assert(f);
  88         assert(_pid);
  89
  90         errno = 0;
  91         if (fscanf(f, "%lu", &ul) != 1) {
  92
  93                 if (feof(f))
  94                         return 0;
  95
  96                 return errno > 0 ? -errno : -EIO;
  97         }
  98
  99         if (ul <= 0)
 100                 return -EIO;
 101
 102         *_pid = (pid_t) ul;
 103         return 1;
 104 }
 105
 106 int cg_read_event(const char *controller, const char *path, const char *event,
 107                   char **val)
 108 {
 109         _cleanup_free_ char *events = NULL, *content = NULL;
 110         char *p, *line;
 111         int r;
 112
 113         r = cg_get_path(controller, path, "cgroup.events", &events);
 114         if (r < 0)
 115                 return r;
 116
 117         r = read_full_file(events, &content, NULL);
 118         if (r < 0)
 119                 return r;
 120
 121         p = content;
 122         while ((line = strsep(&p, "\n"))) {
 123                 char *key;
 124
 125                 key = strsep(&line, " ");
 126                 if (!key || !line)
 127                         return -EINVAL;
 128
 129                 if (strcmp(key, event))
 130                         continue;
 131
 132                 *val = strdup(line);
 133                 return 0;
 134         }
 135
 136         return -ENOENT;
 137 }
 138
 139 bool cg_ns_supported(void) {
 140         static thread_local int enabled = -1;
 141
 142         if (enabled >= 0)
 143                 return enabled;
 144
 145         if (access("/proc/self/ns/cgroup", F_OK) == 0)
 146                 enabled = 1;
 147         else
 148                 enabled = 0;
 149
 150         return enabled;
 151 }
 152
 153 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
 154         _cleanup_free_ char *fs = NULL;
 155         int r;
 156         DIR *d;
 157
 158         assert(_d);
 159
 160         /* This is not recursive! */
 161
 162         r = cg_get_path(controller, path, NULL, &fs);
 163         if (r < 0)
 164                 return r;
 165
 166         d = opendir(fs);
 167         if (!d)
 168                 return -errno;
 169
 170         *_d = d;
 171         return 0;
 172 }
 173
 174 int cg_read_subgroup(DIR *d, char **fn) {
 175         struct dirent *de;
 176
 177         assert(d);
 178         assert(fn);
 179
 180         FOREACH_DIRENT_ALL(de, d, return -errno) {
 181                 char *b;
 182
 183                 if (de->d_type != DT_DIR)
 184                         continue;
 185
 186                 if (dot_or_dot_dot(de->d_name))
 187                         continue;
 188
 189                 b = strdup(de->d_name);
 190                 if (!b)
 191                         return -ENOMEM;
 192
 193                 *fn = b;
 194                 return 1;
 195         }
 196
 197         return 0;
 198 }
 199
 200 int cg_rmdir(const char *controller, const char *path) {
 201         _cleanup_free_ char *p = NULL;
 202         int r;
 203
 204         r = cg_get_path(controller, path, NULL, &p);
 205         if (r < 0)
 206                 return r;
 207
 208         r = rmdir(p);
 209         if (r < 0 && errno != ENOENT)
 210                 return -errno;
 211
 212         r = cg_hybrid_unified();
 213         if (r < 0)
 214                 return r;
 215         if (r == 0)
 216                 return 0;
 217
 218         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 219                 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
 220                 if (r < 0)
 221                         log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
 222         }
 223
 224         return 0;
 225 }
 226
 227 int cg_kill(
 228                 const char *controller,
 229                 const char *path,
 230                 int sig,
 231                 CGroupFlags flags,
 232                 Set *s,
 233                 cg_kill_log_func_t log_kill,
 234                 void *userdata) {
 235
 236         _cleanup_set_free_ Set *allocated_set = NULL;
 237         bool done = false;
 238         int r, ret = 0;
 239         pid_t my_pid;
 240
 241         assert(sig >= 0);
 242
 243          /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
 244           * SIGCONT on SIGKILL. */
 245         if (IN_SET(sig, SIGCONT, SIGKILL))
 246                 flags &= ~CGROUP_SIGCONT;
 247
 248         /* This goes through the tasks list and kills them all. This
 249          * is repeated until no further processes are added to the
 250          * tasks list, to properly handle forking processes */
 251
 252         if (!s) {
 253                 s = allocated_set = set_new(NULL);
 254                 if (!s)
 255                         return -ENOMEM;
 256         }
 257
 258         my_pid = getpid();
 259
 260         do {
 261                 _cleanup_fclose_ FILE *f = NULL;
 262                 pid_t pid = 0;
 263                 done = true;
 264
 265                 r = cg_enumerate_processes(controller, path, &f);
 266                 if (r < 0) {
 267                         if (ret >= 0 && r != -ENOENT)
 268                                 return r;
 269
 270                         return ret;
 271                 }
 272
 273                 while ((r = cg_read_pid(f, &pid)) > 0) {
 274
 275                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 276                                 continue;
 277
 278                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 279                                 continue;
 280
 281                         if (log_kill)
 282                                 log_kill(pid, sig, userdata);
 283
 284                         /* If we haven't killed this process yet, kill
 285                          * it */
 286                         if (kill(pid, sig) < 0) {
 287                                 if (ret >= 0 && errno != ESRCH)
 288                                         ret = -errno;
 289                         } else {
 290                                 if (flags & CGROUP_SIGCONT)
 291                                         (void) kill(pid, SIGCONT);
 292
 293                                 if (ret == 0)
 294                                         ret = 1;
 295                         }
 296
 297                         done = false;
 298
 299                         r = set_put(s, PID_TO_PTR(pid));
 300                         if (r < 0) {
 301                                 if (ret >= 0)
 302                                         return r;
 303
 304                                 return ret;
 305                         }
 306                 }
 307
 308                 if (r < 0) {
 309                         if (ret >= 0)
 310                                 return r;
 311
 312                         return ret;
 313                 }
 314
 315                 /* To avoid racing against processes which fork
 316                  * quicker than we can kill them we repeat this until
 317                  * no new pids need to be killed. */
 318
 319         } while (!done);
 320
 321         return ret;
 322 }
 323
 324 int cg_kill_recursive(
 325                 const char *controller,
 326                 const char *path,
 327                 int sig,
 328                 CGroupFlags flags,
 329                 Set *s,
 330                 cg_kill_log_func_t log_kill,
 331                 void *userdata) {
 332
 333         _cleanup_set_free_ Set *allocated_set = NULL;
 334         _cleanup_closedir_ DIR *d = NULL;
 335         int r, ret;
 336         char *fn;
 337
 338         assert(path);
 339         assert(sig >= 0);
 340
 341         if (!s) {
 342                 s = allocated_set = set_new(NULL);
 343                 if (!s)
 344                         return -ENOMEM;
 345         }
 346
 347         ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
 348
 349         r = cg_enumerate_subgroups(controller, path, &d);
 350         if (r < 0) {
 351                 if (ret >= 0 && r != -ENOENT)
 352                         return r;
 353
 354                 return ret;
 355         }
 356
 357         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 358                 _cleanup_free_ char *p = NULL;
 359
 360                 p = strjoin(path, "/", fn);
 361                 free(fn);
 362                 if (!p)
 363                         return -ENOMEM;
 364
 365                 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
 366                 if (r != 0 && ret >= 0)
 367                         ret = r;
 368         }
 369         if (ret >= 0 && r < 0)
 370                 ret = r;
 371
 372         if (flags & CGROUP_REMOVE) {
 373                 r = cg_rmdir(controller, path);
 374                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 375                         return r;
 376         }
 377
 378         return ret;
 379 }
 380
 381 int cg_migrate(
 382                 const char *cfrom,
 383                 const char *pfrom,
 384                 const char *cto,
 385                 const char *pto,
 386                 CGroupFlags flags) {
 387
 388         bool done = false;
 389         _cleanup_set_free_ Set *s = NULL;
 390         int r, ret = 0;
 391         pid_t my_pid;
 392
 393         assert(cfrom);
 394         assert(pfrom);
 395         assert(cto);
 396         assert(pto);
 397
 398         s = set_new(NULL);
 399         if (!s)
 400                 return -ENOMEM;
 401
 402         my_pid = getpid();
 403
 404         do {
 405                 _cleanup_fclose_ FILE *f = NULL;
 406                 pid_t pid = 0;
 407                 done = true;
 408
 409                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 410                 if (r < 0) {
 411                         if (ret >= 0 && r != -ENOENT)
 412                                 return r;
 413
 414                         return ret;
 415                 }
 416
 417                 while ((r = cg_read_pid(f, &pid)) > 0) {
 418
 419                         /* This might do weird stuff if we aren't a
 420                          * single-threaded program. However, we
 421                          * luckily know we are not */
 422                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 423                                 continue;
 424
 425                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 426                                 continue;
 427
 428                         /* Ignore kernel threads. Since they can only
 429                          * exist in the root cgroup, we only check for
 430                          * them there. */
 431                         if (cfrom &&
 432                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 433                             is_kernel_thread(pid) > 0)
 434                                 continue;
 435
 436                         r = cg_attach(cto, pto, pid);
 437                         if (r < 0) {
 438                                 if (ret >= 0 && r != -ESRCH)
 439                                         ret = r;
 440                         } else if (ret == 0)
 441                                 ret = 1;
 442
 443                         done = false;
 444
 445                         r = set_put(s, PID_TO_PTR(pid));
 446                         if (r < 0) {
 447                                 if (ret >= 0)
 448                                         return r;
 449
 450                                 return ret;
 451                         }
 452                 }
 453
 454                 if (r < 0) {
 455                         if (ret >= 0)
 456                                 return r;
 457
 458                         return ret;
 459                 }
 460         } while (!done);
 461
 462         return ret;
 463 }
 464
 465 int cg_migrate_recursive(
 466                 const char *cfrom,
 467                 const char *pfrom,
 468                 const char *cto,
 469                 const char *pto,
 470                 CGroupFlags flags) {
 471
 472         _cleanup_closedir_ DIR *d = NULL;
 473         int r, ret = 0;
 474         char *fn;
 475
 476         assert(cfrom);
 477         assert(pfrom);
 478         assert(cto);
 479         assert(pto);
 480
 481         ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
 482
 483         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 484         if (r < 0) {
 485                 if (ret >= 0 && r != -ENOENT)
 486                         return r;
 487
 488                 return ret;
 489         }
 490
 491         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 492                 _cleanup_free_ char *p = NULL;
 493
 494                 p = strjoin(pfrom, "/", fn);
 495                 free(fn);
 496                 if (!p)
 497                         return -ENOMEM;
 498
 499                 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
 500                 if (r != 0 && ret >= 0)
 501                         ret = r;
 502         }
 503
 504         if (r < 0 && ret >= 0)
 505                 ret = r;
 506
 507         if (flags & CGROUP_REMOVE) {
 508                 r = cg_rmdir(cfrom, pfrom);
 509                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 510                         return r;
 511         }
 512
 513         return ret;
 514 }
 515
 516 int cg_migrate_recursive_fallback(
 517                 const char *cfrom,
 518                 const char *pfrom,
 519                 const char *cto,
 520                 const char *pto,
 521                 CGroupFlags flags) {
 522
 523         int r;
 524
 525         assert(cfrom);
 526         assert(pfrom);
 527         assert(cto);
 528         assert(pto);
 529
 530         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
 531         if (r < 0) {
 532                 char prefix[strlen(pto) + 1];
 533
 534                 /* This didn't work? Then let's try all prefixes of the destination */
 535
 536                 PATH_FOREACH_PREFIX(prefix, pto) {
 537                         int q;
 538
 539                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
 540                         if (q >= 0)
 541                                 return q;
 542                 }
 543         }
 544
 545         return r;
 546 }
 547
 548 static const char *controller_to_dirname(const char *controller) {
 549         const char *e;
 550
 551         assert(controller);
 552
 553         /* Converts a controller name to the directory name below
 554          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 555          * just cuts off the name= prefixed used for named
 556          * hierarchies, if it is specified. */
 557
 558         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 559                 if (cg_hybrid_unified() > 0)
 560                         controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
 561                 else
 562                         controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
 563         }
 564
 565         e = startswith(controller, "name=");
 566         if (e)
 567                 return e;
 568
 569         return controller;
 570 }
 571
 572 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 573         const char *dn;
 574         char *t = NULL;
 575
 576         assert(fs);
 577         assert(controller);
 578
 579         dn = controller_to_dirname(controller);
 580
 581         if (isempty(path) && isempty(suffix))
 582                 t = strappend("/sys/fs/cgroup/", dn);
 583         else if (isempty(path))
 584                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix);
 585         else if (isempty(suffix))
 586                 t = strjoin("/sys/fs/cgroup/", dn, "/", path);
 587         else
 588                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix);
 589         if (!t)
 590                 return -ENOMEM;
 591
 592         *fs = t;
 593         return 0;
 594 }
 595
 596 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 597         char *t;
 598
 599         assert(fs);
 600
 601         if (isempty(path) && isempty(suffix))
 602                 t = strdup("/sys/fs/cgroup");
 603         else if (isempty(path))
 604                 t = strappend("/sys/fs/cgroup/", suffix);
 605         else if (isempty(suffix))
 606                 t = strappend("/sys/fs/cgroup/", path);
 607         else
 608                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix);
 609         if (!t)
 610                 return -ENOMEM;
 611
 612         *fs = t;
 613         return 0;
 614 }
 615
 616 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 617         int r;
 618
 619         assert(fs);
 620
 621         if (!controller) {
 622                 char *t;
 623
 624                 /* If no controller is specified, we return the path
 625                  * *below* the controllers, without any prefix. */
 626
 627                 if (!path && !suffix)
 628                         return -EINVAL;
 629
 630                 if (!suffix)
 631                         t = strdup(path);
 632                 else if (!path)
 633                         t = strdup(suffix);
 634                 else
 635                         t = strjoin(path, "/", suffix);
 636                 if (!t)
 637                         return -ENOMEM;
 638
 639                 *fs = path_kill_slashes(t);
 640                 return 0;
 641         }
 642
 643         if (!cg_controller_is_valid(controller))
 644                 return -EINVAL;
 645
 646         r = cg_all_unified();
 647         if (r < 0)
 648                 return r;
 649         if (r > 0)
 650                 r = join_path_unified(path, suffix, fs);
 651         else
 652                 r = join_path_legacy(controller, path, suffix, fs);
 653         if (r < 0)
 654                 return r;
 655
 656         path_kill_slashes(*fs);
 657         return 0;
 658 }
 659
 660 static int controller_is_accessible(const char *controller) {
 661         int r;
 662
 663         assert(controller);
 664
 665         /* Checks whether a specific controller is accessible,
 666          * i.e. its hierarchy mounted. In the unified hierarchy all
 667          * controllers are considered accessible, except for the named
 668          * hierarchies */
 669
 670         if (!cg_controller_is_valid(controller))
 671                 return -EINVAL;
 672
 673         r = cg_all_unified();
 674         if (r < 0)
 675                 return r;
 676         if (r > 0) {
 677                 /* We don't support named hierarchies if we are using
 678                  * the unified hierarchy. */
 679
 680                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 681                         return 0;
 682
 683                 if (startswith(controller, "name="))
 684                         return -EOPNOTSUPP;
 685
 686         } else {
 687                 const char *cc, *dn;
 688
 689                 dn = controller_to_dirname(controller);
 690                 cc = strjoina("/sys/fs/cgroup/", dn);
 691
 692                 if (laccess(cc, F_OK) < 0)
 693                         return -errno;
 694         }
 695
 696         return 0;
 697 }
 698
 699 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 700         int r;
 701
 702         assert(controller);
 703         assert(fs);
 704
 705         /* Check if the specified controller is actually accessible */
 706         r = controller_is_accessible(controller);
 707         if (r < 0)
 708                 return r;
 709
 710         return cg_get_path(controller, path, suffix, fs);
 711 }
 712
 713 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 714         assert(path);
 715         assert(sb);
 716         assert(ftwbuf);
 717
 718         if (typeflag != FTW_DP)
 719                 return 0;
 720
 721         if (ftwbuf->level < 1)
 722                 return 0;
 723
 724         (void) rmdir(path);
 725         return 0;
 726 }
 727
 728 int cg_trim(const char *controller, const char *path, bool delete_root) {
 729         _cleanup_free_ char *fs = NULL;
 730         int r = 0, q;
 731
 732         assert(path);
 733
 734         r = cg_get_path(controller, path, NULL, &fs);
 735         if (r < 0)
 736                 return r;
 737
 738         errno = 0;
 739         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 740                 if (errno == ENOENT)
 741                         r = 0;
 742                 else if (errno > 0)
 743                         r = -errno;
 744                 else
 745                         r = -EIO;
 746         }
 747
 748         if (delete_root) {
 749                 if (rmdir(fs) < 0 && errno != ENOENT)
 750                         return -errno;
 751         }
 752
 753         q = cg_hybrid_unified();
 754         if (q < 0)
 755                 return q;
 756         if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 757                 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
 758                 if (q < 0)
 759                         log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
 760         }
 761
 762         return r;
 763 }
 764
 765 int cg_create(const char *controller, const char *path) {
 766         _cleanup_free_ char *fs = NULL;
 767         int r;
 768
 769         r = cg_get_path_and_check(controller, path, NULL, &fs);
 770         if (r < 0)
 771                 return r;
 772
 773         r = mkdir_parents(fs, 0755);
 774         if (r < 0)
 775                 return r;
 776
 777         if (mkdir(fs, 0755) < 0) {
 778
 779                 if (errno == EEXIST)
 780                         return 0;
 781
 782                 return -errno;
 783         }
 784
 785         r = cg_hybrid_unified();
 786         if (r < 0)
 787                 return r;
 788
 789         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 790                 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
 791                 if (r < 0)
 792                         log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
 793         }
 794
 795         return 1;
 796 }
 797
 798 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 799         int r, q;
 800
 801         assert(pid >= 0);
 802
 803         r = cg_create(controller, path);
 804         if (r < 0)
 805                 return r;
 806
 807         q = cg_attach(controller, path, pid);
 808         if (q < 0)
 809                 return q;
 810
 811         /* This does not remove the cgroup on failure */
 812         return r;
 813 }
 814
 815 int cg_attach(const char *controller, const char *path, pid_t pid) {
 816         _cleanup_free_ char *fs = NULL;
 817         char c[DECIMAL_STR_MAX(pid_t) + 2];
 818         int r;
 819
 820         assert(path);
 821         assert(pid >= 0);
 822
 823         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 824         if (r < 0)
 825                 return r;
 826
 827         if (pid == 0)
 828                 pid = getpid();
 829
 830         xsprintf(c, PID_FMT "\n", pid);
 831
 832         r = write_string_file(fs, c, 0);
 833         if (r < 0)
 834                 return r;
 835
 836         r = cg_hybrid_unified();
 837         if (r < 0)
 838                 return r;
 839
 840         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 841                 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
 842                 if (r < 0)
 843                         log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
 844         }
 845
 846         return 0;
 847 }
 848
 849 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 850         int r;
 851
 852         assert(controller);
 853         assert(path);
 854         assert(pid >= 0);
 855
 856         r = cg_attach(controller, path, pid);
 857         if (r < 0) {
 858                 char prefix[strlen(path) + 1];
 859
 860                 /* This didn't work? Then let's try all prefixes of
 861                  * the destination */
 862
 863                 PATH_FOREACH_PREFIX(prefix, path) {
 864                         int q;
 865
 866                         q = cg_attach(controller, prefix, pid);
 867                         if (q >= 0)
 868                                 return q;
 869                 }
 870         }
 871
 872         return r;
 873 }
 874
 875 int cg_set_group_access(
 876                 const char *controller,
 877                 const char *path,
 878                 mode_t mode,
 879                 uid_t uid,
 880                 gid_t gid) {
 881
 882         _cleanup_free_ char *fs = NULL;
 883         int r;
 884
 885         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 886                 return 0;
 887
 888         if (mode != MODE_INVALID)
 889                 mode &= 0777;
 890
 891         r = cg_get_path(controller, path, NULL, &fs);
 892         if (r < 0)
 893                 return r;
 894
 895         r = chmod_and_chown(fs, mode, uid, gid);
 896         if (r < 0)
 897                 return r;
 898
 899         r = cg_hybrid_unified();
 900         if (r < 0)
 901                 return r;
 902         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 903                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
 904                 if (r < 0)
 905                         log_warning_errno(r, "Failed to set group access on compat systemd cgroup %s: %m", path);
 906         }
 907
 908         return 0;
 909 }
 910
 911 int cg_set_task_access(
 912                 const char *controller,
 913                 const char *path,
 914                 mode_t mode,
 915                 uid_t uid,
 916                 gid_t gid) {
 917
 918         _cleanup_free_ char *fs = NULL, *procs = NULL;
 919         int r;
 920
 921         assert(path);
 922
 923         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 924                 return 0;
 925
 926         if (mode != MODE_INVALID)
 927                 mode &= 0666;
 928
 929         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 930         if (r < 0)
 931                 return r;
 932
 933         r = chmod_and_chown(fs, mode, uid, gid);
 934         if (r < 0)
 935                 return r;
 936
 937         r = cg_unified_controller(controller);
 938         if (r < 0)
 939                 return r;
 940         if (r == 0) {
 941                 /* Compatibility, Always keep values for "tasks" in sync with
 942                  * "cgroup.procs" */
 943                 if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 944                         (void) chmod_and_chown(procs, mode, uid, gid);
 945         }
 946
 947         r = cg_hybrid_unified();
 948         if (r < 0)
 949                 return r;
 950         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
 951                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, mode, uid, gid);
 952                 if (r < 0)
 953                         log_warning_errno(r, "Failed to set task access on compat systemd cgroup %s: %m", path);
 954         }
 955
 956         return 0;
 957 }
 958
 959 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
 960         _cleanup_free_ char *fs = NULL;
 961         int r;
 962
 963         assert(path);
 964         assert(name);
 965         assert(value || size <= 0);
 966
 967         r = cg_get_path(controller, path, NULL, &fs);
 968         if (r < 0)
 969                 return r;
 970
 971         if (setxattr(fs, name, value, size, flags) < 0)
 972                 return -errno;
 973
 974         return 0;
 975 }
 976
 977 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
 978         _cleanup_free_ char *fs = NULL;
 979         ssize_t n;
 980         int r;
 981
 982         assert(path);
 983         assert(name);
 984
 985         r = cg_get_path(controller, path, NULL, &fs);
 986         if (r < 0)
 987                 return r;
 988
 989         n = getxattr(fs, name, value, size);
 990         if (n < 0)
 991                 return -errno;
 992
 993         return (int) n;
 994 }
 995
 996 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 997         _cleanup_fclose_ FILE *f = NULL;
 998         char line[LINE_MAX];
 999         const char *fs, *controller_str;
1000         size_t cs = 0;
1001         int unified;
1002
1003         assert(path);
1004         assert(pid >= 0);
1005
1006         if (controller) {
1007                 if (!cg_controller_is_valid(controller))
1008                         return -EINVAL;
1009         } else
1010                 controller = SYSTEMD_CGROUP_CONTROLLER;
1011
1012         unified = cg_unified_controller(controller);
1013         if (unified < 0)
1014                 return unified;
1015         if (unified == 0) {
1016                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
1017                         controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
1018                 else
1019                         controller_str = controller;
1020
1021                 cs = strlen(controller_str);
1022         }
1023
1024         fs = procfs_file_alloca(pid, "cgroup");
1025         f = fopen(fs, "re");
1026         if (!f)
1027                 return errno == ENOENT ? -ESRCH : -errno;
1028
1029         FOREACH_LINE(line, f, return -errno) {
1030                 char *e, *p;
1031
1032                 truncate_nl(line);
1033
1034                 if (unified) {
1035                         e = startswith(line, "0:");
1036                         if (!e)
1037                                 continue;
1038
1039                         e = strchr(e, ':');
1040                         if (!e)
1041                                 continue;
1042                 } else {
1043                         char *l;
1044                         size_t k;
1045                         const char *word, *state;
1046                         bool found = false;
1047
1048                         l = strchr(line, ':');
1049                         if (!l)
1050                                 continue;
1051
1052                         l++;
1053                         e = strchr(l, ':');
1054                         if (!e)
1055                                 continue;
1056
1057                         *e = 0;
1058                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
1059                                 if (k == cs && memcmp(word, controller_str, cs) == 0) {
1060                                         found = true;
1061                                         break;
1062                                 }
1063                         }
1064
1065                         if (!found)
1066                                 continue;
1067                 }
1068
1069                 p = strdup(e + 1);
1070                 if (!p)
1071                         return -ENOMEM;
1072
1073                 *path = p;
1074                 return 0;
1075         }
1076
1077         return -ENODATA;
1078 }
1079
1080 int cg_install_release_agent(const char *controller, const char *agent) {
1081         _cleanup_free_ char *fs = NULL, *contents = NULL;
1082         const char *sc;
1083         int r;
1084
1085         assert(agent);
1086
1087         r = cg_unified_controller(controller);
1088         if (r < 0)
1089                 return r;
1090         if (r > 0) /* doesn't apply to unified hierarchy */
1091                 return -EOPNOTSUPP;
1092
1093         r = cg_get_path(controller, NULL, "release_agent", &fs);
1094         if (r < 0)
1095                 return r;
1096
1097         r = read_one_line_file(fs, &contents);
1098         if (r < 0)
1099                 return r;
1100
1101         sc = strstrip(contents);
1102         if (isempty(sc)) {
1103                 r = write_string_file(fs, agent, 0);
1104                 if (r < 0)
1105                         return r;
1106         } else if (!path_equal(sc, agent))
1107                 return -EEXIST;
1108
1109         fs = mfree(fs);
1110         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1111         if (r < 0)
1112                 return r;
1113
1114         contents = mfree(contents);
1115         r = read_one_line_file(fs, &contents);
1116         if (r < 0)
1117                 return r;
1118
1119         sc = strstrip(contents);
1120         if (streq(sc, "0")) {
1121                 r = write_string_file(fs, "1", 0);
1122                 if (r < 0)
1123                         return r;
1124
1125                 return 1;
1126         }
1127
1128         if (!streq(sc, "1"))
1129                 return -EIO;
1130
1131         return 0;
1132 }
1133
1134 int cg_uninstall_release_agent(const char *controller) {
1135         _cleanup_free_ char *fs = NULL;
1136         int r;
1137
1138         r = cg_unified_controller(controller);
1139         if (r < 0)
1140                 return r;
1141         if (r > 0) /* Doesn't apply to unified hierarchy */
1142                 return -EOPNOTSUPP;
1143
1144         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1145         if (r < 0)
1146                 return r;
1147
1148         r = write_string_file(fs, "0", 0);
1149         if (r < 0)
1150                 return r;
1151
1152         fs = mfree(fs);
1153
1154         r = cg_get_path(controller, NULL, "release_agent", &fs);
1155         if (r < 0)
1156                 return r;
1157
1158         r = write_string_file(fs, "", 0);
1159         if (r < 0)
1160                 return r;
1161
1162         return 0;
1163 }
1164
1165 int cg_is_empty(const char *controller, const char *path) {
1166         _cleanup_fclose_ FILE *f = NULL;
1167         pid_t pid;
1168         int r;
1169
1170         assert(path);
1171
1172         r = cg_enumerate_processes(controller, path, &f);
1173         if (r == -ENOENT)
1174                 return 1;
1175         if (r < 0)
1176                 return r;
1177
1178         r = cg_read_pid(f, &pid);
1179         if (r < 0)
1180                 return r;
1181
1182         return r == 0;
1183 }
1184
1185 int cg_is_empty_recursive(const char *controller, const char *path) {
1186         int r;
1187
1188         assert(path);
1189
1190         /* The root cgroup is always populated */
1191         if (controller && (isempty(path) || path_equal(path, "/")))
1192                 return false;
1193
1194         r = cg_unified_controller(controller);
1195         if (r < 0)
1196                 return r;
1197         if (r > 0) {
1198                 _cleanup_free_ char *t = NULL;
1199
1200                 /* On the unified hierarchy we can check empty state
1201                  * via the "populated" attribute of "cgroup.events". */
1202
1203                 r = cg_read_event(controller, path, "populated", &t);
1204                 if (r < 0)
1205                         return r;
1206
1207                 return streq(t, "0");
1208         } else {
1209                 _cleanup_closedir_ DIR *d = NULL;
1210                 char *fn;
1211
1212                 r = cg_is_empty(controller, path);
1213                 if (r <= 0)
1214                         return r;
1215
1216                 r = cg_enumerate_subgroups(controller, path, &d);
1217                 if (r == -ENOENT)
1218                         return 1;
1219                 if (r < 0)
1220                         return r;
1221
1222                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1223                         _cleanup_free_ char *p = NULL;
1224
1225                         p = strjoin(path, "/", fn);
1226                         free(fn);
1227                         if (!p)
1228                                 return -ENOMEM;
1229
1230                         r = cg_is_empty_recursive(controller, p);
1231                         if (r <= 0)
1232                                 return r;
1233                 }
1234                 if (r < 0)
1235                         return r;
1236
1237                 return true;
1238         }
1239 }
1240
1241 int cg_split_spec(const char *spec, char **controller, char **path) {
1242         char *t = NULL, *u = NULL;
1243         const char *e;
1244
1245         assert(spec);
1246
1247         if (*spec == '/') {
1248                 if (!path_is_safe(spec))
1249                         return -EINVAL;
1250
1251                 if (path) {
1252                         t = strdup(spec);
1253                         if (!t)
1254                                 return -ENOMEM;
1255
1256                         *path = path_kill_slashes(t);
1257                 }
1258
1259                 if (controller)
1260                         *controller = NULL;
1261
1262                 return 0;
1263         }
1264
1265         e = strchr(spec, ':');
1266         if (!e) {
1267                 if (!cg_controller_is_valid(spec))
1268                         return -EINVAL;
1269
1270                 if (controller) {
1271                         t = strdup(spec);
1272                         if (!t)
1273                                 return -ENOMEM;
1274
1275                         *controller = t;
1276                 }
1277
1278                 if (path)
1279                         *path = NULL;
1280
1281                 return 0;
1282         }
1283
1284         t = strndup(spec, e-spec);
1285         if (!t)
1286                 return -ENOMEM;
1287         if (!cg_controller_is_valid(t)) {
1288                 free(t);
1289                 return -EINVAL;
1290         }
1291
1292         if (isempty(e+1))
1293                 u = NULL;
1294         else {
1295                 u = strdup(e+1);
1296                 if (!u) {
1297                         free(t);
1298                         return -ENOMEM;
1299                 }
1300
1301                 if (!path_is_safe(u) ||
1302                     !path_is_absolute(u)) {
1303                         free(t);
1304                         free(u);
1305                         return -EINVAL;
1306                 }
1307
1308                 path_kill_slashes(u);
1309         }
1310
1311         if (controller)
1312                 *controller = t;
1313         else
1314                 free(t);
1315
1316         if (path)
1317                 *path = u;
1318         else
1319                 free(u);
1320
1321         return 0;
1322 }
1323
1324 int cg_mangle_path(const char *path, char **result) {
1325         _cleanup_free_ char *c = NULL, *p = NULL;
1326         char *t;
1327         int r;
1328
1329         assert(path);
1330         assert(result);
1331
1332         /* First, check if it already is a filesystem path */
1333         if (path_startswith(path, "/sys/fs/cgroup")) {
1334
1335                 t = strdup(path);
1336                 if (!t)
1337                         return -ENOMEM;
1338
1339                 *result = path_kill_slashes(t);
1340                 return 0;
1341         }
1342
1343         /* Otherwise, treat it as cg spec */
1344         r = cg_split_spec(path, &c, &p);
1345         if (r < 0)
1346                 return r;
1347
1348         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1349 }
1350
1351 int cg_get_root_path(char **path) {
1352         char *p, *e;
1353         int r;
1354
1355         assert(path);
1356
1357         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1358         if (r < 0)
1359                 return r;
1360
1361         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1362         if (!e)
1363                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1364         if (!e)
1365                 e = endswith(p, "/system"); /* even more legacy */
1366         if (e)
1367                 *e = 0;
1368
1369         *path = p;
1370         return 0;
1371 }
1372
1373 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1374         _cleanup_free_ char *rt = NULL;
1375         char *p;
1376         int r;
1377
1378         assert(cgroup);
1379         assert(shifted);
1380
1381         if (!root) {
1382                 /* If the root was specified let's use that, otherwise
1383                  * let's determine it from PID 1 */
1384
1385                 r = cg_get_root_path(&rt);
1386                 if (r < 0)
1387                         return r;
1388
1389                 root = rt;
1390         }
1391
1392         p = path_startswith(cgroup, root);
1393         if (p && p > cgroup)
1394                 *shifted = p - 1;
1395         else
1396                 *shifted = cgroup;
1397
1398         return 0;
1399 }
1400
1401 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1402         _cleanup_free_ char *raw = NULL;
1403         const char *c;
1404         int r;
1405
1406         assert(pid >= 0);
1407         assert(cgroup);
1408
1409         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1410         if (r < 0)
1411                 return r;
1412
1413         r = cg_shift_path(raw, root, &c);
1414         if (r < 0)
1415                 return r;
1416
1417         if (c == raw) {
1418                 *cgroup = raw;
1419                 raw = NULL;
1420         } else {
1421                 char *n;
1422
1423                 n = strdup(c);
1424                 if (!n)
1425                         return -ENOMEM;
1426
1427                 *cgroup = n;
1428         }
1429
1430         return 0;
1431 }
1432
1433 int cg_path_decode_unit(const char *cgroup, char **unit) {
1434         char *c, *s;
1435         size_t n;
1436
1437         assert(cgroup);
1438         assert(unit);
1439
1440         n = strcspn(cgroup, "/");
1441         if (n < 3)
1442                 return -ENXIO;
1443
1444         c = strndupa(cgroup, n);
1445         c = cg_unescape(c);
1446
1447         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1448                 return -ENXIO;
1449
1450         s = strdup(c);
1451         if (!s)
1452                 return -ENOMEM;
1453
1454         *unit = s;
1455         return 0;
1456 }
1457
1458 static bool valid_slice_name(const char *p, size_t n) {
1459
1460         if (!p)
1461                 return false;
1462
1463         if (n < strlen("x.slice"))
1464                 return false;
1465
1466         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1467                 char buf[n+1], *c;
1468
1469                 memcpy(buf, p, n);
1470                 buf[n] = 0;
1471
1472                 c = cg_unescape(buf);
1473
1474                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1475         }
1476
1477         return false;
1478 }
1479
/* Skips over all leading slice components of 'p' (and the slashes in front of each). Returns a
 * pointer to the first component that valid_slice_name() does not accept, which may be the
 * terminating NUL byte. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (p += strspn(p, "/");; p += strspn(p, "/")) {
                size_t len;

                len = strcspn(p, "/");
                if (!valid_slice_name(p, len))
                        return p;

                p += len;
        }
}
1497
1498 int cg_path_get_unit(const char *path, char **ret) {
1499         const char *e;
1500         char *unit;
1501         int r;
1502
1503         assert(path);
1504         assert(ret);
1505
1506         e = skip_slices(path);
1507
1508         r = cg_path_decode_unit(e, &unit);
1509         if (r < 0)
1510                 return r;
1511
1512         /* We skipped over the slices, don't accept any now */
1513         if (endswith(unit, ".slice")) {
1514                 free(unit);
1515                 return -ENXIO;
1516         }
1517
1518         *ret = unit;
1519         return 0;
1520 }
1521
1522 int cg_pid_get_unit(pid_t pid, char **unit) {
1523         _cleanup_free_ char *cgroup = NULL;
1524         int r;
1525
1526         assert(unit);
1527
1528         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1529         if (r < 0)
1530                 return r;
1531
1532         return cg_path_get_unit(cgroup, unit);
1533 }
1534
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the leading "session-<id>.scope" component of p (with the
 * slashes after it skipped too), or NULL if p is empty, does not start with such a component, or
 * the id in it is rejected by session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        /* The length check guarantees that both fixed-size comparisons below stay inside the
         * component, and that the id between prefix and suffix is at least one character long. */
        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                /* Cut out the id between "session-" and ".scope" */
                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1571
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the leading "user@<uid>.service" component of p (with the
 * slashes after it skipped too), or NULL if p is empty, does not start with such a component, or
 * the part between prefix and suffix is rejected by parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t n, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + n - 8, ".service", 8) != 0)
                return NULL;

        uid_len = n - 5 - 8;
        {
                char uid_str[uid_len + 1];

                memcpy(uid_str, p + 5, uid_len);
                uid_str[uid_len] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;
        }

        p += n;

        return p + strspn(p, "/");
}
1609
/* Skips the part of 'path' that leads to a user's own units: any leading slices, followed by either
 * the user manager service or, failing that, a session scope. Returns a pointer to the remainder,
 * or NULL if neither of the two follows the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Prefer the user manager if it's in the path now, fall back to the user session */
        after_manager = skip_user_manager(after_slices);

        return after_manager ?: skip_session(after_slices);
}
1626
/* Determines the user unit a cgroup path belongs to: skips the user prefix (see skip_user_prefix())
 * and parses what remains with cg_path_get_unit(). Returns -ENXIO if there is no user prefix,
 * otherwise whatever cg_path_get_unit() returns. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the prefix things look pretty much the same as for a system unit, hence the same
         * parser is used. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1642
1643 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1644         _cleanup_free_ char *cgroup = NULL;
1645         int r;
1646
1647         assert(unit);
1648
1649         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1650         if (r < 0)
1651                 return r;
1652
1653         return cg_path_get_user_unit(cgroup, unit);
1654 }
1655
1656 int cg_path_get_machine_name(const char *path, char **machine) {
1657         _cleanup_free_ char *u = NULL;
1658         const char *sl;
1659         int r;
1660
1661         r = cg_path_get_unit(path, &u);
1662         if (r < 0)
1663                 return r;
1664
1665         sl = strjoina("/run/systemd/machines/unit:", u);
1666         return readlink_malloc(sl, machine);
1667 }
1668
1669 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1670         _cleanup_free_ char *cgroup = NULL;
1671         int r;
1672
1673         assert(machine);
1674
1675         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1676         if (r < 0)
1677                 return r;
1678
1679         return cg_path_get_machine_name(cgroup, machine);
1680 }
1681
1682 int cg_path_get_session(const char *path, char **session) {
1683         _cleanup_free_ char *unit = NULL;
1684         char *start, *end;
1685         int r;
1686
1687         assert(path);
1688
1689         r = cg_path_get_unit(path, &unit);
1690         if (r < 0)
1691                 return r;
1692
1693         start = startswith(unit, "session-");
1694         if (!start)
1695                 return -ENXIO;
1696         end = endswith(start, ".scope");
1697         if (!end)
1698                 return -ENXIO;
1699
1700         *end = 0;
1701         if (!session_id_valid(start))
1702                 return -ENXIO;
1703
1704         if (session) {
1705                 char *rr;
1706
1707                 rr = strdup(start);
1708                 if (!rr)
1709                         return -ENOMEM;
1710
1711                 *session = rr;
1712         }
1713
1714         return 0;
1715 }
1716
1717 int cg_pid_get_session(pid_t pid, char **session) {
1718         _cleanup_free_ char *cgroup = NULL;
1719         int r;
1720
1721         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1722         if (r < 0)
1723                 return r;
1724
1725         return cg_path_get_session(cgroup, session);
1726 }
1727
1728 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1729         _cleanup_free_ char *slice = NULL;
1730         char *start, *end;
1731         int r;
1732
1733         assert(path);
1734
1735         r = cg_path_get_slice(path, &slice);
1736         if (r < 0)
1737                 return r;
1738
1739         start = startswith(slice, "user-");
1740         if (!start)
1741                 return -ENXIO;
1742         end = endswith(start, ".slice");
1743         if (!end)
1744                 return -ENXIO;
1745
1746         *end = 0;
1747         if (parse_uid(start, uid) < 0)
1748                 return -ENXIO;
1749
1750         return 0;
1751 }
1752
1753 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1754         _cleanup_free_ char *cgroup = NULL;
1755         int r;
1756
1757         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1758         if (r < 0)
1759                 return r;
1760
1761         return cg_path_get_owner_uid(cgroup, uid);
1762 }
1763
1764 int cg_path_get_slice(const char *p, char **slice) {
1765         const char *e = NULL;
1766
1767         assert(p);
1768         assert(slice);
1769
1770         /* Finds the right-most slice unit from the beginning, but
1771          * stops before we come to the first non-slice unit. */
1772
1773         for (;;) {
1774                 size_t n;
1775
1776                 p += strspn(p, "/");
1777
1778                 n = strcspn(p, "/");
1779                 if (!valid_slice_name(p, n)) {
1780
1781                         if (!e) {
1782                                 char *s;
1783
1784                                 s = strdup(SPECIAL_ROOT_SLICE);
1785                                 if (!s)
1786                                         return -ENOMEM;
1787
1788                                 *slice = s;
1789                                 return 0;
1790                         }
1791
1792                         return cg_path_decode_unit(e, slice);
1793                 }
1794
1795                 e = p;
1796                 p += n;
1797         }
1798 }
1799
1800 int cg_pid_get_slice(pid_t pid, char **slice) {
1801         _cleanup_free_ char *cgroup = NULL;
1802         int r;
1803
1804         assert(slice);
1805
1806         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1807         if (r < 0)
1808                 return r;
1809
1810         return cg_path_get_slice(cgroup, slice);
1811 }
1812
/* Determines the user slice a cgroup path belongs to: skips the user prefix (see
 * skip_user_prefix()) and parses what remains with cg_path_get_slice(). Returns -ENXIO if there is
 * no user prefix, otherwise whatever cg_path_get_slice() returns. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the prefix things look pretty much the same as for a system slice, hence the same
         * parser is used. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1826
1827 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1828         _cleanup_free_ char *cgroup = NULL;
1829         int r;
1830
1831         assert(slice);
1832
1833         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1834         if (r < 0)
1835                 return r;
1836
1837         return cg_path_get_user_slice(cgroup, slice);
1838 }
1839
1840 char *cg_escape(const char *p) {
1841         bool need_prefix = false;
1842
1843         /* This implements very minimal escaping for names to be used
1844          * as file names in the cgroup tree: any name which might
1845          * conflict with a kernel name or is prefixed with '_' is
1846          * prefixed with a '_'. That way, when reading cgroup names it
1847          * is sufficient to remove a single prefixing underscore if
1848          * there is one. */
1849
1850         /* The return value of this function (unlike cg_unescape())
1851          * needs free()! */
1852
1853         if (p[0] == 0 ||
1854             p[0] == '_' ||
1855             p[0] == '.' ||
1856             streq(p, "notify_on_release") ||
1857             streq(p, "release_agent") ||
1858             streq(p, "tasks") ||
1859             startswith(p, "cgroup."))
1860                 need_prefix = true;
1861         else {
1862                 const char *dot;
1863
1864                 dot = strrchr(p, '.');
1865                 if (dot) {
1866                         CGroupController c;
1867                         size_t l = dot - p;
1868
1869                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1870                                 const char *n;
1871
1872                                 n = cgroup_controller_to_string(c);
1873
1874                                 if (l != strlen(n))
1875                                         continue;
1876
1877                                 if (memcmp(p, n, l) != 0)
1878                                         continue;
1879
1880                                 need_prefix = true;
1881                                 break;
1882                         }
1883                 }
1884         }
1885
1886         if (need_prefix)
1887                 return strappend("_", p);
1888
1889         return strdup(p);
1890 }
1891
/* Undoes cg_escape(): skips a single leading underscore, if there is one.
 *
 * The return value of this function (unlike cg_escape()) doesn't need free()! It points into the
 * string passed in. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1903
1904 #define CONTROLLER_VALID                        \
1905         DIGITS LETTERS                          \
1906         "_"
1907
1908 bool cg_controller_is_valid(const char *p) {
1909         const char *t, *s;
1910
1911         if (!p)
1912                 return false;
1913
1914         if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
1915                 return true;
1916
1917         s = startswith(p, "name=");
1918         if (s)
1919                 p = s;
1920
1921         if (*p == 0 || *p == '_')
1922                 return false;
1923
1924         for (t = p; *t; t++)
1925                 if (!strchr(CONTROLLER_VALID, *t))
1926                         return false;
1927
1928         if (t - p > FILENAME_MAX)
1929                 return false;
1930
1931         return true;
1932 }
1933
1934 int cg_slice_to_path(const char *unit, char **ret) {
1935         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1936         const char *dash;
1937         int r;
1938
1939         assert(unit);
1940         assert(ret);
1941
1942         if (streq(unit, SPECIAL_ROOT_SLICE)) {
1943                 char *x;
1944
1945                 x = strdup("");
1946                 if (!x)
1947                         return -ENOMEM;
1948                 *ret = x;
1949                 return 0;
1950         }
1951
1952         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1953                 return -EINVAL;
1954
1955         if (!endswith(unit, ".slice"))
1956                 return -EINVAL;
1957
1958         r = unit_name_to_prefix(unit, &p);
1959         if (r < 0)
1960                 return r;
1961
1962         dash = strchr(p, '-');
1963
1964         /* Don't allow initial dashes */
1965         if (dash == p)
1966                 return -EINVAL;
1967
1968         while (dash) {
1969                 _cleanup_free_ char *escaped = NULL;
1970                 char n[dash - p + sizeof(".slice")];
1971
1972                 /* Don't allow trailing or double dashes */
1973                 if (dash[1] == 0 || dash[1] == '-')
1974                         return -EINVAL;
1975
1976                 strcpy(stpncpy(n, p, dash - p), ".slice");
1977                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1978                         return -EINVAL;
1979
1980                 escaped = cg_escape(n);
1981                 if (!escaped)
1982                         return -ENOMEM;
1983
1984                 if (!strextend(&s, escaped, "/", NULL))
1985                         return -ENOMEM;
1986
1987                 dash = strchr(dash+1, '-');
1988         }
1989
1990         e = cg_escape(unit);
1991         if (!e)
1992                 return -ENOMEM;
1993
1994         if (!strextend(&s, e, NULL))
1995                 return -ENOMEM;
1996
1997         *ret = s;
1998         s = NULL;
1999
2000         return 0;
2001 }
2002
2003 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
2004         _cleanup_free_ char *p = NULL;
2005         int r;
2006
2007         r = cg_get_path(controller, path, attribute, &p);
2008         if (r < 0)
2009                 return r;
2010
2011         return write_string_file(p, value, 0);
2012 }
2013
2014 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
2015         _cleanup_free_ char *p = NULL;
2016         int r;
2017
2018         r = cg_get_path(controller, path, attribute, &p);
2019         if (r < 0)
2020                 return r;
2021
2022         return read_one_line_file(p, ret);
2023 }
2024
2025 int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
2026         _cleanup_free_ char *filename = NULL, *content = NULL;
2027         char *line, *p;
2028         int i, r;
2029
2030         for (i = 0; keys[i]; i++)
2031                 values[i] = NULL;
2032
2033         r = cg_get_path(controller, path, attribute, &filename);
2034         if (r < 0)
2035                 return r;
2036
2037         r = read_full_file(filename, &content, NULL);
2038         if (r < 0)
2039                 return r;
2040
2041         p = content;
2042         while ((line = strsep(&p, "\n"))) {
2043                 char *key;
2044
2045                 key = strsep(&line, " ");
2046
2047                 for (i = 0; keys[i]; i++) {
2048                         if (streq(key, keys[i])) {
2049                                 values[i] = strdup(line);
2050                                 break;
2051                         }
2052                 }
2053         }
2054
2055         for (i = 0; keys[i]; i++) {
2056                 if (!values[i]) {
2057                         for (i = 0; keys[i]; i++) {
2058                                 free(values[i]);
2059                                 values[i] = NULL;
2060                         }
2061                         return -ENOENT;
2062                 }
2063         }
2064
2065         return 0;
2066 }
2067
2068 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
2069         CGroupController c;
2070         int r;
2071
2072         /* This one will create a cgroup in our private tree, but also
2073          * duplicate it in the trees specified in mask, and remove it
2074          * in all others */
2075
2076         /* First create the cgroup in our own hierarchy. */
2077         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
2078         if (r < 0)
2079                 return r;
2080
2081         /* If we are in the unified hierarchy, we are done now */
2082         r = cg_all_unified();
2083         if (r < 0)
2084                 return r;
2085         if (r > 0)
2086                 return 0;
2087
2088         /* Otherwise, do the same in the other hierarchies */
2089         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2090                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2091                 const char *n;
2092
2093                 n = cgroup_controller_to_string(c);
2094
2095                 if (mask & bit)
2096                         (void) cg_create(n, path);
2097                 else if (supported & bit)
2098                         (void) cg_trim(n, path, true);
2099         }
2100
2101         return 0;
2102 }
2103
2104 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
2105         CGroupController c;
2106         int r;
2107
2108         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
2109         if (r < 0)
2110                 return r;
2111
2112         r = cg_all_unified();
2113         if (r < 0)
2114                 return r;
2115         if (r > 0)
2116                 return 0;
2117
2118         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2119                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2120                 const char *p = NULL;
2121
2122                 if (!(supported & bit))
2123                         continue;
2124
2125                 if (path_callback)
2126                         p = path_callback(bit, userdata);
2127
2128                 if (!p)
2129                         p = path;
2130
2131                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
2132         }
2133
2134         return 0;
2135 }
2136
2137 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
2138         Iterator i;
2139         void *pidp;
2140         int r = 0;
2141
2142         SET_FOREACH(pidp, pids, i) {
2143                 pid_t pid = PTR_TO_PID(pidp);
2144                 int q;
2145
2146                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
2147                 if (q < 0 && r >= 0)
2148                         r = q;
2149         }
2150
2151         return r;
2152 }
2153
2154 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
2155         CGroupController c;
2156         int r = 0, q;
2157
2158         if (!path_equal(from, to))  {
2159                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
2160                 if (r < 0)
2161                         return r;
2162         }
2163
2164         q = cg_all_unified();
2165         if (q < 0)
2166                 return q;
2167         if (q > 0)
2168                 return r;
2169
2170         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2171                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2172                 const char *p = NULL;
2173
2174                 if (!(supported & bit))
2175                         continue;
2176
2177                 if (to_callback)
2178                         p = to_callback(bit, userdata);
2179
2180                 if (!p)
2181                         p = to;
2182
2183                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2184         }
2185
2186         return 0;
2187 }
2188
2189 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2190         CGroupController c;
2191         int r, q;
2192
2193         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2194         if (r < 0)
2195                 return r;
2196
2197         q = cg_all_unified();
2198         if (q < 0)
2199                 return q;
2200         if (q > 0)
2201                 return r;
2202
2203         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2204                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2205
2206                 if (!(supported & bit))
2207                         continue;
2208
2209                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2210         }
2211
2212         return 0;
2213 }
2214
2215 int cg_mask_to_string(CGroupMask mask, char **ret) {
2216         const char *controllers[_CGROUP_CONTROLLER_MAX + 1];
2217         CGroupController c;
2218         int i = 0;
2219         char *s;
2220
2221         assert(ret);
2222
2223         if (mask == 0) {
2224                 *ret = NULL;
2225                 return 0;
2226         }
2227
2228         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2229
2230                 if (!(mask & CGROUP_CONTROLLER_TO_MASK(c)))
2231                         continue;
2232
2233                 controllers[i++] = cgroup_controller_to_string(c);
2234                 controllers[i] = NULL;
2235         }
2236
2237         s = strv_join((char **)controllers, NULL);
2238         if (!s)
2239                 return -ENOMEM;
2240
2241         *ret = s;
2242         return 0;
2243 }
2244
2245 int cg_mask_from_string(const char *value, CGroupMask *mask) {
2246         assert(mask);
2247         assert(value);
2248
2249         for (;;) {
2250                 _cleanup_free_ char *n = NULL;
2251                 CGroupController v;
2252                 int r;
2253
2254                 r = extract_first_word(&value, &n, NULL, 0);
2255                 if (r < 0)
2256                         return r;
2257                 if (r == 0)
2258                         break;
2259
2260                 v = cgroup_controller_from_string(n);
2261                 if (v < 0)
2262                         continue;
2263
2264                 *mask |= CGROUP_CONTROLLER_TO_MASK(v);
2265         }
2266         return 0;
2267 }
2268
2269 int cg_mask_supported(CGroupMask *ret) {
2270         CGroupMask mask = 0;
2271         int r;
2272
2273         /* Determines the mask of supported cgroup controllers. Only
2274          * includes controllers we can make sense of and that are
2275          * actually accessible. */
2276
2277         r = cg_all_unified();
2278         if (r < 0)
2279                 return r;
2280         if (r > 0) {
2281                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2282
2283                 /* In the unified hierarchy we can read the supported
2284                  * and accessible controllers from a the top-level
2285                  * cgroup attribute */
2286
2287                 r = cg_get_root_path(&root);
2288                 if (r < 0)
2289                         return r;
2290
2291                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2292                 if (r < 0)
2293                         return r;
2294
2295                 r = read_one_line_file(path, &controllers);
2296                 if (r < 0)
2297                         return r;
2298
2299                 r = cg_mask_from_string(controllers, &mask);
2300                 if (r < 0)
2301                         return r;
2302
2303                 /* Currently, we support the cpu, memory, io and pids
2304                  * controller in the unified hierarchy, mask
2305                  * everything else off. */
2306                 mask &= CGROUP_MASK_CPU | CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2307
2308         } else {
2309                 CGroupController c;
2310
2311                 /* In the legacy hierarchy, we check whether which
2312                  * hierarchies are mounted. */
2313
2314                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2315                         const char *n;
2316
2317                         n = cgroup_controller_to_string(c);
2318                         if (controller_is_accessible(n) >= 0)
2319                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2320                 }
2321         }
2322
2323         *ret = mask;
2324         return 0;
2325 }
2326
2327 int cg_kernel_controllers(Set *controllers) {
2328         _cleanup_fclose_ FILE *f = NULL;
2329         char buf[LINE_MAX];
2330         int r;
2331
2332         assert(controllers);
2333
2334         /* Determines the full list of kernel-known controllers. Might
2335          * include controllers we don't actually support, arbitrary
2336          * named hierarchies and controllers that aren't currently
2337          * accessible (because not mounted). */
2338
2339         f = fopen("/proc/cgroups", "re");
2340         if (!f) {
2341                 if (errno == ENOENT)
2342                         return 0;
2343                 return -errno;
2344         }
2345
2346         /* Ignore the header line */
2347         (void) fgets(buf, sizeof(buf), f);
2348
2349         for (;;) {
2350                 char *controller;
2351                 int enabled = 0;
2352
2353                 errno = 0;
2354                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2355
2356                         if (feof(f))
2357                                 break;
2358
2359                         if (ferror(f) && errno > 0)
2360                                 return -errno;
2361
2362                         return -EBADMSG;
2363                 }
2364
2365                 if (!enabled) {
2366                         free(controller);
2367                         continue;
2368                 }
2369
2370                 if (!cg_controller_is_valid(controller)) {
2371                         free(controller);
2372                         return -EBADMSG;
2373                 }
2374
2375                 r = set_consume(controllers, controller);
2376                 if (r < 0)
2377                         return r;
2378         }
2379
2380         return 0;
2381 }
2382
2383 static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
2384
2385 /* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd.  This
2386  * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
2387  * /sys/fs/cgroup/systemd.  From v233 and on, the hybrid mode mountnbs v2 on /sys/fs/cgroup/unified and maintains
2388  * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
2389  *
2390  * To keep live upgrade working, we detect and support v232 layout.  When v232 layout is detected, to keep cgroup v2
2391  * process management but disable the compat dual layout, we return %true on
2392  * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
2393  */
2394 static thread_local bool unified_systemd_v232;
2395
2396 static int cg_unified_update(void) {
2397
2398         struct statfs fs;
2399
2400         /* Checks if we support the unified hierarchy. Returns an
2401          * error when the cgroup hierarchies aren't mounted yet or we
2402          * have any other trouble determining if the unified hierarchy
2403          * is supported. */
2404
2405         if (unified_cache >= CGROUP_UNIFIED_NONE)
2406                 return 0;
2407
2408         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2409                 return -errno;
2410
2411         if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
2412                 unified_cache = CGROUP_UNIFIED_ALL;
2413         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
2414                 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
2415                     F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2416                         unified_cache = CGROUP_UNIFIED_SYSTEMD;
2417                         unified_systemd_v232 = false;
2418                 } else if (statfs("/sys/fs/cgroup/systemd/", &fs) == 0 &&
2419                            F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
2420                         unified_cache = CGROUP_UNIFIED_SYSTEMD;
2421                         unified_systemd_v232 = true;
2422                 } else {
2423                         if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
2424                                 return -errno;
2425                         if (!F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2426                                 return -ENOMEDIUM;
2427                         unified_cache = CGROUP_UNIFIED_NONE;
2428                 }
2429         } else
2430                 return -ENOMEDIUM;
2431
2432         return 0;
2433 }
2434
2435 int cg_unified_controller(const char *controller) {
2436         int r;
2437
2438         r = cg_unified_update();
2439         if (r < 0)
2440                 return r;
2441
2442         if (unified_cache == CGROUP_UNIFIED_NONE)
2443                 return false;
2444
2445         if (unified_cache >= CGROUP_UNIFIED_ALL)
2446                 return true;
2447
2448         return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
2449 }
2450
2451 int cg_all_unified(void) {
2452         int r;
2453
2454         r = cg_unified_update();
2455         if (r < 0)
2456                 return r;
2457
2458         return unified_cache >= CGROUP_UNIFIED_ALL;
2459 }
2460
2461 int cg_hybrid_unified(void) {
2462         int r;
2463
2464         r = cg_unified_update();
2465         if (r < 0)
2466                 return r;
2467
2468         return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
2469 }
2470
2471 int cg_unified_flush(void) {
2472         unified_cache = CGROUP_UNIFIED_UNKNOWN;
2473
2474         return cg_unified_update();
2475 }
2476
2477 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2478         _cleanup_free_ char *fs = NULL;
2479         CGroupController c;
2480         int r;
2481
2482         assert(p);
2483
2484         if (supported == 0)
2485                 return 0;
2486
2487         r = cg_all_unified();
2488         if (r < 0)
2489                 return r;
2490         if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
2491                 return 0;
2492
2493         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2494         if (r < 0)
2495                 return r;
2496
2497         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2498                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2499                 const char *n;
2500
2501                 if (!(supported & bit))
2502                         continue;
2503
2504                 n = cgroup_controller_to_string(c);
2505                 {
2506                         char s[1 + strlen(n) + 1];
2507
2508                         s[0] = mask & bit ? '+' : '-';
2509                         strcpy(s + 1, n);
2510
2511                         r = write_string_file(fs, s, 0);
2512                         if (r < 0)
2513                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2514                 }
2515         }
2516
2517         return 0;
2518 }
2519
2520 bool cg_is_unified_wanted(void) {
2521         static thread_local int wanted = -1;
2522         int r;
2523         bool b;
2524         const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
2525
2526         /* If we have a cached value, return that. */
2527         if (wanted >= 0)
2528                 return wanted;
2529
2530         /* If the hierarchy is already mounted, then follow whatever
2531          * was chosen for it. */
2532         if (cg_unified_flush() >= 0)
2533                 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
2534
2535         /* Otherwise, let's see what the kernel command line has to say.
2536          * Since checking is expensive, cache a non-error result. */
2537         r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
2538
2539         return (wanted = r > 0 ? b : is_default);
2540 }
2541
2542 bool cg_is_legacy_wanted(void) {
2543         static thread_local int wanted = -1;
2544
2545         /* If we have a cached value, return that. */
2546         if (wanted >= 0)
2547                 return wanted;
2548
2549         /* Check if we have cgroups2 already mounted. */
2550         if (cg_unified_flush() >= 0 &&
2551             unified_cache == CGROUP_UNIFIED_ALL)
2552                 return (wanted = false);
2553
2554         /* Otherwise, assume that at least partial legacy is wanted,
2555          * since cgroups2 should already be mounted at this point. */
2556         return (wanted = true);
2557 }
2558
2559 bool cg_is_hybrid_wanted(void) {
2560         static thread_local int wanted = -1;
2561         int r;
2562         bool b;
2563         const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
2564         /* We default to true if the default is "hybrid", obviously,
2565          * but also when the default is "unified", because if we get
2566          * called, it means that unified hierarchy was not mounted. */
2567
2568         /* If we have a cached value, return that. */
2569         if (wanted >= 0)
2570                 return wanted;
2571
2572         /* If the hierarchy is already mounted, then follow whatever
2573          * was chosen for it. */
2574         if (cg_unified_flush() >= 0 &&
2575             unified_cache == CGROUP_UNIFIED_ALL)
2576                 return (wanted = false);
2577
2578         /* Otherwise, let's see what the kernel command line has to say.
2579          * Since checking is expensive, cache a non-error result. */
2580         r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
2581
2582         /* The meaning of the kernel option is reversed wrt. to the return value
2583          * of this function, hence the negation. */
2584         return (wanted = r > 0 ? !b : is_default);
2585 }
2586
2587 int cg_weight_parse(const char *s, uint64_t *ret) {
2588         uint64_t u;
2589         int r;
2590
2591         if (isempty(s)) {
2592                 *ret = CGROUP_WEIGHT_INVALID;
2593                 return 0;
2594         }
2595
2596         r = safe_atou64(s, &u);
2597         if (r < 0)
2598                 return r;
2599
2600         if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2601                 return -ERANGE;
2602
2603         *ret = u;
2604         return 0;
2605 }
2606
2607 const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2608         [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
2609         [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
2610         [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
2611         [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
2612 };
2613
2614 static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2615         [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
2616         [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
2617         [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
2618         [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
2619 };
2620
2621 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2622
2623 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2624         uint64_t u;
2625         int r;
2626
2627         if (isempty(s)) {
2628                 *ret = CGROUP_CPU_SHARES_INVALID;
2629                 return 0;
2630         }
2631
2632         r = safe_atou64(s, &u);
2633         if (r < 0)
2634                 return r;
2635
2636         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2637                 return -ERANGE;
2638
2639         *ret = u;
2640         return 0;
2641 }
2642
2643 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2644         uint64_t u;
2645         int r;
2646
2647         if (isempty(s)) {
2648                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2649                 return 0;
2650         }
2651
2652         r = safe_atou64(s, &u);
2653         if (r < 0)
2654                 return r;
2655
2656         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2657                 return -ERANGE;
2658
2659         *ret = u;
2660         return 0;
2661 }
2662
2663 bool is_cgroup_fs(const struct statfs *s) {
2664         return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
2665                is_fs_type(s, CGROUP2_SUPER_MAGIC);
2666 }
2667
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        /* Returns true if 'fd' refers to a file on a cgroup (v1 or v2) file system.
         *
         * The return type is bool, so an error cannot be reported as a negative errno value: any non-zero
         * value would be converted to true and hence read as "yes, this is a cgroup file system". If the file
         * system cannot be queried we therefore answer false. */

        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2676
2677 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2678         [CGROUP_CONTROLLER_CPU] = "cpu",
2679         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2680         [CGROUP_CONTROLLER_IO] = "io",
2681         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2682         [CGROUP_CONTROLLER_MEMORY] = "memory",
2683         [CGROUP_CONTROLLER_DEVICES] = "devices",
2684         [CGROUP_CONTROLLER_PIDS] = "pids",
2685 };
2686
2687 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);