src/basic/cgroup-util.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <dirent.h>
  21 #include <errno.h>
  22 #include <ftw.h>
  23 #include <limits.h>
  24 #include <signal.h>
  25 #include <stddef.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <sys/stat.h>
  29 #include <sys/statfs.h>
  30 #include <sys/types.h>
  31 #include <unistd.h>
  32
  33 #include "alloc-util.h"
  34 #include "cgroup-util.h"
  35 #include "def.h"
  36 #include "dirent-util.h"
  37 #include "extract-word.h"
  38 #include "fd-util.h"
  39 #include "fileio.h"
  40 #include "formats-util.h"
  41 #include "fs-util.h"
  42 #include "log.h"
  43 #include "login-util.h"
  44 #include "macro.h"
  45 #include "missing.h"
  46 #include "mkdir.h"
  47 #include "parse-util.h"
  48 #include "path-util.h"
  49 #include "proc-cmdline.h"
  50 #include "process-util.h"
  51 #include "set.h"
  52 #include "special.h"
  53 #include "stat-util.h"
  54 #include "stdio-util.h"
  55 #include "string-table.h"
  56 #include "string-util.h"
  57 #include "unit-name.h"
  58 #include "user-util.h"
  59
  60 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  61         _cleanup_free_ char *fs = NULL;
  62         FILE *f;
  63         int r;
  64
  65         assert(_f);
  66
  67         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  68         if (r < 0)
  69                 return r;
  70
  71         f = fopen(fs, "re");
  72         if (!f)
  73                 return -errno;
  74
  75         *_f = f;
  76         return 0;
  77 }
  78
  79 int cg_read_pid(FILE *f, pid_t *_pid) {
  80         unsigned long ul;
  81
  82         /* Note that the cgroup.procs might contain duplicates! See
  83          * cgroups.txt for details. */
  84
  85         assert(f);
  86         assert(_pid);
  87
  88         errno = 0;
  89         if (fscanf(f, "%lu", &ul) != 1) {
  90
  91                 if (feof(f))
  92                         return 0;
  93
  94                 return errno > 0 ? -errno : -EIO;
  95         }
  96
  97         if (ul <= 0)
  98                 return -EIO;
  99
 100         *_pid = (pid_t) ul;
 101         return 1;
 102 }
 103
 104 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
 105         _cleanup_free_ char *fs = NULL;
 106         int r;
 107         DIR *d;
 108
 109         assert(_d);
 110
 111         /* This is not recursive! */
 112
 113         r = cg_get_path(controller, path, NULL, &fs);
 114         if (r < 0)
 115                 return r;
 116
 117         d = opendir(fs);
 118         if (!d)
 119                 return -errno;
 120
 121         *_d = d;
 122         return 0;
 123 }
 124
 125 int cg_read_subgroup(DIR *d, char **fn) {
 126         struct dirent *de;
 127
 128         assert(d);
 129         assert(fn);
 130
 131         FOREACH_DIRENT_ALL(de, d, return -errno) {
 132                 char *b;
 133
 134                 if (de->d_type != DT_DIR)
 135                         continue;
 136
 137                 if (streq(de->d_name, ".") ||
 138                     streq(de->d_name, ".."))
 139                         continue;
 140
 141                 b = strdup(de->d_name);
 142                 if (!b)
 143                         return -ENOMEM;
 144
 145                 *fn = b;
 146                 return 1;
 147         }
 148
 149         return 0;
 150 }
 151
 152 int cg_rmdir(const char *controller, const char *path) {
 153         _cleanup_free_ char *p = NULL;
 154         int r;
 155
 156         r = cg_get_path(controller, path, NULL, &p);
 157         if (r < 0)
 158                 return r;
 159
 160         r = rmdir(p);
 161         if (r < 0 && errno != ENOENT)
 162                 return -errno;
 163
 164         return 0;
 165 }
 166
 167 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
 168         _cleanup_set_free_ Set *allocated_set = NULL;
 169         bool done = false;
 170         int r, ret = 0;
 171         pid_t my_pid;
 172
 173         assert(sig >= 0);
 174
 175         /* This goes through the tasks list and kills them all. This
 176          * is repeated until no further processes are added to the
 177          * tasks list, to properly handle forking processes */
 178
 179         if (!s) {
 180                 s = allocated_set = set_new(NULL);
 181                 if (!s)
 182                         return -ENOMEM;
 183         }
 184
 185         my_pid = getpid();
 186
 187         do {
 188                 _cleanup_fclose_ FILE *f = NULL;
 189                 pid_t pid = 0;
 190                 done = true;
 191
 192                 r = cg_enumerate_processes(controller, path, &f);
 193                 if (r < 0) {
 194                         if (ret >= 0 && r != -ENOENT)
 195                                 return r;
 196
 197                         return ret;
 198                 }
 199
 200                 while ((r = cg_read_pid(f, &pid)) > 0) {
 201
 202                         if (ignore_self && pid == my_pid)
 203                                 continue;
 204
 205                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 206                                 continue;
 207
 208                         /* If we haven't killed this process yet, kill
 209                          * it */
 210                         if (kill(pid, sig) < 0) {
 211                                 if (ret >= 0 && errno != ESRCH)
 212                                         ret = -errno;
 213                         } else {
 214                                 if (sigcont && sig != SIGKILL)
 215                                         (void) kill(pid, SIGCONT);
 216
 217                                 if (ret == 0)
 218                                         ret = 1;
 219                         }
 220
 221                         done = false;
 222
 223                         r = set_put(s, PID_TO_PTR(pid));
 224                         if (r < 0) {
 225                                 if (ret >= 0)
 226                                         return r;
 227
 228                                 return ret;
 229                         }
 230                 }
 231
 232                 if (r < 0) {
 233                         if (ret >= 0)
 234                                 return r;
 235
 236                         return ret;
 237                 }
 238
 239                 /* To avoid racing against processes which fork
 240                  * quicker than we can kill them we repeat this until
 241                  * no new pids need to be killed. */
 242
 243         } while (!done);
 244
 245         return ret;
 246 }
 247
 248 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 249         _cleanup_set_free_ Set *allocated_set = NULL;
 250         _cleanup_closedir_ DIR *d = NULL;
 251         int r, ret;
 252         char *fn;
 253
 254         assert(path);
 255         assert(sig >= 0);
 256
 257         if (!s) {
 258                 s = allocated_set = set_new(NULL);
 259                 if (!s)
 260                         return -ENOMEM;
 261         }
 262
 263         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 264
 265         r = cg_enumerate_subgroups(controller, path, &d);
 266         if (r < 0) {
 267                 if (ret >= 0 && r != -ENOENT)
 268                         return r;
 269
 270                 return ret;
 271         }
 272
 273         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 274                 _cleanup_free_ char *p = NULL;
 275
 276                 p = strjoin(path, "/", fn, NULL);
 277                 free(fn);
 278                 if (!p)
 279                         return -ENOMEM;
 280
 281                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 282                 if (r != 0 && ret >= 0)
 283                         ret = r;
 284         }
 285
 286         if (ret >= 0 && r < 0)
 287                 ret = r;
 288
 289         if (rem) {
 290                 r = cg_rmdir(controller, path);
 291                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 292                         return r;
 293         }
 294
 295         return ret;
 296 }
 297
 298 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
 299         bool done = false;
 300         _cleanup_set_free_ Set *s = NULL;
 301         int r, ret = 0;
 302         pid_t my_pid;
 303
 304         assert(cfrom);
 305         assert(pfrom);
 306         assert(cto);
 307         assert(pto);
 308
 309         s = set_new(NULL);
 310         if (!s)
 311                 return -ENOMEM;
 312
 313         my_pid = getpid();
 314
 315         do {
 316                 _cleanup_fclose_ FILE *f = NULL;
 317                 pid_t pid = 0;
 318                 done = true;
 319
 320                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 321                 if (r < 0) {
 322                         if (ret >= 0 && r != -ENOENT)
 323                                 return r;
 324
 325                         return ret;
 326                 }
 327
 328                 while ((r = cg_read_pid(f, &pid)) > 0) {
 329
 330                         /* This might do weird stuff if we aren't a
 331                          * single-threaded program. However, we
 332                          * luckily know we are not */
 333                         if (ignore_self && pid == my_pid)
 334                                 continue;
 335
 336                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 337                                 continue;
 338
 339                         /* Ignore kernel threads. Since they can only
 340                          * exist in the root cgroup, we only check for
 341                          * them there. */
 342                         if (cfrom &&
 343                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 344                             is_kernel_thread(pid) > 0)
 345                                 continue;
 346
 347                         r = cg_attach(cto, pto, pid);
 348                         if (r < 0) {
 349                                 if (ret >= 0 && r != -ESRCH)
 350                                         ret = r;
 351                         } else if (ret == 0)
 352                                 ret = 1;
 353
 354                         done = false;
 355
 356                         r = set_put(s, PID_TO_PTR(pid));
 357                         if (r < 0) {
 358                                 if (ret >= 0)
 359                                         return r;
 360
 361                                 return ret;
 362                         }
 363                 }
 364
 365                 if (r < 0) {
 366                         if (ret >= 0)
 367                                 return r;
 368
 369                         return ret;
 370                 }
 371         } while (!done);
 372
 373         return ret;
 374 }
 375
 376 int cg_migrate_recursive(
 377                 const char *cfrom,
 378                 const char *pfrom,
 379                 const char *cto,
 380                 const char *pto,
 381                 bool ignore_self,
 382                 bool rem) {
 383
 384         _cleanup_closedir_ DIR *d = NULL;
 385         int r, ret = 0;
 386         char *fn;
 387
 388         assert(cfrom);
 389         assert(pfrom);
 390         assert(cto);
 391         assert(pto);
 392
 393         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 394
 395         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 396         if (r < 0) {
 397                 if (ret >= 0 && r != -ENOENT)
 398                         return r;
 399
 400                 return ret;
 401         }
 402
 403         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 404                 _cleanup_free_ char *p = NULL;
 405
 406                 p = strjoin(pfrom, "/", fn, NULL);
 407                 free(fn);
 408                 if (!p)
 409                         return -ENOMEM;
 410
 411                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 412                 if (r != 0 && ret >= 0)
 413                         ret = r;
 414         }
 415
 416         if (r < 0 && ret >= 0)
 417                 ret = r;
 418
 419         if (rem) {
 420                 r = cg_rmdir(cfrom, pfrom);
 421                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 422                         return r;
 423         }
 424
 425         return ret;
 426 }
 427
 428 int cg_migrate_recursive_fallback(
 429                 const char *cfrom,
 430                 const char *pfrom,
 431                 const char *cto,
 432                 const char *pto,
 433                 bool ignore_self,
 434                 bool rem) {
 435
 436         int r;
 437
 438         assert(cfrom);
 439         assert(pfrom);
 440         assert(cto);
 441         assert(pto);
 442
 443         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 444         if (r < 0) {
 445                 char prefix[strlen(pto) + 1];
 446
 447                 /* This didn't work? Then let's try all prefixes of the destination */
 448
 449                 PATH_FOREACH_PREFIX(prefix, pto) {
 450                         int q;
 451
 452                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 453                         if (q >= 0)
 454                                 return q;
 455                 }
 456         }
 457
 458         return r;
 459 }
 460
 461 static const char *controller_to_dirname(const char *controller) {
 462         const char *e;
 463
 464         assert(controller);
 465
 466         /* Converts a controller name to the directory name below
 467          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 468          * just cuts off the name= prefixed used for named
 469          * hierarchies, if it is specified. */
 470
 471         e = startswith(controller, "name=");
 472         if (e)
 473                 return e;
 474
 475         return controller;
 476 }
 477
 478 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 479         const char *dn;
 480         char *t = NULL;
 481
 482         assert(fs);
 483         assert(controller);
 484
 485         dn = controller_to_dirname(controller);
 486
 487         if (isempty(path) && isempty(suffix))
 488                 t = strappend("/sys/fs/cgroup/", dn);
 489         else if (isempty(path))
 490                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 491         else if (isempty(suffix))
 492                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 493         else
 494                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 495         if (!t)
 496                 return -ENOMEM;
 497
 498         *fs = t;
 499         return 0;
 500 }
 501
 502 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 503         char *t;
 504
 505         assert(fs);
 506
 507         if (isempty(path) && isempty(suffix))
 508                 t = strdup("/sys/fs/cgroup");
 509         else if (isempty(path))
 510                 t = strappend("/sys/fs/cgroup/", suffix);
 511         else if (isempty(suffix))
 512                 t = strappend("/sys/fs/cgroup/", path);
 513         else
 514                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 515         if (!t)
 516                 return -ENOMEM;
 517
 518         *fs = t;
 519         return 0;
 520 }
 521
 522 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 523         int unified, r;
 524
 525         assert(fs);
 526
 527         if (!controller) {
 528                 char *t;
 529
 530                 /* If no controller is specified, we return the path
 531                  * *below* the controllers, without any prefix. */
 532
 533                 if (!path && !suffix)
 534                         return -EINVAL;
 535
 536                 if (!suffix)
 537                         t = strdup(path);
 538                 else if (!path)
 539                         t = strdup(suffix);
 540                 else
 541                         t = strjoin(path, "/", suffix, NULL);
 542                 if (!t)
 543                         return -ENOMEM;
 544
 545                 *fs = path_kill_slashes(t);
 546                 return 0;
 547         }
 548
 549         if (!cg_controller_is_valid(controller))
 550                 return -EINVAL;
 551
 552         unified = cg_unified();
 553         if (unified < 0)
 554                 return unified;
 555
 556         if (unified > 0)
 557                 r = join_path_unified(path, suffix, fs);
 558         else
 559                 r = join_path_legacy(controller, path, suffix, fs);
 560         if (r < 0)
 561                 return r;
 562
 563         path_kill_slashes(*fs);
 564         return 0;
 565 }
 566
 567 static int controller_is_accessible(const char *controller) {
 568         int unified;
 569
 570         assert(controller);
 571
 572         /* Checks whether a specific controller is accessible,
 573          * i.e. its hierarchy mounted. In the unified hierarchy all
 574          * controllers are considered accessible, except for the named
 575          * hierarchies */
 576
 577         if (!cg_controller_is_valid(controller))
 578                 return -EINVAL;
 579
 580         unified = cg_unified();
 581         if (unified < 0)
 582                 return unified;
 583         if (unified > 0) {
 584                 /* We don't support named hierarchies if we are using
 585                  * the unified hierarchy. */
 586
 587                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 588                         return 0;
 589
 590                 if (startswith(controller, "name="))
 591                         return -EOPNOTSUPP;
 592
 593         } else {
 594                 const char *cc, *dn;
 595
 596                 dn = controller_to_dirname(controller);
 597                 cc = strjoina("/sys/fs/cgroup/", dn);
 598
 599                 if (laccess(cc, F_OK) < 0)
 600                         return -errno;
 601         }
 602
 603         return 0;
 604 }
 605
 606 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 607         int r;
 608
 609         assert(controller);
 610         assert(fs);
 611
 612         /* Check if the specified controller is actually accessible */
 613         r = controller_is_accessible(controller);
 614         if (r < 0)
 615                 return r;
 616
 617         return cg_get_path(controller, path, suffix, fs);
 618 }
 619
 620 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 621         assert(path);
 622         assert(sb);
 623         assert(ftwbuf);
 624
 625         if (typeflag != FTW_DP)
 626                 return 0;
 627
 628         if (ftwbuf->level < 1)
 629                 return 0;
 630
 631         (void) rmdir(path);
 632         return 0;
 633 }
 634
 635 int cg_trim(const char *controller, const char *path, bool delete_root) {
 636         _cleanup_free_ char *fs = NULL;
 637         int r = 0;
 638
 639         assert(path);
 640
 641         r = cg_get_path(controller, path, NULL, &fs);
 642         if (r < 0)
 643                 return r;
 644
 645         errno = 0;
 646         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 647                 if (errno == ENOENT)
 648                         r = 0;
 649                 else if (errno > 0)
 650                         r = -errno;
 651                 else
 652                         r = -EIO;
 653         }
 654
 655         if (delete_root) {
 656                 if (rmdir(fs) < 0 && errno != ENOENT)
 657                         return -errno;
 658         }
 659
 660         return r;
 661 }
 662
 663 int cg_create(const char *controller, const char *path) {
 664         _cleanup_free_ char *fs = NULL;
 665         int r;
 666
 667         r = cg_get_path_and_check(controller, path, NULL, &fs);
 668         if (r < 0)
 669                 return r;
 670
 671         r = mkdir_parents(fs, 0755);
 672         if (r < 0)
 673                 return r;
 674
 675         if (mkdir(fs, 0755) < 0) {
 676
 677                 if (errno == EEXIST)
 678                         return 0;
 679
 680                 return -errno;
 681         }
 682
 683         return 1;
 684 }
 685
 686 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 687         int r, q;
 688
 689         assert(pid >= 0);
 690
 691         r = cg_create(controller, path);
 692         if (r < 0)
 693                 return r;
 694
 695         q = cg_attach(controller, path, pid);
 696         if (q < 0)
 697                 return q;
 698
 699         /* This does not remove the cgroup on failure */
 700         return r;
 701 }
 702
 703 int cg_attach(const char *controller, const char *path, pid_t pid) {
 704         _cleanup_free_ char *fs = NULL;
 705         char c[DECIMAL_STR_MAX(pid_t) + 2];
 706         int r;
 707
 708         assert(path);
 709         assert(pid >= 0);
 710
 711         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 712         if (r < 0)
 713                 return r;
 714
 715         if (pid == 0)
 716                 pid = getpid();
 717
 718         xsprintf(c, PID_FMT "\n", pid);
 719
 720         return write_string_file(fs, c, 0);
 721 }
 722
 723 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 724         int r;
 725
 726         assert(controller);
 727         assert(path);
 728         assert(pid >= 0);
 729
 730         r = cg_attach(controller, path, pid);
 731         if (r < 0) {
 732                 char prefix[strlen(path) + 1];
 733
 734                 /* This didn't work? Then let's try all prefixes of
 735                  * the destination */
 736
 737                 PATH_FOREACH_PREFIX(prefix, path) {
 738                         int q;
 739
 740                         q = cg_attach(controller, prefix, pid);
 741                         if (q >= 0)
 742                                 return q;
 743                 }
 744         }
 745
 746         return r;
 747 }
 748
 749 int cg_set_group_access(
 750                 const char *controller,
 751                 const char *path,
 752                 mode_t mode,
 753                 uid_t uid,
 754                 gid_t gid) {
 755
 756         _cleanup_free_ char *fs = NULL;
 757         int r;
 758
 759         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 760                 return 0;
 761
 762         if (mode != MODE_INVALID)
 763                 mode &= 0777;
 764
 765         r = cg_get_path(controller, path, NULL, &fs);
 766         if (r < 0)
 767                 return r;
 768
 769         return chmod_and_chown(fs, mode, uid, gid);
 770 }
 771
 772 int cg_set_task_access(
 773                 const char *controller,
 774                 const char *path,
 775                 mode_t mode,
 776                 uid_t uid,
 777                 gid_t gid) {
 778
 779         _cleanup_free_ char *fs = NULL, *procs = NULL;
 780         int r, unified;
 781
 782         assert(path);
 783
 784         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 785                 return 0;
 786
 787         if (mode != MODE_INVALID)
 788                 mode &= 0666;
 789
 790         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 791         if (r < 0)
 792                 return r;
 793
 794         r = chmod_and_chown(fs, mode, uid, gid);
 795         if (r < 0)
 796                 return r;
 797
 798         unified = cg_unified();
 799         if (unified < 0)
 800                 return unified;
 801         if (unified)
 802                 return 0;
 803
 804         /* Compatibility, Always keep values for "tasks" in sync with
 805          * "cgroup.procs" */
 806         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 807                 (void) chmod_and_chown(procs, mode, uid, gid);
 808
 809         return 0;
 810 }
 811
 812 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 813         _cleanup_fclose_ FILE *f = NULL;
 814         char line[LINE_MAX];
 815         const char *fs;
 816         size_t cs = 0;
 817         int unified;
 818
 819         assert(path);
 820         assert(pid >= 0);
 821
 822         unified = cg_unified();
 823         if (unified < 0)
 824                 return unified;
 825         if (unified == 0) {
 826                 if (controller) {
 827                         if (!cg_controller_is_valid(controller))
 828                                 return -EINVAL;
 829                 } else
 830                         controller = SYSTEMD_CGROUP_CONTROLLER;
 831
 832                 cs = strlen(controller);
 833         }
 834
 835         fs = procfs_file_alloca(pid, "cgroup");
 836         f = fopen(fs, "re");
 837         if (!f)
 838                 return errno == ENOENT ? -ESRCH : -errno;
 839
 840         FOREACH_LINE(line, f, return -errno) {
 841                 char *e, *p;
 842
 843                 truncate_nl(line);
 844
 845                 if (unified) {
 846                         e = startswith(line, "0:");
 847                         if (!e)
 848                                 continue;
 849
 850                         e = strchr(e, ':');
 851                         if (!e)
 852                                 continue;
 853                 } else {
 854                         char *l;
 855                         size_t k;
 856                         const char *word, *state;
 857                         bool found = false;
 858
 859                         l = strchr(line, ':');
 860                         if (!l)
 861                                 continue;
 862
 863                         l++;
 864                         e = strchr(l, ':');
 865                         if (!e)
 866                                 continue;
 867
 868                         *e = 0;
 869                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 870                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 871                                         found = true;
 872                                         break;
 873                                 }
 874                         }
 875
 876                         if (!found)
 877                                 continue;
 878                 }
 879
 880                 p = strdup(e + 1);
 881                 if (!p)
 882                         return -ENOMEM;
 883
 884                 *path = p;
 885                 return 0;
 886         }
 887
 888         return -ENODATA;
 889 }
 890
 891 int cg_install_release_agent(const char *controller, const char *agent) {
 892         _cleanup_free_ char *fs = NULL, *contents = NULL;
 893         const char *sc;
 894         int r, unified;
 895
 896         assert(agent);
 897
 898         unified = cg_unified();
 899         if (unified < 0)
 900                 return unified;
 901         if (unified) /* doesn't apply to unified hierarchy */
 902                 return -EOPNOTSUPP;
 903
 904         r = cg_get_path(controller, NULL, "release_agent", &fs);
 905         if (r < 0)
 906                 return r;
 907
 908         r = read_one_line_file(fs, &contents);
 909         if (r < 0)
 910                 return r;
 911
 912         sc = strstrip(contents);
 913         if (isempty(sc)) {
 914                 r = write_string_file(fs, agent, 0);
 915                 if (r < 0)
 916                         return r;
 917         } else if (!path_equal(sc, agent))
 918                 return -EEXIST;
 919
 920         fs = mfree(fs);
 921         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 922         if (r < 0)
 923                 return r;
 924
 925         contents = mfree(contents);
 926         r = read_one_line_file(fs, &contents);
 927         if (r < 0)
 928                 return r;
 929
 930         sc = strstrip(contents);
 931         if (streq(sc, "0")) {
 932                 r = write_string_file(fs, "1", 0);
 933                 if (r < 0)
 934                         return r;
 935
 936                 return 1;
 937         }
 938
 939         if (!streq(sc, "1"))
 940                 return -EIO;
 941
 942         return 0;
 943 }
 944
 945 int cg_uninstall_release_agent(const char *controller) {
 946         _cleanup_free_ char *fs = NULL;
 947         int r, unified;
 948
 949         unified = cg_unified();
 950         if (unified < 0)
 951                 return unified;
 952         if (unified) /* Doesn't apply to unified hierarchy */
 953                 return -EOPNOTSUPP;
 954
 955         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 956         if (r < 0)
 957                 return r;
 958
 959         r = write_string_file(fs, "0", 0);
 960         if (r < 0)
 961                 return r;
 962
 963         fs = mfree(fs);
 964
 965         r = cg_get_path(controller, NULL, "release_agent", &fs);
 966         if (r < 0)
 967                 return r;
 968
 969         r = write_string_file(fs, "", 0);
 970         if (r < 0)
 971                 return r;
 972
 973         return 0;
 974 }
 975
 976 int cg_is_empty(const char *controller, const char *path) {
 977         _cleanup_fclose_ FILE *f = NULL;
 978         pid_t pid;
 979         int r;
 980
 981         assert(path);
 982
 983         r = cg_enumerate_processes(controller, path, &f);
 984         if (r == -ENOENT)
 985                 return 1;
 986         if (r < 0)
 987                 return r;
 988
 989         r = cg_read_pid(f, &pid);
 990         if (r < 0)
 991                 return r;
 992
 993         return r == 0;
 994 }
 995
 996 int cg_is_empty_recursive(const char *controller, const char *path) {
 997         int unified, r;
 998
 999         assert(path);
1000
1001         /* The root cgroup is always populated */
1002         if (controller && (isempty(path) || path_equal(path, "/")))
1003                 return false;
1004
1005         unified = cg_unified();
1006         if (unified < 0)
1007                 return unified;
1008
1009         if (unified > 0) {
1010                 _cleanup_free_ char *populated = NULL, *t = NULL;
1011
1012                 /* On the unified hierarchy we can check empty state
1013                  * via the "cgroup.populated" attribute. */
1014
1015                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1016                 if (r < 0)
1017                         return r;
1018
1019                 r = read_one_line_file(populated, &t);
1020                 if (r == -ENOENT)
1021                         return 1;
1022                 if (r < 0)
1023                         return r;
1024
1025                 return streq(t, "0");
1026         } else {
1027                 _cleanup_closedir_ DIR *d = NULL;
1028                 char *fn;
1029
1030                 r = cg_is_empty(controller, path);
1031                 if (r <= 0)
1032                         return r;
1033
1034                 r = cg_enumerate_subgroups(controller, path, &d);
1035                 if (r == -ENOENT)
1036                         return 1;
1037                 if (r < 0)
1038                         return r;
1039
1040                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1041                         _cleanup_free_ char *p = NULL;
1042
1043                         p = strjoin(path, "/", fn, NULL);
1044                         free(fn);
1045                         if (!p)
1046                                 return -ENOMEM;
1047
1048                         r = cg_is_empty_recursive(controller, p);
1049                         if (r <= 0)
1050                                 return r;
1051                 }
1052                 if (r < 0)
1053                         return r;
1054
1055                 return true;
1056         }
1057 }
1058
/* Splits a cgroup specification into its controller and path parts.
 *
 * Three forms are accepted:
 *   "/some/path"       -> controller NULL, path set
 *   "controller"       -> controller set, path NULL
 *   "controller:/path" -> both set (path NULL if nothing follows the colon)
 *
 * Either output pointer may be NULL, in which case that part is validated
 * but not returned. Returns 0 on success, -EINVAL on an invalid controller
 * name or an unsafe/non-absolute path, -ENOMEM on allocation failure.
 * Outputs are only written on success. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctl = NULL, *pth = NULL;
        const char *colon;

        assert(spec);

        /* Form 1: a bare absolute path */
        if (spec[0] == '/') {
                if (!path_is_safe(spec))
                        return -EINVAL;

                if (path) {
                        pth = strdup(spec);
                        if (!pth)
                                return -ENOMEM;

                        *path = path_kill_slashes(pth);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');

        /* Form 2: a bare controller name */
        if (!colon) {
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        ctl = strdup(spec);
                        if (!ctl)
                                return -ENOMEM;

                        *controller = ctl;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Form 3: controller and path separated by a colon */
        ctl = strndup(spec, colon - spec);
        if (!ctl)
                return -ENOMEM;
        if (!cg_controller_is_valid(ctl)) {
                free(ctl);
                return -EINVAL;
        }

        if (!isempty(colon + 1)) {
                pth = strdup(colon + 1);
                if (!pth) {
                        free(ctl);
                        return -ENOMEM;
                }

                if (!(path_is_safe(pth) && path_is_absolute(pth))) {
                        free(ctl);
                        free(pth);
                        return -EINVAL;
                }

                path_kill_slashes(pth);
        }

        /* Hand over what the caller asked for, drop the rest */
        if (!controller)
                free(ctl);
        else
                *controller = ctl;

        if (!path)
                free(pth);
        else
                *path = pth;

        return 0;
}
1141
1142 int cg_mangle_path(const char *path, char **result) {
1143         _cleanup_free_ char *c = NULL, *p = NULL;
1144         char *t;
1145         int r;
1146
1147         assert(path);
1148         assert(result);
1149
1150         /* First, check if it already is a filesystem path */
1151         if (path_startswith(path, "/sys/fs/cgroup")) {
1152
1153                 t = strdup(path);
1154                 if (!t)
1155                         return -ENOMEM;
1156
1157                 *result = path_kill_slashes(t);
1158                 return 0;
1159         }
1160
1161         /* Otherwise, treat it as cg spec */
1162         r = cg_split_spec(path, &c, &p);
1163         if (r < 0)
1164                 return r;
1165
1166         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1167 }
1168
1169 int cg_get_root_path(char **path) {
1170         char *p, *e;
1171         int r;
1172
1173         assert(path);
1174
1175         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1176         if (r < 0)
1177                 return r;
1178
1179         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1180         if (!e)
1181                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1182         if (!e)
1183                 e = endswith(p, "/system"); /* even more legacy */
1184         if (e)
1185                 *e = 0;
1186
1187         *path = p;
1188         return 0;
1189 }
1190
1191 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1192         _cleanup_free_ char *rt = NULL;
1193         char *p;
1194         int r;
1195
1196         assert(cgroup);
1197         assert(shifted);
1198
1199         if (!root) {
1200                 /* If the root was specified let's use that, otherwise
1201                  * let's determine it from PID 1 */
1202
1203                 r = cg_get_root_path(&rt);
1204                 if (r < 0)
1205                         return r;
1206
1207                 root = rt;
1208         }
1209
1210         p = path_startswith(cgroup, root);
1211         if (p && p > cgroup)
1212                 *shifted = p - 1;
1213         else
1214                 *shifted = cgroup;
1215
1216         return 0;
1217 }
1218
1219 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1220         _cleanup_free_ char *raw = NULL;
1221         const char *c;
1222         int r;
1223
1224         assert(pid >= 0);
1225         assert(cgroup);
1226
1227         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1228         if (r < 0)
1229                 return r;
1230
1231         r = cg_shift_path(raw, root, &c);
1232         if (r < 0)
1233                 return r;
1234
1235         if (c == raw) {
1236                 *cgroup = raw;
1237                 raw = NULL;
1238         } else {
1239                 char *n;
1240
1241                 n = strdup(c);
1242                 if (!n)
1243                         return -ENOMEM;
1244
1245                 *cgroup = n;
1246         }
1247
1248         return 0;
1249 }
1250
1251 int cg_path_decode_unit(const char *cgroup, char **unit) {
1252         char *c, *s;
1253         size_t n;
1254
1255         assert(cgroup);
1256         assert(unit);
1257
1258         n = strcspn(cgroup, "/");
1259         if (n < 3)
1260                 return -ENXIO;
1261
1262         c = strndupa(cgroup, n);
1263         c = cg_unescape(c);
1264
1265         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1266                 return -ENXIO;
1267
1268         s = strdup(c);
1269         if (!s)
1270                 return -ENOMEM;
1271
1272         *unit = s;
1273         return 0;
1274 }
1275
1276 static bool valid_slice_name(const char *p, size_t n) {
1277
1278         if (!p)
1279                 return false;
1280
1281         if (n < strlen("x.slice"))
1282                 return false;
1283
1284         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1285                 char buf[n+1], *c;
1286
1287                 memcpy(buf, p, n);
1288                 buf[n] = 0;
1289
1290                 c = cg_unescape(buf);
1291
1292                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1293         }
1294
1295         return false;
1296 }
1297
/* Skips over all leading slice components of a cgroup path.
 *
 * Returns a pointer to the first component that valid_slice_name() rejects,
 * with the slashes in front of it already skipped. */
static const char *skip_slices(const char *p) {
        size_t len;

        assert(p);

        p += strspn(p, "/");
        len = strcspn(p, "/");

        while (valid_slice_name(p, len)) {
                /* Step over the slice and the slashes that follow it */
                p += len;
                p += strspn(p, "/");

                len = strcspn(p, "/");
        }

        return p;
}
1315
1316 int cg_path_get_unit(const char *path, char **ret) {
1317         const char *e;
1318         char *unit;
1319         int r;
1320
1321         assert(path);
1322         assert(ret);
1323
1324         e = skip_slices(path);
1325
1326         r = cg_path_decode_unit(e, &unit);
1327         if (r < 0)
1328                 return r;
1329
1330         /* We skipped over the slices, don't accept any now */
1331         if (endswith(unit, ".slice")) {
1332                 free(unit);
1333                 return -ENXIO;
1334         }
1335
1336         *ret = unit;
1337         return 0;
1338 }
1339
1340 int cg_pid_get_unit(pid_t pid, char **unit) {
1341         _cleanup_free_ char *cgroup = NULL;
1342         int r;
1343
1344         assert(unit);
1345
1346         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1347         if (r < 0)
1348                 return r;
1349
1350         return cg_path_get_unit(cgroup, unit);
1351 }
1352
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Leading slashes are skipped; the first component must then have the form
 * "session-<id>.scope" with <id> accepted by session_id_valid(). Returns a
 * pointer past that component and any slashes following it, or NULL if the
 * input is empty or the component does not match.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        /* 8 == strlen("session-"), 6 == strlen(".scope") */
        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, so signal failure with
                 * NULL like every other error path does (not "false"). */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1389
/**
 * Skip user@*.service, but require it to be there.
 *
 * After skipping leading slashes, the first component must look like
 * "user@<uid>.service" where <uid> is accepted by parse_uid(). Returns a
 * pointer past the component and any slashes after it, or NULL if the
 * input is empty or does not match.
 */
static const char *skip_user_manager(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("user@x.service"))
                return NULL;

        /* 5 == strlen("user@"), 8 == strlen(".service") */
        if (memcmp(p, "user@", 5) != 0 || memcmp(p + n - 8, ".service", 8) != 0)
                return NULL;
        else {
                char uid[n - 5 - 8 + 1];

                memcpy(uid, p + 5, n - 5 - 8);
                uid[n - 5 - 8] = 0;

                /* User manager services never need unescaping: they
                 * cannot clash with the kernel's own names, so
                 * cg_unescape() is not needed here. */

                if (parse_uid(uid, NULL) < 0)
                        return NULL;

                p += n;

                return p + strspn(p, "/");
        }
}
1427
/* Skips the part of a cgroup path that leads up to a user unit: any
 * leading slices, followed by either a user manager service or, failing
 * that, a session scope. Returns NULL if neither is present. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *rest;

        assert(path);

        after_slices = skip_slices(path);

        /* Prefer the user manager; fall back to the user session */
        rest = skip_user_manager(after_slices);

        return rest ? rest : skip_session(after_slices);
}
1444
/* Determines the user unit a cgroup path belongs to.
 *
 * Returns -ENXIO if the path has no user manager/session prefix; otherwise
 * the remainder is parsed exactly like a system unit path by
 * cg_path_get_unit(). */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1460
1461 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1462         _cleanup_free_ char *cgroup = NULL;
1463         int r;
1464
1465         assert(unit);
1466
1467         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1468         if (r < 0)
1469                 return r;
1470
1471         return cg_path_get_user_unit(cgroup, unit);
1472 }
1473
1474 int cg_path_get_machine_name(const char *path, char **machine) {
1475         _cleanup_free_ char *u = NULL;
1476         const char *sl;
1477         int r;
1478
1479         r = cg_path_get_unit(path, &u);
1480         if (r < 0)
1481                 return r;
1482
1483         sl = strjoina("/run/systemd/machines/unit:", u);
1484         return readlink_malloc(sl, machine);
1485 }
1486
1487 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1488         _cleanup_free_ char *cgroup = NULL;
1489         int r;
1490
1491         assert(machine);
1492
1493         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1494         if (r < 0)
1495                 return r;
1496
1497         return cg_path_get_machine_name(cgroup, machine);
1498 }
1499
1500 int cg_path_get_session(const char *path, char **session) {
1501         _cleanup_free_ char *unit = NULL;
1502         char *start, *end;
1503         int r;
1504
1505         assert(path);
1506
1507         r = cg_path_get_unit(path, &unit);
1508         if (r < 0)
1509                 return r;
1510
1511         start = startswith(unit, "session-");
1512         if (!start)
1513                 return -ENXIO;
1514         end = endswith(start, ".scope");
1515         if (!end)
1516                 return -ENXIO;
1517
1518         *end = 0;
1519         if (!session_id_valid(start))
1520                 return -ENXIO;
1521
1522         if (session) {
1523                 char *rr;
1524
1525                 rr = strdup(start);
1526                 if (!rr)
1527                         return -ENOMEM;
1528
1529                 *session = rr;
1530         }
1531
1532         return 0;
1533 }
1534
1535 int cg_pid_get_session(pid_t pid, char **session) {
1536         _cleanup_free_ char *cgroup = NULL;
1537         int r;
1538
1539         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1540         if (r < 0)
1541                 return r;
1542
1543         return cg_path_get_session(cgroup, session);
1544 }
1545
1546 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1547         _cleanup_free_ char *slice = NULL;
1548         char *start, *end;
1549         int r;
1550
1551         assert(path);
1552
1553         r = cg_path_get_slice(path, &slice);
1554         if (r < 0)
1555                 return r;
1556
1557         start = startswith(slice, "user-");
1558         if (!start)
1559                 return -ENXIO;
1560         end = endswith(start, ".slice");
1561         if (!end)
1562                 return -ENXIO;
1563
1564         *end = 0;
1565         if (parse_uid(start, uid) < 0)
1566                 return -ENXIO;
1567
1568         return 0;
1569 }
1570
1571 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1572         _cleanup_free_ char *cgroup = NULL;
1573         int r;
1574
1575         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1576         if (r < 0)
1577                 return r;
1578
1579         return cg_path_get_owner_uid(cgroup, uid);
1580 }
1581
/* Finds the right-most slice unit at the beginning of a cgroup path,
 * stopping before the first component that is not a slice.
 *
 * If the path does not start with any slice, "-.slice" is returned. On
 * success stores a newly allocated name in *slice and returns 0; returns
 * -ENOMEM on allocation failure or the error from cg_path_decode_unit(). */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        size_t len;
        char *root;

        assert(p);
        assert(slice);

        /* Walk forward for as long as we see slices, remembering the most
         * recent one */
        for (;;) {
                p += strspn(p, "/");

                len = strcspn(p, "/");
                if (!valid_slice_name(p, len))
                        break;

                last = p;
                p += len;
        }

        if (last)
                return cg_path_decode_unit(last, slice);

        /* No slice seen at all */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1617
1618 int cg_pid_get_slice(pid_t pid, char **slice) {
1619         _cleanup_free_ char *cgroup = NULL;
1620         int r;
1621
1622         assert(slice);
1623
1624         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1625         if (r < 0)
1626                 return r;
1627
1628         return cg_path_get_slice(cgroup, slice);
1629 }
1630
/* Determines the user slice of a cgroup path.
 *
 * Returns -ENXIO if the path has no user manager/session prefix; otherwise
 * the remainder is parsed like a system slice path by cg_path_get_slice(). */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1644
1645 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1646         _cleanup_free_ char *cgroup = NULL;
1647         int r;
1648
1649         assert(slice);
1650
1651         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1652         if (r < 0)
1653                 return r;
1654
1655         return cg_path_get_user_slice(cgroup, slice);
1656 }
1657
1658 char *cg_escape(const char *p) {
1659         bool need_prefix = false;
1660
1661         /* This implements very minimal escaping for names to be used
1662          * as file names in the cgroup tree: any name which might
1663          * conflict with a kernel name or is prefixed with '_' is
1664          * prefixed with a '_'. That way, when reading cgroup names it
1665          * is sufficient to remove a single prefixing underscore if
1666          * there is one. */
1667
1668         /* The return value of this function (unlike cg_unescape())
1669          * needs free()! */
1670
1671         if (p[0] == 0 ||
1672             p[0] == '_' ||
1673             p[0] == '.' ||
1674             streq(p, "notify_on_release") ||
1675             streq(p, "release_agent") ||
1676             streq(p, "tasks") ||
1677             startswith(p, "cgroup."))
1678                 need_prefix = true;
1679         else {
1680                 const char *dot;
1681
1682                 dot = strrchr(p, '.');
1683                 if (dot) {
1684                         CGroupController c;
1685                         size_t l = dot - p;
1686
1687                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1688                                 const char *n;
1689
1690                                 n = cgroup_controller_to_string(c);
1691
1692                                 if (l != strlen(n))
1693                                         continue;
1694
1695                                 if (memcmp(p, n, l) != 0)
1696                                         continue;
1697
1698                                 need_prefix = true;
1699                                 break;
1700                         }
1701                 }
1702         }
1703
1704         if (need_prefix)
1705                 return strappend("_", p);
1706
1707         return strdup(p);
1708 }
1709
/* Reverses cg_escape(): skips a single leading underscore, if there is one.
 *
 * Unlike cg_escape(), the result points into the input string and must not
 * be free()d. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (*p == '_' ? p + 1 : p);
}
1721
1722 #define CONTROLLER_VALID                        \
1723         DIGITS LETTERS                          \
1724         "_"
1725
1726 bool cg_controller_is_valid(const char *p) {
1727         const char *t, *s;
1728
1729         if (!p)
1730                 return false;
1731
1732         s = startswith(p, "name=");
1733         if (s)
1734                 p = s;
1735
1736         if (*p == 0 || *p == '_')
1737                 return false;
1738
1739         for (t = p; *t; t++)
1740                 if (!strchr(CONTROLLER_VALID, *t))
1741                         return false;
1742
1743         if (t - p > FILENAME_MAX)
1744                 return false;
1745
1746         return true;
1747 }
1748
1749 int cg_slice_to_path(const char *unit, char **ret) {
1750         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1751         const char *dash;
1752         int r;
1753
1754         assert(unit);
1755         assert(ret);
1756
1757         if (streq(unit, "-.slice")) {
1758                 char *x;
1759
1760                 x = strdup("");
1761                 if (!x)
1762                         return -ENOMEM;
1763                 *ret = x;
1764                 return 0;
1765         }
1766
1767         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1768                 return -EINVAL;
1769
1770         if (!endswith(unit, ".slice"))
1771                 return -EINVAL;
1772
1773         r = unit_name_to_prefix(unit, &p);
1774         if (r < 0)
1775                 return r;
1776
1777         dash = strchr(p, '-');
1778
1779         /* Don't allow initial dashes */
1780         if (dash == p)
1781                 return -EINVAL;
1782
1783         while (dash) {
1784                 _cleanup_free_ char *escaped = NULL;
1785                 char n[dash - p + sizeof(".slice")];
1786
1787                 /* Don't allow trailing or double dashes */
1788                 if (dash[1] == 0 || dash[1] == '-')
1789                         return -EINVAL;
1790
1791                 strcpy(stpncpy(n, p, dash - p), ".slice");
1792                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1793                         return -EINVAL;
1794
1795                 escaped = cg_escape(n);
1796                 if (!escaped)
1797                         return -ENOMEM;
1798
1799                 if (!strextend(&s, escaped, "/", NULL))
1800                         return -ENOMEM;
1801
1802                 dash = strchr(dash+1, '-');
1803         }
1804
1805         e = cg_escape(unit);
1806         if (!e)
1807                 return -ENOMEM;
1808
1809         if (!strextend(&s, e, NULL))
1810                 return -ENOMEM;
1811
1812         *ret = s;
1813         s = NULL;
1814
1815         return 0;
1816 }
1817
1818 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1819         _cleanup_free_ char *p = NULL;
1820         int r;
1821
1822         r = cg_get_path(controller, path, attribute, &p);
1823         if (r < 0)
1824                 return r;
1825
1826         return write_string_file(p, value, 0);
1827 }
1828
1829 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1830         _cleanup_free_ char *p = NULL;
1831         int r;
1832
1833         r = cg_get_path(controller, path, attribute, &p);
1834         if (r < 0)
1835                 return r;
1836
1837         return read_one_line_file(p, ret);
1838 }
1839
1840 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1841         CGroupController c;
1842         int r, unified;
1843
1844         /* This one will create a cgroup in our private tree, but also
1845          * duplicate it in the trees specified in mask, and remove it
1846          * in all others */
1847
1848         /* First create the cgroup in our own hierarchy. */
1849         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1850         if (r < 0)
1851                 return r;
1852
1853         /* If we are in the unified hierarchy, we are done now */
1854         unified = cg_unified();
1855         if (unified < 0)
1856                 return unified;
1857         if (unified > 0)
1858                 return 0;
1859
1860         /* Otherwise, do the same in the other hierarchies */
1861         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1862                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1863                 const char *n;
1864
1865                 n = cgroup_controller_to_string(c);
1866
1867                 if (mask & bit)
1868                         (void) cg_create(n, path);
1869                 else if (supported & bit)
1870                         (void) cg_trim(n, path, true);
1871         }
1872
1873         return 0;
1874 }
1875
1876 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1877         CGroupController c;
1878         int r, unified;
1879
1880         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1881         if (r < 0)
1882                 return r;
1883
1884         unified = cg_unified();
1885         if (unified < 0)
1886                 return unified;
1887         if (unified > 0)
1888                 return 0;
1889
1890         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1891                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1892                 const char *p = NULL;
1893
1894                 if (!(supported & bit))
1895                         continue;
1896
1897                 if (path_callback)
1898                         p = path_callback(bit, userdata);
1899
1900                 if (!p)
1901                         p = path;
1902
1903                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1904         }
1905
1906         return 0;
1907 }
1908
1909 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1910         Iterator i;
1911         void *pidp;
1912         int r = 0;
1913
1914         SET_FOREACH(pidp, pids, i) {
1915                 pid_t pid = PTR_TO_PID(pidp);
1916                 int q;
1917
1918                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1919                 if (q < 0 && r >= 0)
1920                         r = q;
1921         }
1922
1923         return r;
1924 }
1925
1926 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1927         CGroupController c;
1928         int r = 0, unified;
1929
1930         if (!path_equal(from, to))  {
1931                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1932                 if (r < 0)
1933                         return r;
1934         }
1935
1936         unified = cg_unified();
1937         if (unified < 0)
1938                 return unified;
1939         if (unified > 0)
1940                 return r;
1941
1942         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1943                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1944                 const char *p = NULL;
1945
1946                 if (!(supported & bit))
1947                         continue;
1948
1949                 if (to_callback)
1950                         p = to_callback(bit, userdata);
1951
1952                 if (!p)
1953                         p = to;
1954
1955                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1956         }
1957
1958         return 0;
1959 }
1960
1961 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1962         CGroupController c;
1963         int r, unified;
1964
1965         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1966         if (r < 0)
1967                 return r;
1968
1969         unified = cg_unified();
1970         if (unified < 0)
1971                 return unified;
1972         if (unified > 0)
1973                 return r;
1974
1975         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1976                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1977
1978                 if (!(supported & bit))
1979                         continue;
1980
1981                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1982         }
1983
1984         return 0;
1985 }
1986
1987 int cg_mask_supported(CGroupMask *ret) {
1988         CGroupMask mask = 0;
1989         int r, unified;
1990
1991         /* Determines the mask of supported cgroup controllers. Only
1992          * includes controllers we can make sense of and that are
1993          * actually accessible. */
1994
1995         unified = cg_unified();
1996         if (unified < 0)
1997                 return unified;
1998         if (unified > 0) {
1999                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2000                 const char *c;
2001
2002                 /* In the unified hierarchy we can read the supported
2003                  * and accessible controllers from a the top-level
2004                  * cgroup attribute */
2005
2006                 r = cg_get_root_path(&root);
2007                 if (r < 0)
2008                         return r;
2009
2010                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2011                 if (r < 0)
2012                         return r;
2013
2014                 r = read_one_line_file(path, &controllers);
2015                 if (r < 0)
2016                         return r;
2017
2018                 c = controllers;
2019                 for (;;) {
2020                         _cleanup_free_ char *n = NULL;
2021                         CGroupController v;
2022
2023                         r = extract_first_word(&c, &n, NULL, 0);
2024                         if (r < 0)
2025                                 return r;
2026                         if (r == 0)
2027                                 break;
2028
2029                         v = cgroup_controller_from_string(n);
2030                         if (v < 0)
2031                                 continue;
2032
2033                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2034                 }
2035
2036                 /* Currently, we only support the memory and pids
2037                  * controller in the unified hierarchy, mask
2038                  * everything else off. */
2039                 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS;
2040
2041         } else {
2042                 CGroupController c;
2043
2044                 /* In the legacy hierarchy, we check whether which
2045                  * hierarchies are mounted. */
2046
2047                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2048                         const char *n;
2049
2050                         n = cgroup_controller_to_string(c);
2051                         if (controller_is_accessible(n) >= 0)
2052                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2053                 }
2054         }
2055
2056         *ret = mask;
2057         return 0;
2058 }
2059
2060 int cg_kernel_controllers(Set *controllers) {
2061         _cleanup_fclose_ FILE *f = NULL;
2062         char buf[LINE_MAX];
2063         int r;
2064
2065         assert(controllers);
2066
2067         /* Determines the full list of kernel-known controllers. Might
2068          * include controllers we don't actually support, arbitrary
2069          * named hierarchies and controllers that aren't currently
2070          * accessible (because not mounted). */
2071
2072         f = fopen("/proc/cgroups", "re");
2073         if (!f) {
2074                 if (errno == ENOENT)
2075                         return 0;
2076                 return -errno;
2077         }
2078
2079         /* Ignore the header line */
2080         (void) fgets(buf, sizeof(buf), f);
2081
2082         for (;;) {
2083                 char *controller;
2084                 int enabled = 0;
2085
2086                 errno = 0;
2087                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2088
2089                         if (feof(f))
2090                                 break;
2091
2092                         if (ferror(f) && errno > 0)
2093                                 return -errno;
2094
2095                         return -EBADMSG;
2096                 }
2097
2098                 if (!enabled) {
2099                         free(controller);
2100                         continue;
2101                 }
2102
2103                 if (!cg_controller_is_valid(controller)) {
2104                         free(controller);
2105                         return -EBADMSG;
2106                 }
2107
2108                 r = set_consume(controllers, controller);
2109                 if (r < 0)
2110                         return r;
2111         }
2112
2113         return 0;
2114 }
2115
2116 static thread_local int unified_cache = -1;
2117
2118 int cg_unified(void) {
2119         struct statfs fs;
2120
2121         /* Checks if we support the unified hierarchy. Returns an
2122          * error when the cgroup hierarchies aren't mounted yet or we
2123          * have any other trouble determining if the unified hierarchy
2124          * is supported. */
2125
2126         if (unified_cache >= 0)
2127                 return unified_cache;
2128
2129         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2130                 return -errno;
2131
2132         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2133                 unified_cache = true;
2134         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2135                 unified_cache = false;
2136         else
2137                 return -ENOMEDIUM;
2138
2139         return unified_cache;
2140 }
2141
2142 void cg_unified_flush(void) {
2143         unified_cache = -1;
2144 }
2145
2146 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2147         _cleanup_free_ char *fs = NULL;
2148         CGroupController c;
2149         int r, unified;
2150
2151         assert(p);
2152
2153         if (supported == 0)
2154                 return 0;
2155
2156         unified = cg_unified();
2157         if (unified < 0)
2158                 return unified;
2159         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2160                 return 0;
2161
2162         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2163         if (r < 0)
2164                 return r;
2165
2166         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2167                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2168                 const char *n;
2169
2170                 if (!(supported & bit))
2171                         continue;
2172
2173                 n = cgroup_controller_to_string(c);
2174                 {
2175                         char s[1 + strlen(n) + 1];
2176
2177                         s[0] = mask & bit ? '+' : '-';
2178                         strcpy(s + 1, n);
2179
2180                         r = write_string_file(fs, s, 0);
2181                         if (r < 0)
2182                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2183                 }
2184         }
2185
2186         return 0;
2187 }
2188
2189 bool cg_is_unified_wanted(void) {
2190         static thread_local int wanted = -1;
2191         int r, unified;
2192
2193         /* If the hierarchy is already mounted, then follow whatever
2194          * was chosen for it. */
2195         unified = cg_unified();
2196         if (unified >= 0)
2197                 return unified;
2198
2199         /* Otherwise, let's see what the kernel command line has to
2200          * say. Since checking that is expensive, let's cache the
2201          * result. */
2202         if (wanted >= 0)
2203                 return wanted;
2204
2205         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2206         if (r > 0)
2207                 return (wanted = true);
2208         else {
2209                 _cleanup_free_ char *value = NULL;
2210
2211                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2212                 if (r < 0)
2213                         return false;
2214                 if (r == 0)
2215                         return (wanted = false);
2216
2217                 return (wanted = parse_boolean(value) > 0);
2218         }
2219 }
2220
/* Exact opposite of cg_is_unified_wanted(): true whenever the unified
 * hierarchy is not wanted. */
bool cg_is_legacy_wanted(void) {
        bool unified_wanted = cg_is_unified_wanted();

        return !unified_wanted;
}
2224
2225 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2226         uint64_t u;
2227         int r;
2228
2229         if (isempty(s)) {
2230                 *ret = CGROUP_CPU_SHARES_INVALID;
2231                 return 0;
2232         }
2233
2234         r = safe_atou64(s, &u);
2235         if (r < 0)
2236                 return r;
2237
2238         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2239                 return -ERANGE;
2240
2241         *ret = u;
2242         return 0;
2243 }
2244
2245 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2246         uint64_t u;
2247         int r;
2248
2249         if (isempty(s)) {
2250                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2251                 return 0;
2252         }
2253
2254         r = safe_atou64(s, &u);
2255         if (r < 0)
2256                 return r;
2257
2258         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2259                 return -ERANGE;
2260
2261         *ret = u;
2262         return 0;
2263 }
2264
2265 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2266         [CGROUP_CONTROLLER_CPU] = "cpu",
2267         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2268         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2269         [CGROUP_CONTROLLER_MEMORY] = "memory",
2270         [CGROUP_CONTROLLER_DEVICES] = "devices",
2271         [CGROUP_CONTROLLER_PIDS] = "pids",
2272 };
2273
2274 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);