src/basic/cgroup-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <errno.h>
  23 #include <unistd.h>
  24 #include <signal.h>
  25 #include <string.h>
  26 #include <stdlib.h>
  27 #include <dirent.h>
  28 #include <sys/stat.h>
  29 #include <sys/types.h>
  30 #include <ftw.h>
  31
  32 #include "set.h"
  33 #include "macro.h"
  34 #include "util.h"
  35 #include "formats-util.h"
  36 #include "process-util.h"
  37 #include "path-util.h"
  38 #include "unit-name.h"
  39 #include "fileio.h"
  40 #include "special.h"
  41 #include "mkdir.h"
  42 #include "login-util.h"
  43 #include "cgroup-util.h"
  44
  45 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  46         _cleanup_free_ char *fs = NULL;
  47         FILE *f;
  48         int r;
  49
  50         assert(_f);
  51
  52         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  53         if (r < 0)
  54                 return r;
  55
  56         f = fopen(fs, "re");
  57         if (!f)
  58                 return -errno;
  59
  60         *_f = f;
  61         return 0;
  62 }
  63
  64 int cg_read_pid(FILE *f, pid_t *_pid) {
  65         unsigned long ul;
  66
  67         /* Note that the cgroup.procs might contain duplicates! See
  68          * cgroups.txt for details. */
  69
  70         assert(f);
  71         assert(_pid);
  72
  73         errno = 0;
  74         if (fscanf(f, "%lu", &ul) != 1) {
  75
  76                 if (feof(f))
  77                         return 0;
  78
  79                 return errno ? -errno : -EIO;
  80         }
  81
  82         if (ul <= 0)
  83                 return -EIO;
  84
  85         *_pid = (pid_t) ul;
  86         return 1;
  87 }
  88
  89 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
  90         _cleanup_free_ char *fs = NULL;
  91         int r;
  92         DIR *d;
  93
  94         assert(_d);
  95
  96         /* This is not recursive! */
  97
  98         r = cg_get_path(controller, path, NULL, &fs);
  99         if (r < 0)
 100                 return r;
 101
 102         d = opendir(fs);
 103         if (!d)
 104                 return -errno;
 105
 106         *_d = d;
 107         return 0;
 108 }
 109
 110 int cg_read_subgroup(DIR *d, char **fn) {
 111         struct dirent *de;
 112
 113         assert(d);
 114         assert(fn);
 115
 116         FOREACH_DIRENT_ALL(de, d, return -errno) {
 117                 char *b;
 118
 119                 if (de->d_type != DT_DIR)
 120                         continue;
 121
 122                 if (streq(de->d_name, ".") ||
 123                     streq(de->d_name, ".."))
 124                         continue;
 125
 126                 b = strdup(de->d_name);
 127                 if (!b)
 128                         return -ENOMEM;
 129
 130                 *fn = b;
 131                 return 1;
 132         }
 133
 134         return 0;
 135 }
 136
 137 int cg_rmdir(const char *controller, const char *path) {
 138         _cleanup_free_ char *p = NULL;
 139         int r;
 140
 141         r = cg_get_path(controller, path, NULL, &p);
 142         if (r < 0)
 143                 return r;
 144
 145         r = rmdir(p);
 146         if (r < 0 && errno != ENOENT)
 147                 return -errno;
 148
 149         return 0;
 150 }
 151
 152 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
 153         _cleanup_set_free_ Set *allocated_set = NULL;
 154         bool done = false;
 155         int r, ret = 0;
 156         pid_t my_pid;
 157
 158         assert(sig >= 0);
 159
 160         /* This goes through the tasks list and kills them all. This
 161          * is repeated until no further processes are added to the
 162          * tasks list, to properly handle forking processes */
 163
 164         if (!s) {
 165                 s = allocated_set = set_new(NULL);
 166                 if (!s)
 167                         return -ENOMEM;
 168         }
 169
 170         my_pid = getpid();
 171
 172         do {
 173                 _cleanup_fclose_ FILE *f = NULL;
 174                 pid_t pid = 0;
 175                 done = true;
 176
 177                 r = cg_enumerate_processes(controller, path, &f);
 178                 if (r < 0) {
 179                         if (ret >= 0 && r != -ENOENT)
 180                                 return r;
 181
 182                         return ret;
 183                 }
 184
 185                 while ((r = cg_read_pid(f, &pid)) > 0) {
 186
 187                         if (ignore_self && pid == my_pid)
 188                                 continue;
 189
 190                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 191                                 continue;
 192
 193                         /* If we haven't killed this process yet, kill
 194                          * it */
 195                         if (kill(pid, sig) < 0) {
 196                                 if (ret >= 0 && errno != ESRCH)
 197                                         ret = -errno;
 198                         } else {
 199                                 if (sigcont && sig != SIGKILL)
 200                                         (void) kill(pid, SIGCONT);
 201
 202                                 if (ret == 0)
 203                                         ret = 1;
 204                         }
 205
 206                         done = false;
 207
 208                         r = set_put(s, PID_TO_PTR(pid));
 209                         if (r < 0) {
 210                                 if (ret >= 0)
 211                                         return r;
 212
 213                                 return ret;
 214                         }
 215                 }
 216
 217                 if (r < 0) {
 218                         if (ret >= 0)
 219                                 return r;
 220
 221                         return ret;
 222                 }
 223
 224                 /* To avoid racing against processes which fork
 225                  * quicker than we can kill them we repeat this until
 226                  * no new pids need to be killed. */
 227
 228         } while (!done);
 229
 230         return ret;
 231 }
 232
 233 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 234         _cleanup_set_free_ Set *allocated_set = NULL;
 235         _cleanup_closedir_ DIR *d = NULL;
 236         int r, ret;
 237         char *fn;
 238
 239         assert(path);
 240         assert(sig >= 0);
 241
 242         if (!s) {
 243                 s = allocated_set = set_new(NULL);
 244                 if (!s)
 245                         return -ENOMEM;
 246         }
 247
 248         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 249
 250         r = cg_enumerate_subgroups(controller, path, &d);
 251         if (r < 0) {
 252                 if (ret >= 0 && r != -ENOENT)
 253                         return r;
 254
 255                 return ret;
 256         }
 257
 258         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 259                 _cleanup_free_ char *p = NULL;
 260
 261                 p = strjoin(path, "/", fn, NULL);
 262                 free(fn);
 263                 if (!p)
 264                         return -ENOMEM;
 265
 266                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 267                 if (r != 0 && ret >= 0)
 268                         ret = r;
 269         }
 270
 271         if (ret >= 0 && r < 0)
 272                 ret = r;
 273
 274         if (rem) {
 275                 r = cg_rmdir(controller, path);
 276                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 277                         return r;
 278         }
 279
 280         return ret;
 281 }
 282
 283 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
 284         bool done = false;
 285         _cleanup_set_free_ Set *s = NULL;
 286         int r, ret = 0;
 287         pid_t my_pid;
 288
 289         assert(cfrom);
 290         assert(pfrom);
 291         assert(cto);
 292         assert(pto);
 293
 294         s = set_new(NULL);
 295         if (!s)
 296                 return -ENOMEM;
 297
 298         my_pid = getpid();
 299
 300         do {
 301                 _cleanup_fclose_ FILE *f = NULL;
 302                 pid_t pid = 0;
 303                 done = true;
 304
 305                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 306                 if (r < 0) {
 307                         if (ret >= 0 && r != -ENOENT)
 308                                 return r;
 309
 310                         return ret;
 311                 }
 312
 313                 while ((r = cg_read_pid(f, &pid)) > 0) {
 314
 315                         /* This might do weird stuff if we aren't a
 316                          * single-threaded program. However, we
 317                          * luckily know we are not */
 318                         if (ignore_self && pid == my_pid)
 319                                 continue;
 320
 321                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 322                                 continue;
 323
 324                         /* Ignore kernel threads. Since they can only
 325                          * exist in the root cgroup, we only check for
 326                          * them there. */
 327                         if (cfrom &&
 328                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 329                             is_kernel_thread(pid) > 0)
 330                                 continue;
 331
 332                         r = cg_attach(cto, pto, pid);
 333                         if (r < 0) {
 334                                 if (ret >= 0 && r != -ESRCH)
 335                                         ret = r;
 336                         } else if (ret == 0)
 337                                 ret = 1;
 338
 339                         done = false;
 340
 341                         r = set_put(s, PID_TO_PTR(pid));
 342                         if (r < 0) {
 343                                 if (ret >= 0)
 344                                         return r;
 345
 346                                 return ret;
 347                         }
 348                 }
 349
 350                 if (r < 0) {
 351                         if (ret >= 0)
 352                                 return r;
 353
 354                         return ret;
 355                 }
 356         } while (!done);
 357
 358         return ret;
 359 }
 360
 361 int cg_migrate_recursive(
 362                 const char *cfrom,
 363                 const char *pfrom,
 364                 const char *cto,
 365                 const char *pto,
 366                 bool ignore_self,
 367                 bool rem) {
 368
 369         _cleanup_closedir_ DIR *d = NULL;
 370         int r, ret = 0;
 371         char *fn;
 372
 373         assert(cfrom);
 374         assert(pfrom);
 375         assert(cto);
 376         assert(pto);
 377
 378         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 379
 380         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 381         if (r < 0) {
 382                 if (ret >= 0 && r != -ENOENT)
 383                         return r;
 384
 385                 return ret;
 386         }
 387
 388         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 389                 _cleanup_free_ char *p = NULL;
 390
 391                 p = strjoin(pfrom, "/", fn, NULL);
 392                 free(fn);
 393                 if (!p)
 394                         return -ENOMEM;
 395
 396                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 397                 if (r != 0 && ret >= 0)
 398                         ret = r;
 399         }
 400
 401         if (r < 0 && ret >= 0)
 402                 ret = r;
 403
 404         if (rem) {
 405                 r = cg_rmdir(cfrom, pfrom);
 406                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 407                         return r;
 408         }
 409
 410         return ret;
 411 }
 412
 413 int cg_migrate_recursive_fallback(
 414                 const char *cfrom,
 415                 const char *pfrom,
 416                 const char *cto,
 417                 const char *pto,
 418                 bool ignore_self,
 419                 bool rem) {
 420
 421         int r;
 422
 423         assert(cfrom);
 424         assert(pfrom);
 425         assert(cto);
 426         assert(pto);
 427
 428         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 429         if (r < 0) {
 430                 char prefix[strlen(pto) + 1];
 431
 432                 /* This didn't work? Then let's try all prefixes of the destination */
 433
 434                 PATH_FOREACH_PREFIX(prefix, pto) {
 435                         int q;
 436
 437                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 438                         if (q >= 0)
 439                                 return q;
 440                 }
 441         }
 442
 443         return r;
 444 }
 445
 446 static const char *controller_to_dirname(const char *controller) {
 447         const char *e;
 448
 449         assert(controller);
 450
 451         /* Converts a controller name to the directory name below
 452          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 453          * just cuts off the name= prefixed used for named
 454          * hierarchies, if it is specified. */
 455
 456         e = startswith(controller, "name=");
 457         if (e)
 458                 return e;
 459
 460         return controller;
 461 }
 462
 463 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 464         const char *dn;
 465         char *t = NULL;
 466
 467         assert(fs);
 468         assert(controller);
 469
 470         dn = controller_to_dirname(controller);
 471
 472         if (isempty(path) && isempty(suffix))
 473                 t = strappend("/sys/fs/cgroup/", dn);
 474         else if (isempty(path))
 475                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 476         else if (isempty(suffix))
 477                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 478         else
 479                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 480         if (!t)
 481                 return -ENOMEM;
 482
 483         *fs = t;
 484         return 0;
 485 }
 486
 487 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 488         char *t;
 489
 490         assert(fs);
 491
 492         if (isempty(path) && isempty(suffix))
 493                 t = strdup("/sys/fs/cgroup");
 494         else if (isempty(path))
 495                 t = strappend("/sys/fs/cgroup/", suffix);
 496         else if (isempty(suffix))
 497                 t = strappend("/sys/fs/cgroup/", path);
 498         else
 499                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 500         if (!t)
 501                 return -ENOMEM;
 502
 503         *fs = t;
 504         return 0;
 505 }
 506
 507 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 508         int unified, r;
 509
 510         assert(fs);
 511
 512         if (!controller) {
 513                 char *t;
 514
 515                 /* If no controller is specified, we return the path
 516                  * *below* the controllers, without any prefix. */
 517
 518                 if (!path && !suffix)
 519                         return -EINVAL;
 520
 521                 if (isempty(suffix))
 522                         t = strdup(path);
 523                 else if (isempty(path))
 524                         t = strdup(suffix);
 525                 else
 526                         t = strjoin(path, "/", suffix, NULL);
 527                 if (!t)
 528                         return -ENOMEM;
 529
 530                 *fs = path_kill_slashes(t);
 531                 return 0;
 532         }
 533
 534         if (!cg_controller_is_valid(controller))
 535                 return -EINVAL;
 536
 537         unified = cg_unified();
 538         if (unified < 0)
 539                 return unified;
 540
 541         if (unified > 0)
 542                 r = join_path_unified(path, suffix, fs);
 543         else
 544                 r = join_path_legacy(controller, path, suffix, fs);
 545         if (r < 0)
 546                 return r;
 547
 548         path_kill_slashes(*fs);
 549         return 0;
 550 }
 551
 552 static int controller_is_accessible(const char *controller) {
 553         int unified;
 554
 555         assert(controller);
 556
 557         /* Checks whether a specific controller is accessible,
 558          * i.e. its hierarchy mounted. In the unified hierarchy all
 559          * controllers are considered accessible, except for the named
 560          * hierarchies */
 561
 562         if (!cg_controller_is_valid(controller))
 563                 return -EINVAL;
 564
 565         unified = cg_unified();
 566         if (unified < 0)
 567                 return unified;
 568         if (unified > 0) {
 569                 /* We don't support named hierarchies if we are using
 570                  * the unified hierarchy. */
 571
 572                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 573                         return 0;
 574
 575                 if (startswith(controller, "name="))
 576                         return -EOPNOTSUPP;
 577
 578         } else {
 579                 const char *cc, *dn;
 580
 581                 dn = controller_to_dirname(controller);
 582                 cc = strjoina("/sys/fs/cgroup/", dn);
 583
 584                 if (laccess(cc, F_OK) < 0)
 585                         return -errno;
 586         }
 587
 588         return 0;
 589 }
 590
 591 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 592         int r;
 593
 594         assert(controller);
 595         assert(fs);
 596
 597         /* Check if the specified controller is actually accessible */
 598         r = controller_is_accessible(controller);
 599         if (r < 0)
 600                 return r;
 601
 602         return cg_get_path(controller, path, suffix, fs);
 603 }
 604
 605 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 606         assert(path);
 607         assert(sb);
 608         assert(ftwbuf);
 609
 610         if (typeflag != FTW_DP)
 611                 return 0;
 612
 613         if (ftwbuf->level < 1)
 614                 return 0;
 615
 616         (void) rmdir(path);
 617         return 0;
 618 }
 619
 620 int cg_trim(const char *controller, const char *path, bool delete_root) {
 621         _cleanup_free_ char *fs = NULL;
 622         int r = 0;
 623
 624         assert(path);
 625
 626         r = cg_get_path(controller, path, NULL, &fs);
 627         if (r < 0)
 628                 return r;
 629
 630         errno = 0;
 631         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 632                 if (errno == ENOENT)
 633                         r = 0;
 634                 else if (errno != 0)
 635                         r = -errno;
 636                 else
 637                         r = -EIO;
 638         }
 639
 640         if (delete_root) {
 641                 if (rmdir(fs) < 0 && errno != ENOENT)
 642                         return -errno;
 643         }
 644
 645         return r;
 646 }
 647
 648 int cg_create(const char *controller, const char *path) {
 649         _cleanup_free_ char *fs = NULL;
 650         int r;
 651
 652         r = cg_get_path_and_check(controller, path, NULL, &fs);
 653         if (r < 0)
 654                 return r;
 655
 656         r = mkdir_parents(fs, 0755);
 657         if (r < 0)
 658                 return r;
 659
 660         if (mkdir(fs, 0755) < 0) {
 661
 662                 if (errno == EEXIST)
 663                         return 0;
 664
 665                 return -errno;
 666         }
 667
 668         return 1;
 669 }
 670
 671 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 672         int r, q;
 673
 674         assert(pid >= 0);
 675
 676         r = cg_create(controller, path);
 677         if (r < 0)
 678                 return r;
 679
 680         q = cg_attach(controller, path, pid);
 681         if (q < 0)
 682                 return q;
 683
 684         /* This does not remove the cgroup on failure */
 685         return r;
 686 }
 687
 688 int cg_attach(const char *controller, const char *path, pid_t pid) {
 689         _cleanup_free_ char *fs = NULL;
 690         char c[DECIMAL_STR_MAX(pid_t) + 2];
 691         int r;
 692
 693         assert(path);
 694         assert(pid >= 0);
 695
 696         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 697         if (r < 0)
 698                 return r;
 699
 700         if (pid == 0)
 701                 pid = getpid();
 702
 703         snprintf(c, sizeof(c), PID_FMT"\n", pid);
 704
 705         return write_string_file(fs, c, 0);
 706 }
 707
 708 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 709         int r;
 710
 711         assert(controller);
 712         assert(path);
 713         assert(pid >= 0);
 714
 715         r = cg_attach(controller, path, pid);
 716         if (r < 0) {
 717                 char prefix[strlen(path) + 1];
 718
 719                 /* This didn't work? Then let's try all prefixes of
 720                  * the destination */
 721
 722                 PATH_FOREACH_PREFIX(prefix, path) {
 723                         int q;
 724
 725                         q = cg_attach(controller, prefix, pid);
 726                         if (q >= 0)
 727                                 return q;
 728                 }
 729         }
 730
 731         return r;
 732 }
 733
 734 int cg_set_group_access(
 735                 const char *controller,
 736                 const char *path,
 737                 mode_t mode,
 738                 uid_t uid,
 739                 gid_t gid) {
 740
 741         _cleanup_free_ char *fs = NULL;
 742         int r;
 743
 744         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 745                 return 0;
 746
 747         if (mode != MODE_INVALID)
 748                 mode &= 0777;
 749
 750         r = cg_get_path(controller, path, NULL, &fs);
 751         if (r < 0)
 752                 return r;
 753
 754         return chmod_and_chown(fs, mode, uid, gid);
 755 }
 756
 757 int cg_set_task_access(
 758                 const char *controller,
 759                 const char *path,
 760                 mode_t mode,
 761                 uid_t uid,
 762                 gid_t gid) {
 763
 764         _cleanup_free_ char *fs = NULL, *procs = NULL;
 765         int r, unified;
 766
 767         assert(path);
 768
 769         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 770                 return 0;
 771
 772         if (mode != MODE_INVALID)
 773                 mode &= 0666;
 774
 775         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 776         if (r < 0)
 777                 return r;
 778
 779         r = chmod_and_chown(fs, mode, uid, gid);
 780         if (r < 0)
 781                 return r;
 782
 783         unified = cg_unified();
 784         if (unified < 0)
 785                 return unified;
 786         if (unified)
 787                 return 0;
 788
 789         /* Compatibility, Always keep values for "tasks" in sync with
 790          * "cgroup.procs" */
 791         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 792                 (void) chmod_and_chown(procs, mode, uid, gid);
 793
 794         return 0;
 795 }
 796
 797 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 798         _cleanup_fclose_ FILE *f = NULL;
 799         char line[LINE_MAX];
 800         const char *fs;
 801         size_t cs = 0;
 802         int unified;
 803
 804         assert(path);
 805         assert(pid >= 0);
 806
 807         unified = cg_unified();
 808         if (unified < 0)
 809                 return unified;
 810         if (unified == 0) {
 811                 if (controller) {
 812                         if (!cg_controller_is_valid(controller))
 813                                 return -EINVAL;
 814                 } else
 815                         controller = SYSTEMD_CGROUP_CONTROLLER;
 816
 817                 cs = strlen(controller);
 818         }
 819
 820         fs = procfs_file_alloca(pid, "cgroup");
 821         f = fopen(fs, "re");
 822         if (!f)
 823                 return errno == ENOENT ? -ESRCH : -errno;
 824
 825         FOREACH_LINE(line, f, return -errno) {
 826                 char *e, *p;
 827
 828                 truncate_nl(line);
 829
 830                 if (unified) {
 831                         e = startswith(line, "0:");
 832                         if (!e)
 833                                 continue;
 834
 835                         e = strchr(e, ':');
 836                         if (!e)
 837                                 continue;
 838                 } else {
 839                         char *l;
 840                         size_t k;
 841                         const char *word, *state;
 842                         bool found = false;
 843
 844                         l = strchr(line, ':');
 845                         if (!l)
 846                                 continue;
 847
 848                         l++;
 849                         e = strchr(l, ':');
 850                         if (!e)
 851                                 continue;
 852
 853                         *e = 0;
 854                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 855                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 856                                         found = true;
 857                                         break;
 858                                 }
 859                         }
 860
 861                         if (!found)
 862                                 continue;
 863                 }
 864
 865                 p = strdup(e + 1);
 866                 if (!p)
 867                         return -ENOMEM;
 868
 869                 *path = p;
 870                 return 0;
 871         }
 872
 873         return -ENOENT;
 874 }
 875
 876 int cg_install_release_agent(const char *controller, const char *agent) {
 877         _cleanup_free_ char *fs = NULL, *contents = NULL;
 878         const char *sc;
 879         int r, unified;
 880
 881         assert(agent);
 882
 883         unified = cg_unified();
 884         if (unified < 0)
 885                 return unified;
 886         if (unified) /* doesn't apply to unified hierarchy */
 887                 return -EOPNOTSUPP;
 888
 889         r = cg_get_path(controller, NULL, "release_agent", &fs);
 890         if (r < 0)
 891                 return r;
 892
 893         r = read_one_line_file(fs, &contents);
 894         if (r < 0)
 895                 return r;
 896
 897         sc = strstrip(contents);
 898         if (isempty(sc)) {
 899                 r = write_string_file(fs, agent, 0);
 900                 if (r < 0)
 901                         return r;
 902         } else if (!path_equal(sc, agent))
 903                 return -EEXIST;
 904
 905         fs = mfree(fs);
 906         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 907         if (r < 0)
 908                 return r;
 909
 910         contents = mfree(contents);
 911         r = read_one_line_file(fs, &contents);
 912         if (r < 0)
 913                 return r;
 914
 915         sc = strstrip(contents);
 916         if (streq(sc, "0")) {
 917                 r = write_string_file(fs, "1", 0);
 918                 if (r < 0)
 919                         return r;
 920
 921                 return 1;
 922         }
 923
 924         if (!streq(sc, "1"))
 925                 return -EIO;
 926
 927         return 0;
 928 }
 929
 930 int cg_uninstall_release_agent(const char *controller) {
 931         _cleanup_free_ char *fs = NULL;
 932         int r, unified;
 933
 934         unified = cg_unified();
 935         if (unified < 0)
 936                 return unified;
 937         if (unified) /* Doesn't apply to unified hierarchy */
 938                 return -EOPNOTSUPP;
 939
 940         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 941         if (r < 0)
 942                 return r;
 943
 944         r = write_string_file(fs, "0", 0);
 945         if (r < 0)
 946                 return r;
 947
 948         fs = mfree(fs);
 949
 950         r = cg_get_path(controller, NULL, "release_agent", &fs);
 951         if (r < 0)
 952                 return r;
 953
 954         r = write_string_file(fs, "", 0);
 955         if (r < 0)
 956                 return r;
 957
 958         return 0;
 959 }
 960
 961 int cg_is_empty(const char *controller, const char *path) {
 962         _cleanup_fclose_ FILE *f = NULL;
 963         pid_t pid;
 964         int r;
 965
 966         assert(path);
 967
 968         r = cg_enumerate_processes(controller, path, &f);
 969         if (r == -ENOENT)
 970                 return 1;
 971         if (r < 0)
 972                 return r;
 973
 974         r = cg_read_pid(f, &pid);
 975         if (r < 0)
 976                 return r;
 977
 978         return r == 0;
 979 }
 980
 981 int cg_is_empty_recursive(const char *controller, const char *path) {
 982         int unified, r;
 983
 984         assert(path);
 985
 986         /* The root cgroup is always populated */
 987         if (controller && (isempty(path) || path_equal(path, "/")))
 988                 return false;
 989
 990         unified = cg_unified();
 991         if (unified < 0)
 992                 return unified;
 993
 994         if (unified > 0) {
 995                 _cleanup_free_ char *populated = NULL, *t = NULL;
 996
 997                 /* On the unified hierarchy we can check empty state
 998                  * via the "cgroup.populated" attribute. */
 999
1000                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1001                 if (r < 0)
1002                         return r;
1003
1004                 r = read_one_line_file(populated, &t);
1005                 if (r < 0)
1006                         return r;
1007
1008                 return streq(t, "0");
1009         } else {
1010                 _cleanup_closedir_ DIR *d = NULL;
1011                 char *fn;
1012
1013                 r = cg_is_empty(controller, path);
1014                 if (r <= 0)
1015                         return r;
1016
1017                 r = cg_enumerate_subgroups(controller, path, &d);
1018                 if (r == -ENOENT)
1019                         return 1;
1020                 if (r < 0)
1021                         return r;
1022
1023                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1024                         _cleanup_free_ char *p = NULL;
1025
1026                         p = strjoin(path, "/", fn, NULL);
1027                         free(fn);
1028                         if (!p)
1029                                 return -ENOMEM;
1030
1031                         r = cg_is_empty_recursive(controller, p);
1032                         if (r <= 0)
1033                                 return r;
1034                 }
1035                 if (r < 0)
1036                         return r;
1037
1038                 return true;
1039         }
1040 }
1041
1042 int cg_split_spec(const char *spec, char **controller, char **path) {
1043         char *t = NULL, *u = NULL;
1044         const char *e;
1045
1046         assert(spec);
1047
1048         if (*spec == '/') {
1049                 if (!path_is_safe(spec))
1050                         return -EINVAL;
1051
1052                 if (path) {
1053                         t = strdup(spec);
1054                         if (!t)
1055                                 return -ENOMEM;
1056
1057                         *path = path_kill_slashes(t);
1058                 }
1059
1060                 if (controller)
1061                         *controller = NULL;
1062
1063                 return 0;
1064         }
1065
1066         e = strchr(spec, ':');
1067         if (!e) {
1068                 if (!cg_controller_is_valid(spec))
1069                         return -EINVAL;
1070
1071                 if (controller) {
1072                         t = strdup(spec);
1073                         if (!t)
1074                                 return -ENOMEM;
1075
1076                         *controller = t;
1077                 }
1078
1079                 if (path)
1080                         *path = NULL;
1081
1082                 return 0;
1083         }
1084
1085         t = strndup(spec, e-spec);
1086         if (!t)
1087                 return -ENOMEM;
1088         if (!cg_controller_is_valid(t)) {
1089                 free(t);
1090                 return -EINVAL;
1091         }
1092
1093         if (isempty(e+1))
1094                 u = NULL;
1095         else {
1096                 u = strdup(e+1);
1097                 if (!u) {
1098                         free(t);
1099                         return -ENOMEM;
1100                 }
1101
1102                 if (!path_is_safe(u) ||
1103                     !path_is_absolute(u)) {
1104                         free(t);
1105                         free(u);
1106                         return -EINVAL;
1107                 }
1108
1109                 path_kill_slashes(u);
1110         }
1111
1112         if (controller)
1113                 *controller = t;
1114         else
1115                 free(t);
1116
1117         if (path)
1118                 *path = u;
1119         else
1120                 free(u);
1121
1122         return 0;
1123 }
1124
1125 int cg_mangle_path(const char *path, char **result) {
1126         _cleanup_free_ char *c = NULL, *p = NULL;
1127         char *t;
1128         int r;
1129
1130         assert(path);
1131         assert(result);
1132
1133         /* First, check if it already is a filesystem path */
1134         if (path_startswith(path, "/sys/fs/cgroup")) {
1135
1136                 t = strdup(path);
1137                 if (!t)
1138                         return -ENOMEM;
1139
1140                 *result = path_kill_slashes(t);
1141                 return 0;
1142         }
1143
1144         /* Otherwise, treat it as cg spec */
1145         r = cg_split_spec(path, &c, &p);
1146         if (r < 0)
1147                 return r;
1148
1149         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1150 }
1151
1152 int cg_get_root_path(char **path) {
1153         char *p, *e;
1154         int r;
1155
1156         assert(path);
1157
1158         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1159         if (r < 0)
1160                 return r;
1161
1162         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1163         if (!e)
1164                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1165         if (!e)
1166                 e = endswith(p, "/system"); /* even more legacy */
1167         if (e)
1168                 *e = 0;
1169
1170         *path = p;
1171         return 0;
1172 }
1173
1174 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1175         _cleanup_free_ char *rt = NULL;
1176         char *p;
1177         int r;
1178
1179         assert(cgroup);
1180         assert(shifted);
1181
1182         if (!root) {
1183                 /* If the root was specified let's use that, otherwise
1184                  * let's determine it from PID 1 */
1185
1186                 r = cg_get_root_path(&rt);
1187                 if (r < 0)
1188                         return r;
1189
1190                 root = rt;
1191         }
1192
1193         p = path_startswith(cgroup, root);
1194         if (p && p > cgroup)
1195                 *shifted = p - 1;
1196         else
1197                 *shifted = cgroup;
1198
1199         return 0;
1200 }
1201
1202 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1203         _cleanup_free_ char *raw = NULL;
1204         const char *c;
1205         int r;
1206
1207         assert(pid >= 0);
1208         assert(cgroup);
1209
1210         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1211         if (r < 0)
1212                 return r;
1213
1214         r = cg_shift_path(raw, root, &c);
1215         if (r < 0)
1216                 return r;
1217
1218         if (c == raw) {
1219                 *cgroup = raw;
1220                 raw = NULL;
1221         } else {
1222                 char *n;
1223
1224                 n = strdup(c);
1225                 if (!n)
1226                         return -ENOMEM;
1227
1228                 *cgroup = n;
1229         }
1230
1231         return 0;
1232 }
1233
1234 int cg_path_decode_unit(const char *cgroup, char **unit){
1235         char *c, *s;
1236         size_t n;
1237
1238         assert(cgroup);
1239         assert(unit);
1240
1241         n = strcspn(cgroup, "/");
1242         if (n < 3)
1243                 return -ENXIO;
1244
1245         c = strndupa(cgroup, n);
1246         c = cg_unescape(c);
1247
1248         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1249                 return -ENXIO;
1250
1251         s = strdup(c);
1252         if (!s)
1253                 return -ENOMEM;
1254
1255         *unit = s;
1256         return 0;
1257 }
1258
1259 static bool valid_slice_name(const char *p, size_t n) {
1260
1261         if (!p)
1262                 return false;
1263
1264         if (n < strlen("x.slice"))
1265                 return false;
1266
1267         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1268                 char buf[n+1], *c;
1269
1270                 memcpy(buf, p, n);
1271                 buf[n] = 0;
1272
1273                 c = cg_unescape(buf);
1274
1275                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1276         }
1277
1278         return false;
1279 }
1280
/* Advances past all leading path components that are valid slice names
 * (see valid_slice_name()). Returns a pointer to the first component that is
 * not one, with the slashes in front of it already skipped. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t len;

                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                p = component + len;
        }
}
1298
1299 int cg_path_get_unit(const char *path, char **ret) {
1300         const char *e;
1301         char *unit;
1302         int r;
1303
1304         assert(path);
1305         assert(ret);
1306
1307         e = skip_slices(path);
1308
1309         r = cg_path_decode_unit(e, &unit);
1310         if (r < 0)
1311                 return r;
1312
1313         /* We skipped over the slices, don't accept any now */
1314         if (endswith(unit, ".slice")) {
1315                 free(unit);
1316                 return -ENXIO;
1317         }
1318
1319         *ret = unit;
1320         return 0;
1321 }
1322
1323 int cg_pid_get_unit(pid_t pid, char **unit) {
1324         _cleanup_free_ char *cgroup = NULL;
1325         int r;
1326
1327         assert(unit);
1328
1329         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1330         if (r < 0)
1331                 return r;
1332
1333         return cg_path_get_unit(cgroup, unit);
1334 }
1335
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to what follows the "session-<id>.scope" component (with
 * the slashes after it skipped), or NULL if 'p' is empty, its first
 * component does not have that form, or the embedded session ID is not
 * accepted by session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the ID between prefix and suffix, plus NUL. The
                 * length check above guarantees at least one ID character. */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, so failure is NULL,
                 * not the boolean 'false'. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1372
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the "user@<uid>.service" component (with
 * the slashes after it skipped), or NULL if 'p' is empty, its first
 * component does not have that form, or <uid> is rejected by parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) == 0 && memcmp(p + len - 8, ".service", 8) == 0) {
                size_t uid_len = len - 5 - 8;
                char uid[uid_len + 1];

                memcpy(uid, p + 5, uid_len);
                uid[uid_len] = '\0';

                /* User manager service names cannot clash with the
                 * kernel's own names, so there is nothing to unescape
                 * with cg_unescape() here. */

                if (parse_uid(uid, NULL) >= 0) {
                        p += len;
                        return p + strspn(p, "/");
                }
        }

        return NULL;
}
1410
/* Skips the part of a cgroup path that leads into a user context: any
 * leading slices, followed by either a user@*.service component or, failing
 * that, a session-*.scope component. Returns a pointer to what follows, or
 * NULL if neither is present. */
static const char *skip_user_prefix(const char *path) {
        const char *rest, *after;

        assert(path);

        rest = skip_slices(path);

        /* Prefer the user manager; fall back to the session scope. */
        after = skip_user_manager(rest);

        return after ? after : skip_session(rest);
}
1427
/* Determines the user unit a cgroup path belongs to. The user prefix is
 * skipped first (see skip_user_prefix()); what remains is parsed exactly
 * like a system unit path by cg_path_get_unit().
 *
 * Returns -ENXIO if no user prefix is found, otherwise the result of
 * cg_path_get_unit(). */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1443
1444 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1445         _cleanup_free_ char *cgroup = NULL;
1446         int r;
1447
1448         assert(unit);
1449
1450         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1451         if (r < 0)
1452                 return r;
1453
1454         return cg_path_get_user_unit(cgroup, unit);
1455 }
1456
1457 int cg_path_get_machine_name(const char *path, char **machine) {
1458         _cleanup_free_ char *u = NULL;
1459         const char *sl;
1460         int r;
1461
1462         r = cg_path_get_unit(path, &u);
1463         if (r < 0)
1464                 return r;
1465
1466         sl = strjoina("/run/systemd/machines/unit:", u);
1467         return readlink_malloc(sl, machine);
1468 }
1469
1470 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1471         _cleanup_free_ char *cgroup = NULL;
1472         int r;
1473
1474         assert(machine);
1475
1476         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1477         if (r < 0)
1478                 return r;
1479
1480         return cg_path_get_machine_name(cgroup, machine);
1481 }
1482
1483 int cg_path_get_session(const char *path, char **session) {
1484         _cleanup_free_ char *unit = NULL;
1485         char *start, *end;
1486         int r;
1487
1488         assert(path);
1489
1490         r = cg_path_get_unit(path, &unit);
1491         if (r < 0)
1492                 return r;
1493
1494         start = startswith(unit, "session-");
1495         if (!start)
1496                 return -ENXIO;
1497         end = endswith(start, ".scope");
1498         if (!end)
1499                 return -ENXIO;
1500
1501         *end = 0;
1502         if (!session_id_valid(start))
1503                 return -ENXIO;
1504
1505         if (session) {
1506                 char *rr;
1507
1508                 rr = strdup(start);
1509                 if (!rr)
1510                         return -ENOMEM;
1511
1512                 *session = rr;
1513         }
1514
1515         return 0;
1516 }
1517
1518 int cg_pid_get_session(pid_t pid, char **session) {
1519         _cleanup_free_ char *cgroup = NULL;
1520         int r;
1521
1522         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1523         if (r < 0)
1524                 return r;
1525
1526         return cg_path_get_session(cgroup, session);
1527 }
1528
1529 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1530         _cleanup_free_ char *slice = NULL;
1531         char *start, *end;
1532         int r;
1533
1534         assert(path);
1535
1536         r = cg_path_get_slice(path, &slice);
1537         if (r < 0)
1538                 return r;
1539
1540         start = startswith(slice, "user-");
1541         if (!start)
1542                 return -ENXIO;
1543         end = endswith(start, ".slice");
1544         if (!end)
1545                 return -ENXIO;
1546
1547         *end = 0;
1548         if (parse_uid(start, uid) < 0)
1549                 return -ENXIO;
1550
1551         return 0;
1552 }
1553
1554 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1555         _cleanup_free_ char *cgroup = NULL;
1556         int r;
1557
1558         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1559         if (r < 0)
1560                 return r;
1561
1562         return cg_path_get_owner_uid(cgroup, uid);
1563 }
1564
/* Determines the slice a cgroup path is in: the last of the leading run of
 * valid slice components, i.e. the one right before the first non-slice
 * component.
 *
 * On success returns 0 and stores a newly allocated name in *slice; if the
 * path does not start with any slice, that name is "-.slice". Returns
 * -ENOMEM on allocation failure or the error from cg_path_decode_unit(). */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        char *root;

        assert(p);
        assert(slice);

        /* Walk over the leading slice components, remembering the most
         * recent one. */
        for (;;) {
                size_t len;

                p += strspn(p, "/");

                len = strcspn(p, "/");
                if (!valid_slice_name(p, len))
                        break;

                last = p;
                p += len;
        }

        if (last)
                return cg_path_decode_unit(last, slice);

        /* No slice component at all. */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1600
1601 int cg_pid_get_slice(pid_t pid, char **slice) {
1602         _cleanup_free_ char *cgroup = NULL;
1603         int r;
1604
1605         assert(slice);
1606
1607         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1608         if (r < 0)
1609                 return r;
1610
1611         return cg_path_get_slice(cgroup, slice);
1612 }
1613
/* Determines the user slice a cgroup path is in. The user prefix is skipped
 * first (see skip_user_prefix()); what remains is parsed exactly like a
 * system path by cg_path_get_slice().
 *
 * Returns -ENXIO if no user prefix is found, otherwise the result of
 * cg_path_get_slice(). */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1627
1628 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1629         _cleanup_free_ char *cgroup = NULL;
1630         int r;
1631
1632         assert(slice);
1633
1634         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1635         if (r < 0)
1636                 return r;
1637
1638         return cg_path_get_user_slice(cgroup, slice);
1639 }
1640
1641 char *cg_escape(const char *p) {
1642         bool need_prefix = false;
1643
1644         /* This implements very minimal escaping for names to be used
1645          * as file names in the cgroup tree: any name which might
1646          * conflict with a kernel name or is prefixed with '_' is
1647          * prefixed with a '_'. That way, when reading cgroup names it
1648          * is sufficient to remove a single prefixing underscore if
1649          * there is one. */
1650
1651         /* The return value of this function (unlike cg_unescape())
1652          * needs free()! */
1653
1654         if (p[0] == 0 ||
1655             p[0] == '_' ||
1656             p[0] == '.' ||
1657             streq(p, "notify_on_release") ||
1658             streq(p, "release_agent") ||
1659             streq(p, "tasks") ||
1660             startswith(p, "cgroup."))
1661                 need_prefix = true;
1662         else {
1663                 const char *dot;
1664
1665                 dot = strrchr(p, '.');
1666                 if (dot) {
1667                         CGroupController c;
1668                         size_t l = dot - p;
1669
1670                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1671                                 const char *n;
1672
1673                                 n = cgroup_controller_to_string(c);
1674
1675                                 if (l != strlen(n))
1676                                         continue;
1677
1678                                 if (memcmp(p, n, l) != 0)
1679                                         continue;
1680
1681                                 need_prefix = true;
1682                                 break;
1683                         }
1684                 }
1685         }
1686
1687         if (need_prefix)
1688                 return strappend("_", p);
1689
1690         return strdup(p);
1691 }
1692
/* Reverses cg_escape(): skips a single leading '_' if there is one.
 *
 * Unlike cg_escape(), nothing is allocated here. The result points into
 * 'p' itself and must not be free()d. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1704
1705 #define CONTROLLER_VALID                        \
1706         DIGITS LETTERS                          \
1707         "_"
1708
1709 bool cg_controller_is_valid(const char *p) {
1710         const char *t, *s;
1711
1712         if (!p)
1713                 return false;
1714
1715         s = startswith(p, "name=");
1716         if (s)
1717                 p = s;
1718
1719         if (*p == 0 || *p == '_')
1720                 return false;
1721
1722         for (t = p; *t; t++)
1723                 if (!strchr(CONTROLLER_VALID, *t))
1724                         return false;
1725
1726         if (t - p > FILENAME_MAX)
1727                 return false;
1728
1729         return true;
1730 }
1731
1732 int cg_slice_to_path(const char *unit, char **ret) {
1733         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1734         const char *dash;
1735         int r;
1736
1737         assert(unit);
1738         assert(ret);
1739
1740         if (streq(unit, "-.slice")) {
1741                 char *x;
1742
1743                 x = strdup("");
1744                 if (!x)
1745                         return -ENOMEM;
1746                 *ret = x;
1747                 return 0;
1748         }
1749
1750         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1751                 return -EINVAL;
1752
1753         if (!endswith(unit, ".slice"))
1754                 return -EINVAL;
1755
1756         r = unit_name_to_prefix(unit, &p);
1757         if (r < 0)
1758                 return r;
1759
1760         dash = strchr(p, '-');
1761
1762         /* Don't allow initial dashes */
1763         if (dash == p)
1764                 return -EINVAL;
1765
1766         while (dash) {
1767                 _cleanup_free_ char *escaped = NULL;
1768                 char n[dash - p + sizeof(".slice")];
1769
1770                 /* Don't allow trailing or double dashes */
1771                 if (dash[1] == 0 || dash[1] == '-')
1772                         return -EINVAL;
1773
1774                 strcpy(stpncpy(n, p, dash - p), ".slice");
1775                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1776                         return -EINVAL;
1777
1778                 escaped = cg_escape(n);
1779                 if (!escaped)
1780                         return -ENOMEM;
1781
1782                 if (!strextend(&s, escaped, "/", NULL))
1783                         return -ENOMEM;
1784
1785                 dash = strchr(dash+1, '-');
1786         }
1787
1788         e = cg_escape(unit);
1789         if (!e)
1790                 return -ENOMEM;
1791
1792         if (!strextend(&s, e, NULL))
1793                 return -ENOMEM;
1794
1795         *ret = s;
1796         s = NULL;
1797
1798         return 0;
1799 }
1800
1801 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1802         _cleanup_free_ char *p = NULL;
1803         int r;
1804
1805         r = cg_get_path(controller, path, attribute, &p);
1806         if (r < 0)
1807                 return r;
1808
1809         return write_string_file(p, value, 0);
1810 }
1811
1812 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1813         _cleanup_free_ char *p = NULL;
1814         int r;
1815
1816         r = cg_get_path(controller, path, attribute, &p);
1817         if (r < 0)
1818                 return r;
1819
1820         return read_one_line_file(p, ret);
1821 }
1822
1823 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1824         CGroupController c;
1825         int r, unified;
1826
1827         /* This one will create a cgroup in our private tree, but also
1828          * duplicate it in the trees specified in mask, and remove it
1829          * in all others */
1830
1831         /* First create the cgroup in our own hierarchy. */
1832         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1833         if (r < 0)
1834                 return r;
1835
1836         /* If we are in the unified hierarchy, we are done now */
1837         unified = cg_unified();
1838         if (unified < 0)
1839                 return unified;
1840         if (unified > 0)
1841                 return 0;
1842
1843         /* Otherwise, do the same in the other hierarchies */
1844         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1845                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1846                 const char *n;
1847
1848                 n = cgroup_controller_to_string(c);
1849
1850                 if (mask & bit)
1851                         (void) cg_create(n, path);
1852                 else if (supported & bit)
1853                         (void) cg_trim(n, path, true);
1854         }
1855
1856         return 0;
1857 }
1858
1859 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1860         CGroupController c;
1861         int r, unified;
1862
1863         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1864         if (r < 0)
1865                 return r;
1866
1867         unified = cg_unified();
1868         if (unified < 0)
1869                 return unified;
1870         if (unified > 0)
1871                 return 0;
1872
1873         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1874                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1875                 const char *p = NULL;
1876
1877                 if (!(supported & bit))
1878                         continue;
1879
1880                 if (path_callback)
1881                         p = path_callback(bit, userdata);
1882
1883                 if (!p)
1884                         p = path;
1885
1886                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1887         }
1888
1889         return 0;
1890 }
1891
1892 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1893         Iterator i;
1894         void *pidp;
1895         int r = 0;
1896
1897         SET_FOREACH(pidp, pids, i) {
1898                 pid_t pid = PTR_TO_PID(pidp);
1899                 int q;
1900
1901                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1902                 if (q < 0 && r >= 0)
1903                         r = q;
1904         }
1905
1906         return r;
1907 }
1908
1909 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1910         CGroupController c;
1911         int r, unified;
1912
1913         if (!path_equal(from, to))  {
1914                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1915                 if (r < 0)
1916                         return r;
1917         }
1918
1919         unified = cg_unified();
1920         if (unified < 0)
1921                 return unified;
1922         if (unified > 0)
1923                 return r;
1924
1925         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1926                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1927                 const char *p = NULL;
1928
1929                 if (!(supported & bit))
1930                         continue;
1931
1932                 if (to_callback)
1933                         p = to_callback(bit, userdata);
1934
1935                 if (!p)
1936                         p = to;
1937
1938                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1939         }
1940
1941         return 0;
1942 }
1943
1944 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1945         CGroupController c;
1946         int r, unified;
1947
1948         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1949         if (r < 0)
1950                 return r;
1951
1952         unified = cg_unified();
1953         if (unified < 0)
1954                 return unified;
1955         if (unified > 0)
1956                 return r;
1957
1958         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1959                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1960
1961                 if (!(supported & bit))
1962                         continue;
1963
1964                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1965         }
1966
1967         return 0;
1968 }
1969
1970 int cg_mask_supported(CGroupMask *ret) {
1971         CGroupMask mask = 0;
1972         int r, unified;
1973
1974         /* Determines the mask of supported cgroup controllers. Only
1975          * includes controllers we can make sense of and that are
1976          * actually accessible. */
1977
1978         unified = cg_unified();
1979         if (unified < 0)
1980                 return unified;
1981         if (unified > 0) {
1982                 _cleanup_free_ char *controllers = NULL;
1983                 const char *c;
1984
1985                 /* In the unified hierarchy we can read the supported
1986                  * and accessible controllers from a the top-level
1987                  * cgroup attribute */
1988
1989                 r = read_one_line_file("/sys/fs/cgroup/cgroup.controllers", &controllers);
1990                 if (r < 0)
1991                         return r;
1992
1993                 c = controllers;
1994                 for (;;) {
1995                         _cleanup_free_ char *n = NULL;
1996                         CGroupController v;
1997
1998                         r = extract_first_word(&c, &n, NULL, 0);
1999                         if (r < 0)
2000                                 return r;
2001                         if (r == 0)
2002                                 break;
2003
2004                         v = cgroup_controller_from_string(n);
2005                         if (v < 0)
2006                                 continue;
2007
2008                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2009                 }
2010
2011                 /* Currently, we only support the memory controller in
2012                  * the unified hierarchy, mask everything else off. */
2013                 mask &= CGROUP_MASK_MEMORY;
2014
2015         } else {
2016                 CGroupController c;
2017
2018                 /* In the legacy hierarchy, we check whether which
2019                  * hierarchies are mounted. */
2020
2021                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2022                         const char *n;
2023
2024                         n = cgroup_controller_to_string(c);
2025                         if (controller_is_accessible(n) >= 0)
2026                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2027                 }
2028         }
2029
2030         *ret = mask;
2031         return 0;
2032 }
2033
2034 int cg_kernel_controllers(Set *controllers) {
2035         _cleanup_fclose_ FILE *f = NULL;
2036         char buf[LINE_MAX];
2037         int r;
2038
2039         assert(controllers);
2040
2041         /* Determines the full list of kernel-known controllers. Might
2042          * include controllers we don't actually support, arbitrary
2043          * named hierarchies and controllers that aren't currently
2044          * accessible (because not mounted). */
2045
2046         f = fopen("/proc/cgroups", "re");
2047         if (!f) {
2048                 if (errno == ENOENT)
2049                         return 0;
2050                 return -errno;
2051         }
2052
2053         /* Ignore the header line */
2054         (void) fgets(buf, sizeof(buf), f);
2055
2056         for (;;) {
2057                 char *controller;
2058                 int enabled = 0;
2059
2060                 errno = 0;
2061                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2062
2063                         if (feof(f))
2064                                 break;
2065
2066                         if (ferror(f) && errno != 0)
2067                                 return -errno;
2068
2069                         return -EBADMSG;
2070                 }
2071
2072                 if (!enabled) {
2073                         free(controller);
2074                         continue;
2075                 }
2076
2077                 if (!cg_controller_is_valid(controller)) {
2078                         free(controller);
2079                         return -EBADMSG;
2080                 }
2081
2082                 r = set_consume(controllers, controller);
2083                 if (r < 0)
2084                         return r;
2085         }
2086
2087         return 0;
2088 }
2089
/* Per-thread cache of the result of cg_unified(): negative while nothing
 * has been determined yet, otherwise the cached boolean answer. Reset to
 * -1 by cg_unified_flush(). */
static thread_local int unified_cache = -1;
2091
2092 int cg_unified(void) {
2093         struct statfs fs;
2094
2095         /* Checks if we support the unified hierarchy. Returns an
2096          * error when the cgroup hierarchies aren't mounted yet or we
2097          * have any other trouble determining if the unified hierarchy
2098          * is supported. */
2099
2100         if (unified_cache >= 0)
2101                 return unified_cache;
2102
2103         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2104                 return -errno;
2105
2106         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2107                 unified_cache = true;
2108         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2109                 unified_cache = false;
2110         else
2111                 return -ENOEXEC;
2112
2113         return unified_cache;
2114 }
2115
/* Forgets the cached answer of cg_unified(), so that its next call
 * checks /sys/fs/cgroup/ again. */
void cg_unified_flush(void) {
        unified_cache = -1;
}
2119
2120 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2121         _cleanup_free_ char *fs = NULL;
2122         CGroupController c;
2123         int r, unified;
2124
2125         assert(p);
2126
2127         if (supported == 0)
2128                 return 0;
2129
2130         unified = cg_unified();
2131         if (unified < 0)
2132                 return unified;
2133         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2134                 return 0;
2135
2136         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2137         if (r < 0)
2138                 return r;
2139
2140         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2141                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2142                 const char *n;
2143
2144                 if (!(supported & bit))
2145                         continue;
2146
2147                 n = cgroup_controller_to_string(c);
2148                 {
2149                         char s[1 + strlen(n) + 1];
2150
2151                         s[0] = mask & bit ? '+' : '-';
2152                         strcpy(s + 1, n);
2153
2154                         r = write_string_file(fs, s, 0);
2155                         if (r < 0)
2156                                 log_warning_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2157                 }
2158         }
2159
2160         return 0;
2161 }
2162
2163 bool cg_is_unified_wanted(void) {
2164         static thread_local int wanted = -1;
2165         int r, unified;
2166
2167         /* If the hierarchy is already mounted, then follow whatever
2168          * was chosen for it. */
2169         unified = cg_unified();
2170         if (unified >= 0)
2171                 return unified;
2172
2173         /* Otherwise, let's see what the kernel command line has to
2174          * say. Since checking that is expensive, let's cache the
2175          * result. */
2176         if (wanted >= 0)
2177                 return wanted;
2178
2179         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2180         if (r > 0)
2181                 return (wanted = true);
2182         else {
2183                 _cleanup_free_ char *value = NULL;
2184
2185                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2186                 if (r < 0)
2187                         return false;
2188                 if (r == 0)
2189                         return (wanted = false);
2190
2191                 return (wanted = parse_boolean(value) > 0);
2192         }
2193 }
2194
/* Logical inverse of cg_is_unified_wanted(). */
bool cg_is_legacy_wanted(void) {
        return !cg_is_unified_wanted();
}
2198
2199 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2200         [CGROUP_CONTROLLER_CPU] = "cpu",
2201         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2202         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2203         [CGROUP_CONTROLLER_MEMORY] = "memory",
2204         [CGROUP_CONTROLLER_DEVICE] = "device",
2205 };
2206
2207 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);