src/basic/cgroup-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <dirent.h>
  23 #include <errno.h>
  24 #include <ftw.h>
  25 #include <signal.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <sys/stat.h>
  29 #include <sys/types.h>
  30 #include <unistd.h>
  31
  32 #include "cgroup-util.h"
  33 #include "dirent-util.h"
  34 #include "extract-word.h"
  35 #include "fd-util.h"
  36 #include "fileio.h"
  37 #include "formats-util.h"
  38 #include "login-util.h"
  39 #include "macro.h"
  40 #include "mkdir.h"
  41 #include "parse-util.h"
  42 #include "path-util.h"
  43 #include "process-util.h"
  44 #include "set.h"
  45 #include "special.h"
  46 #include "string-util.h"
  47 #include "unit-name.h"
  48 #include "user-util.h"
  49 #include "util.h"
  50
  51 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  52         _cleanup_free_ char *fs = NULL;
  53         FILE *f;
  54         int r;
  55
  56         assert(_f);
  57
  58         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  59         if (r < 0)
  60                 return r;
  61
  62         f = fopen(fs, "re");
  63         if (!f)
  64                 return -errno;
  65
  66         *_f = f;
  67         return 0;
  68 }
  69
  70 int cg_read_pid(FILE *f, pid_t *_pid) {
  71         unsigned long ul;
  72
  73         /* Note that the cgroup.procs might contain duplicates! See
  74          * cgroups.txt for details. */
  75
  76         assert(f);
  77         assert(_pid);
  78
  79         errno = 0;
  80         if (fscanf(f, "%lu", &ul) != 1) {
  81
  82                 if (feof(f))
  83                         return 0;
  84
  85                 return errno ? -errno : -EIO;
  86         }
  87
  88         if (ul <= 0)
  89                 return -EIO;
  90
  91         *_pid = (pid_t) ul;
  92         return 1;
  93 }
  94
  95 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
  96         _cleanup_free_ char *fs = NULL;
  97         int r;
  98         DIR *d;
  99
 100         assert(_d);
 101
 102         /* This is not recursive! */
 103
 104         r = cg_get_path(controller, path, NULL, &fs);
 105         if (r < 0)
 106                 return r;
 107
 108         d = opendir(fs);
 109         if (!d)
 110                 return -errno;
 111
 112         *_d = d;
 113         return 0;
 114 }
 115
 116 int cg_read_subgroup(DIR *d, char **fn) {
 117         struct dirent *de;
 118
 119         assert(d);
 120         assert(fn);
 121
 122         FOREACH_DIRENT_ALL(de, d, return -errno) {
 123                 char *b;
 124
 125                 if (de->d_type != DT_DIR)
 126                         continue;
 127
 128                 if (streq(de->d_name, ".") ||
 129                     streq(de->d_name, ".."))
 130                         continue;
 131
 132                 b = strdup(de->d_name);
 133                 if (!b)
 134                         return -ENOMEM;
 135
 136                 *fn = b;
 137                 return 1;
 138         }
 139
 140         return 0;
 141 }
 142
 143 int cg_rmdir(const char *controller, const char *path) {
 144         _cleanup_free_ char *p = NULL;
 145         int r;
 146
 147         r = cg_get_path(controller, path, NULL, &p);
 148         if (r < 0)
 149                 return r;
 150
 151         r = rmdir(p);
 152         if (r < 0 && errno != ENOENT)
 153                 return -errno;
 154
 155         return 0;
 156 }
 157
/* Sends signal sig to every process listed for the given cgroup.
 *
 * s is used to remember which PIDs have already been handled; if the
 * caller passes NULL a temporary set is allocated and released again on
 * return. If ignore_self is true the calling process (getpid()) is
 * skipped. If sigcont is true and sig is not SIGKILL, SIGCONT is sent
 * after each successful kill().
 *
 * Returns 1 if at least one signal was delivered, 0 if none was, and a
 * negative errno-style value on failure. ESRCH from kill() and ENOENT
 * when opening the process list are not reported as errors. Once an
 * error has been recorded in ret it is the value that gets returned,
 * later errors do not replace it. */
int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
        _cleanup_set_free_ Set *allocated_set = NULL;
        bool done = false;
        int r, ret = 0;
        pid_t my_pid;

        assert(sig >= 0);

        /* This goes through the tasks list and kills them all. This
         * is repeated until no further processes are added to the
         * tasks list, to properly handle forking processes */

        if (!s) {
                s = allocated_set = set_new(NULL);
                if (!s)
                        return -ENOMEM;
        }

        my_pid = getpid();

        do {
                _cleanup_fclose_ FILE *f = NULL;
                pid_t pid = 0;
                /* Assume this is the last pass; reset below as soon as
                 * a PID is seen that was not yet in the set. */
                done = true;

                r = cg_enumerate_processes(controller, path, &f);
                if (r < 0) {
                        /* A missing cgroup is not an error, and an
                         * earlier recorded error takes precedence. */
                        if (ret >= 0 && r != -ENOENT)
                                return r;

                        return ret;
                }

                while ((r = cg_read_pid(f, &pid)) > 0) {

                        if (ignore_self && pid == my_pid)
                                continue;

                        /* Already handled in this or an earlier pass */
                        if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
                                continue;

                        /* If we haven't killed this process yet, kill
                         * it */
                        if (kill(pid, sig) < 0) {
                                /* Record only the first real error;
                                 * ESRCH is ignored. */
                                if (ret >= 0 && errno != ESRCH)
                                        ret = -errno;
                        } else {
                                if (sigcont && sig != SIGKILL)
                                        (void) kill(pid, SIGCONT);

                                if (ret == 0)
                                        ret = 1;
                        }

                        /* A new PID was seen, so another pass is needed */
                        done = false;

                        r = set_put(s, PID_TO_PTR(pid));
                        if (r < 0) {
                                if (ret >= 0)
                                        return r;

                                return ret;
                        }
                }

                /* r <= 0 here: negative means reading the list failed */
                if (r < 0) {
                        if (ret >= 0)
                                return r;

                        return ret;
                }

                /* To avoid racing against processes which fork
                 * quicker than we can kill them we repeat this until
                 * no new pids need to be killed. */

        } while (!done);

        return ret;
}
 238
 239 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 240         _cleanup_set_free_ Set *allocated_set = NULL;
 241         _cleanup_closedir_ DIR *d = NULL;
 242         int r, ret;
 243         char *fn;
 244
 245         assert(path);
 246         assert(sig >= 0);
 247
 248         if (!s) {
 249                 s = allocated_set = set_new(NULL);
 250                 if (!s)
 251                         return -ENOMEM;
 252         }
 253
 254         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 255
 256         r = cg_enumerate_subgroups(controller, path, &d);
 257         if (r < 0) {
 258                 if (ret >= 0 && r != -ENOENT)
 259                         return r;
 260
 261                 return ret;
 262         }
 263
 264         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 265                 _cleanup_free_ char *p = NULL;
 266
 267                 p = strjoin(path, "/", fn, NULL);
 268                 free(fn);
 269                 if (!p)
 270                         return -ENOMEM;
 271
 272                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 273                 if (r != 0 && ret >= 0)
 274                         ret = r;
 275         }
 276
 277         if (ret >= 0 && r < 0)
 278                 ret = r;
 279
 280         if (rem) {
 281                 r = cg_rmdir(controller, path);
 282                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 283                         return r;
 284         }
 285
 286         return ret;
 287 }
 288
/* Moves every process listed for the cgroup cfrom:pfrom into the cgroup
 * cto:pto by calling cg_attach() for each PID.
 *
 * If ignore_self is true the calling process (getpid()) is left where
 * it is. When the source is the root group (pfrom empty or "/"), PIDs
 * for which is_kernel_thread() reports a positive result are skipped.
 *
 * Returns 1 if at least one process was attached, 0 if none was, and a
 * negative errno-style value on failure. -ESRCH from cg_attach() and
 * ENOENT when opening the source process list are not reported as
 * errors. Once an error has been recorded in ret it is the value that
 * gets returned. */
int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
        bool done = false;
        _cleanup_set_free_ Set *s = NULL;
        int r, ret = 0;
        pid_t my_pid;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        /* Tracks the PIDs already handled so each is attempted once */
        s = set_new(NULL);
        if (!s)
                return -ENOMEM;

        my_pid = getpid();

        do {
                _cleanup_fclose_ FILE *f = NULL;
                pid_t pid = 0;
                /* Assume this is the last pass; reset below as soon as
                 * a PID is seen that was not yet in the set. */
                done = true;

                r = cg_enumerate_processes(cfrom, pfrom, &f);
                if (r < 0) {
                        /* A missing source group is not an error, and
                         * an earlier recorded error takes precedence. */
                        if (ret >= 0 && r != -ENOENT)
                                return r;

                        return ret;
                }

                while ((r = cg_read_pid(f, &pid)) > 0) {

                        /* This might do weird stuff if we aren't a
                         * single-threaded program. However, we
                         * luckily know we are not */
                        if (ignore_self && pid == my_pid)
                                continue;

                        /* Already handled in this or an earlier pass */
                        if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
                                continue;

                        /* Ignore kernel threads. Since they can only
                         * exist in the root cgroup, we only check for
                         * them there. */
                        if (cfrom &&
                            (isempty(pfrom) || path_equal(pfrom, "/")) &&
                            is_kernel_thread(pid) > 0)
                                continue;

                        r = cg_attach(cto, pto, pid);
                        if (r < 0) {
                                /* Record only the first real error;
                                 * -ESRCH is ignored. */
                                if (ret >= 0 && r != -ESRCH)
                                        ret = r;
                        } else if (ret == 0)
                                ret = 1;

                        /* A new PID was seen, so another pass is needed */
                        done = false;

                        r = set_put(s, PID_TO_PTR(pid));
                        if (r < 0) {
                                if (ret >= 0)
                                        return r;

                                return ret;
                        }
                }

                /* r <= 0 here: negative means reading the list failed */
                if (r < 0) {
                        if (ret >= 0)
                                return r;

                        return ret;
                }
        } while (!done);

        return ret;
}
 366
 367 int cg_migrate_recursive(
 368                 const char *cfrom,
 369                 const char *pfrom,
 370                 const char *cto,
 371                 const char *pto,
 372                 bool ignore_self,
 373                 bool rem) {
 374
 375         _cleanup_closedir_ DIR *d = NULL;
 376         int r, ret = 0;
 377         char *fn;
 378
 379         assert(cfrom);
 380         assert(pfrom);
 381         assert(cto);
 382         assert(pto);
 383
 384         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 385
 386         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 387         if (r < 0) {
 388                 if (ret >= 0 && r != -ENOENT)
 389                         return r;
 390
 391                 return ret;
 392         }
 393
 394         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 395                 _cleanup_free_ char *p = NULL;
 396
 397                 p = strjoin(pfrom, "/", fn, NULL);
 398                 free(fn);
 399                 if (!p)
 400                         return -ENOMEM;
 401
 402                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 403                 if (r != 0 && ret >= 0)
 404                         ret = r;
 405         }
 406
 407         if (r < 0 && ret >= 0)
 408                 ret = r;
 409
 410         if (rem) {
 411                 r = cg_rmdir(cfrom, pfrom);
 412                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 413                         return r;
 414         }
 415
 416         return ret;
 417 }
 418
 419 int cg_migrate_recursive_fallback(
 420                 const char *cfrom,
 421                 const char *pfrom,
 422                 const char *cto,
 423                 const char *pto,
 424                 bool ignore_self,
 425                 bool rem) {
 426
 427         int r;
 428
 429         assert(cfrom);
 430         assert(pfrom);
 431         assert(cto);
 432         assert(pto);
 433
 434         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 435         if (r < 0) {
 436                 char prefix[strlen(pto) + 1];
 437
 438                 /* This didn't work? Then let's try all prefixes of the destination */
 439
 440                 PATH_FOREACH_PREFIX(prefix, pto) {
 441                         int q;
 442
 443                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 444                         if (q >= 0)
 445                                 return q;
 446                 }
 447         }
 448
 449         return r;
 450 }
 451
 452 static const char *controller_to_dirname(const char *controller) {
 453         const char *e;
 454
 455         assert(controller);
 456
 457         /* Converts a controller name to the directory name below
 458          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 459          * just cuts off the name= prefixed used for named
 460          * hierarchies, if it is specified. */
 461
 462         e = startswith(controller, "name=");
 463         if (e)
 464                 return e;
 465
 466         return controller;
 467 }
 468
 469 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 470         const char *dn;
 471         char *t = NULL;
 472
 473         assert(fs);
 474         assert(controller);
 475
 476         dn = controller_to_dirname(controller);
 477
 478         if (isempty(path) && isempty(suffix))
 479                 t = strappend("/sys/fs/cgroup/", dn);
 480         else if (isempty(path))
 481                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 482         else if (isempty(suffix))
 483                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 484         else
 485                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 486         if (!t)
 487                 return -ENOMEM;
 488
 489         *fs = t;
 490         return 0;
 491 }
 492
 493 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 494         char *t;
 495
 496         assert(fs);
 497
 498         if (isempty(path) && isempty(suffix))
 499                 t = strdup("/sys/fs/cgroup");
 500         else if (isempty(path))
 501                 t = strappend("/sys/fs/cgroup/", suffix);
 502         else if (isempty(suffix))
 503                 t = strappend("/sys/fs/cgroup/", path);
 504         else
 505                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 506         if (!t)
 507                 return -ENOMEM;
 508
 509         *fs = t;
 510         return 0;
 511 }
 512
 513 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 514         int unified, r;
 515
 516         assert(fs);
 517
 518         if (!controller) {
 519                 char *t;
 520
 521                 /* If no controller is specified, we return the path
 522                  * *below* the controllers, without any prefix. */
 523
 524                 if (!path && !suffix)
 525                         return -EINVAL;
 526
 527                 if (!suffix)
 528                         t = strdup(path);
 529                 else if (!path)
 530                         t = strdup(suffix);
 531                 else
 532                         t = strjoin(path, "/", suffix, NULL);
 533                 if (!t)
 534                         return -ENOMEM;
 535
 536                 *fs = path_kill_slashes(t);
 537                 return 0;
 538         }
 539
 540         if (!cg_controller_is_valid(controller))
 541                 return -EINVAL;
 542
 543         unified = cg_unified();
 544         if (unified < 0)
 545                 return unified;
 546
 547         if (unified > 0)
 548                 r = join_path_unified(path, suffix, fs);
 549         else
 550                 r = join_path_legacy(controller, path, suffix, fs);
 551         if (r < 0)
 552                 return r;
 553
 554         path_kill_slashes(*fs);
 555         return 0;
 556 }
 557
 558 static int controller_is_accessible(const char *controller) {
 559         int unified;
 560
 561         assert(controller);
 562
 563         /* Checks whether a specific controller is accessible,
 564          * i.e. its hierarchy mounted. In the unified hierarchy all
 565          * controllers are considered accessible, except for the named
 566          * hierarchies */
 567
 568         if (!cg_controller_is_valid(controller))
 569                 return -EINVAL;
 570
 571         unified = cg_unified();
 572         if (unified < 0)
 573                 return unified;
 574         if (unified > 0) {
 575                 /* We don't support named hierarchies if we are using
 576                  * the unified hierarchy. */
 577
 578                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 579                         return 0;
 580
 581                 if (startswith(controller, "name="))
 582                         return -EOPNOTSUPP;
 583
 584         } else {
 585                 const char *cc, *dn;
 586
 587                 dn = controller_to_dirname(controller);
 588                 cc = strjoina("/sys/fs/cgroup/", dn);
 589
 590                 if (laccess(cc, F_OK) < 0)
 591                         return -errno;
 592         }
 593
 594         return 0;
 595 }
 596
 597 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 598         int r;
 599
 600         assert(controller);
 601         assert(fs);
 602
 603         /* Check if the specified controller is actually accessible */
 604         r = controller_is_accessible(controller);
 605         if (r < 0)
 606                 return r;
 607
 608         return cg_get_path(controller, path, suffix, fs);
 609 }
 610
 611 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 612         assert(path);
 613         assert(sb);
 614         assert(ftwbuf);
 615
 616         if (typeflag != FTW_DP)
 617                 return 0;
 618
 619         if (ftwbuf->level < 1)
 620                 return 0;
 621
 622         (void) rmdir(path);
 623         return 0;
 624 }
 625
 626 int cg_trim(const char *controller, const char *path, bool delete_root) {
 627         _cleanup_free_ char *fs = NULL;
 628         int r = 0;
 629
 630         assert(path);
 631
 632         r = cg_get_path(controller, path, NULL, &fs);
 633         if (r < 0)
 634                 return r;
 635
 636         errno = 0;
 637         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 638                 if (errno == ENOENT)
 639                         r = 0;
 640                 else if (errno != 0)
 641                         r = -errno;
 642                 else
 643                         r = -EIO;
 644         }
 645
 646         if (delete_root) {
 647                 if (rmdir(fs) < 0 && errno != ENOENT)
 648                         return -errno;
 649         }
 650
 651         return r;
 652 }
 653
 654 int cg_create(const char *controller, const char *path) {
 655         _cleanup_free_ char *fs = NULL;
 656         int r;
 657
 658         r = cg_get_path_and_check(controller, path, NULL, &fs);
 659         if (r < 0)
 660                 return r;
 661
 662         r = mkdir_parents(fs, 0755);
 663         if (r < 0)
 664                 return r;
 665
 666         if (mkdir(fs, 0755) < 0) {
 667
 668                 if (errno == EEXIST)
 669                         return 0;
 670
 671                 return -errno;
 672         }
 673
 674         return 1;
 675 }
 676
 677 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 678         int r, q;
 679
 680         assert(pid >= 0);
 681
 682         r = cg_create(controller, path);
 683         if (r < 0)
 684                 return r;
 685
 686         q = cg_attach(controller, path, pid);
 687         if (q < 0)
 688                 return q;
 689
 690         /* This does not remove the cgroup on failure */
 691         return r;
 692 }
 693
 694 int cg_attach(const char *controller, const char *path, pid_t pid) {
 695         _cleanup_free_ char *fs = NULL;
 696         char c[DECIMAL_STR_MAX(pid_t) + 2];
 697         int r;
 698
 699         assert(path);
 700         assert(pid >= 0);
 701
 702         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 703         if (r < 0)
 704                 return r;
 705
 706         if (pid == 0)
 707                 pid = getpid();
 708
 709         snprintf(c, sizeof(c), PID_FMT"\n", pid);
 710
 711         return write_string_file(fs, c, 0);
 712 }
 713
 714 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 715         int r;
 716
 717         assert(controller);
 718         assert(path);
 719         assert(pid >= 0);
 720
 721         r = cg_attach(controller, path, pid);
 722         if (r < 0) {
 723                 char prefix[strlen(path) + 1];
 724
 725                 /* This didn't work? Then let's try all prefixes of
 726                  * the destination */
 727
 728                 PATH_FOREACH_PREFIX(prefix, path) {
 729                         int q;
 730
 731                         q = cg_attach(controller, prefix, pid);
 732                         if (q >= 0)
 733                                 return q;
 734                 }
 735         }
 736
 737         return r;
 738 }
 739
 740 int cg_set_group_access(
 741                 const char *controller,
 742                 const char *path,
 743                 mode_t mode,
 744                 uid_t uid,
 745                 gid_t gid) {
 746
 747         _cleanup_free_ char *fs = NULL;
 748         int r;
 749
 750         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 751                 return 0;
 752
 753         if (mode != MODE_INVALID)
 754                 mode &= 0777;
 755
 756         r = cg_get_path(controller, path, NULL, &fs);
 757         if (r < 0)
 758                 return r;
 759
 760         return chmod_and_chown(fs, mode, uid, gid);
 761 }
 762
 763 int cg_set_task_access(
 764                 const char *controller,
 765                 const char *path,
 766                 mode_t mode,
 767                 uid_t uid,
 768                 gid_t gid) {
 769
 770         _cleanup_free_ char *fs = NULL, *procs = NULL;
 771         int r, unified;
 772
 773         assert(path);
 774
 775         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 776                 return 0;
 777
 778         if (mode != MODE_INVALID)
 779                 mode &= 0666;
 780
 781         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 782         if (r < 0)
 783                 return r;
 784
 785         r = chmod_and_chown(fs, mode, uid, gid);
 786         if (r < 0)
 787                 return r;
 788
 789         unified = cg_unified();
 790         if (unified < 0)
 791                 return unified;
 792         if (unified)
 793                 return 0;
 794
 795         /* Compatibility, Always keep values for "tasks" in sync with
 796          * "cgroup.procs" */
 797         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 798                 (void) chmod_and_chown(procs, mode, uid, gid);
 799
 800         return 0;
 801 }
 802
/* Looks up the cgroup path of process pid by scanning the file named by
 * procfs_file_alloca(pid, "cgroup") (presumably /proc/<pid>/cgroup --
 * TODO confirm against that macro).
 *
 * On the unified hierarchy the line starting with "0:" is used and the
 * controller argument is not consulted. Otherwise the line whose
 * controller list contains controller is used; a NULL controller
 * selects SYSTEMD_CGROUP_CONTROLLER.
 *
 * Returns 0 and stores a newly allocated copy of the path in *path on
 * success. Returns -EINVAL for an invalid controller name, -ESRCH if
 * the file cannot be opened because it does not exist, -ENOMEM on
 * allocation failure, -ENODATA if no matching line is found, or another
 * negative errno-style value. */
int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
        _cleanup_fclose_ FILE *f = NULL;
        char line[LINE_MAX];
        const char *fs;
        size_t cs = 0;
        int unified;

        assert(path);
        assert(pid >= 0);

        unified = cg_unified();
        if (unified < 0)
                return unified;
        if (unified == 0) {
                if (controller) {
                        if (!cg_controller_is_valid(controller))
                                return -EINVAL;
                } else
                        controller = SYSTEMD_CGROUP_CONTROLLER;

                /* Length of the name to match in each line's list */
                cs = strlen(controller);
        }

        fs = procfs_file_alloca(pid, "cgroup");
        f = fopen(fs, "re");
        if (!f)
                return errno == ENOENT ? -ESRCH : -errno;

        FOREACH_LINE(line, f, return -errno) {
                char *e, *p;

                truncate_nl(line);

                if (unified) {
                        /* Only the line beginning with "0:" matters */
                        e = startswith(line, "0:");
                        if (!e)
                                continue;

                        /* Find the colon that ends the second field;
                         * the path follows it. */
                        e = strchr(e, ':');
                        if (!e)
                                continue;
                } else {
                        char *l;
                        size_t k;
                        const char *word, *state;
                        bool found = false;

                        /* l: start of the second colon-separated field */
                        l = strchr(line, ':');
                        if (!l)
                                continue;

                        l++;
                        /* e: the colon that ends the second field */
                        e = strchr(l, ':');
                        if (!e)
                                continue;

                        /* Terminate the second field in place so that
                         * it can be split at commas below. */
                        *e = 0;
                        FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
                                /* Exact match on length and content */
                                if (k == cs && memcmp(word, controller, cs) == 0) {
                                        found = true;
                                        break;
                                }
                        }

                        if (!found)
                                continue;
                }

                /* Everything after the second colon is the path */
                p = strdup(e + 1);
                if (!p)
                        return -ENOMEM;

                *path = p;
                return 0;
        }

        return -ENODATA;
}
 881
 882 int cg_install_release_agent(const char *controller, const char *agent) {
 883         _cleanup_free_ char *fs = NULL, *contents = NULL;
 884         const char *sc;
 885         int r, unified;
 886
 887         assert(agent);
 888
 889         unified = cg_unified();
 890         if (unified < 0)
 891                 return unified;
 892         if (unified) /* doesn't apply to unified hierarchy */
 893                 return -EOPNOTSUPP;
 894
 895         r = cg_get_path(controller, NULL, "release_agent", &fs);
 896         if (r < 0)
 897                 return r;
 898
 899         r = read_one_line_file(fs, &contents);
 900         if (r < 0)
 901                 return r;
 902
 903         sc = strstrip(contents);
 904         if (isempty(sc)) {
 905                 r = write_string_file(fs, agent, 0);
 906                 if (r < 0)
 907                         return r;
 908         } else if (!path_equal(sc, agent))
 909                 return -EEXIST;
 910
 911         fs = mfree(fs);
 912         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 913         if (r < 0)
 914                 return r;
 915
 916         contents = mfree(contents);
 917         r = read_one_line_file(fs, &contents);
 918         if (r < 0)
 919                 return r;
 920
 921         sc = strstrip(contents);
 922         if (streq(sc, "0")) {
 923                 r = write_string_file(fs, "1", 0);
 924                 if (r < 0)
 925                         return r;
 926
 927                 return 1;
 928         }
 929
 930         if (!streq(sc, "1"))
 931                 return -EIO;
 932
 933         return 0;
 934 }
 935
 936 int cg_uninstall_release_agent(const char *controller) {
 937         _cleanup_free_ char *fs = NULL;
 938         int r, unified;
 939
 940         unified = cg_unified();
 941         if (unified < 0)
 942                 return unified;
 943         if (unified) /* Doesn't apply to unified hierarchy */
 944                 return -EOPNOTSUPP;
 945
 946         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 947         if (r < 0)
 948                 return r;
 949
 950         r = write_string_file(fs, "0", 0);
 951         if (r < 0)
 952                 return r;
 953
 954         fs = mfree(fs);
 955
 956         r = cg_get_path(controller, NULL, "release_agent", &fs);
 957         if (r < 0)
 958                 return r;
 959
 960         r = write_string_file(fs, "", 0);
 961         if (r < 0)
 962                 return r;
 963
 964         return 0;
 965 }
 966
 967 int cg_is_empty(const char *controller, const char *path) {
 968         _cleanup_fclose_ FILE *f = NULL;
 969         pid_t pid;
 970         int r;
 971
 972         assert(path);
 973
 974         r = cg_enumerate_processes(controller, path, &f);
 975         if (r == -ENOENT)
 976                 return 1;
 977         if (r < 0)
 978                 return r;
 979
 980         r = cg_read_pid(f, &pid);
 981         if (r < 0)
 982                 return r;
 983
 984         return r == 0;
 985 }
 986
 987 int cg_is_empty_recursive(const char *controller, const char *path) {
 988         int unified, r;
 989
 990         assert(path);
 991
 992         /* The root cgroup is always populated */
 993         if (controller && (isempty(path) || path_equal(path, "/")))
 994                 return false;
 995
 996         unified = cg_unified();
 997         if (unified < 0)
 998                 return unified;
 999
1000         if (unified > 0) {
1001                 _cleanup_free_ char *populated = NULL, *t = NULL;
1002
1003                 /* On the unified hierarchy we can check empty state
1004                  * via the "cgroup.populated" attribute. */
1005
1006                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1007                 if (r < 0)
1008                         return r;
1009
1010                 r = read_one_line_file(populated, &t);
1011                 if (r == -ENOENT)
1012                         return 1;
1013                 if (r < 0)
1014                         return r;
1015
1016                 return streq(t, "0");
1017         } else {
1018                 _cleanup_closedir_ DIR *d = NULL;
1019                 char *fn;
1020
1021                 r = cg_is_empty(controller, path);
1022                 if (r <= 0)
1023                         return r;
1024
1025                 r = cg_enumerate_subgroups(controller, path, &d);
1026                 if (r == -ENOENT)
1027                         return 1;
1028                 if (r < 0)
1029                         return r;
1030
1031                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1032                         _cleanup_free_ char *p = NULL;
1033
1034                         p = strjoin(path, "/", fn, NULL);
1035                         free(fn);
1036                         if (!p)
1037                                 return -ENOMEM;
1038
1039                         r = cg_is_empty_recursive(controller, p);
1040                         if (r <= 0)
1041                                 return r;
1042                 }
1043                 if (r < 0)
1044                         return r;
1045
1046                 return true;
1047         }
1048 }
1049
/* Splits a cgroup specification into controller and path. Three forms are accepted:
 *
 *   "/some/path"             → controller NULL, path set
 *   "controller"             → controller set, path NULL
 *   "controller:/some/path"  → both set (path is NULL if nothing follows the colon)
 *
 * Either output pointer may be NULL if the caller is not interested in that part. Returned
 * strings are newly allocated. Returns 0 on success, -EINVAL if the spec does not validate,
 * -ENOMEM on allocation failure; outputs are left untouched on failure. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctrl = NULL, *cgpath = NULL;
        const char *colon;
        int r;

        assert(spec);

        /* Form 1: only a path */
        if (spec[0] == '/') {
                if (!path_is_safe(spec))
                        return -EINVAL;

                if (path) {
                        cgpath = strdup(spec);
                        if (!cgpath)
                                return -ENOMEM;

                        *path = path_kill_slashes(cgpath);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        /* Form 2: only a controller */
        colon = strchr(spec, ':');
        if (!colon) {
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        ctrl = strdup(spec);
                        if (!ctrl)
                                return -ENOMEM;

                        *controller = ctrl;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Form 3: controller, colon, optional path */
        ctrl = strndup(spec, colon - spec);
        if (!ctrl)
                return -ENOMEM;

        if (!cg_controller_is_valid(ctrl)) {
                r = -EINVAL;
                goto fail;
        }

        if (!isempty(colon + 1)) {
                cgpath = strdup(colon + 1);
                if (!cgpath) {
                        r = -ENOMEM;
                        goto fail;
                }

                if (!path_is_safe(cgpath) || !path_is_absolute(cgpath)) {
                        r = -EINVAL;
                        goto fail;
                }

                path_kill_slashes(cgpath);
        }

        /* Hand each part to the caller, or drop it if it was not asked for */
        if (controller)
                *controller = ctrl;
        else
                free(ctrl);

        if (path)
                *path = cgpath;
        else
                free(cgpath);

        return 0;

fail:
        free(cgpath);
        free(ctrl);
        return r;
}
1132
1133 int cg_mangle_path(const char *path, char **result) {
1134         _cleanup_free_ char *c = NULL, *p = NULL;
1135         char *t;
1136         int r;
1137
1138         assert(path);
1139         assert(result);
1140
1141         /* First, check if it already is a filesystem path */
1142         if (path_startswith(path, "/sys/fs/cgroup")) {
1143
1144                 t = strdup(path);
1145                 if (!t)
1146                         return -ENOMEM;
1147
1148                 *result = path_kill_slashes(t);
1149                 return 0;
1150         }
1151
1152         /* Otherwise, treat it as cg spec */
1153         r = cg_split_spec(path, &c, &p);
1154         if (r < 0)
1155                 return r;
1156
1157         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1158 }
1159
1160 int cg_get_root_path(char **path) {
1161         char *p, *e;
1162         int r;
1163
1164         assert(path);
1165
1166         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1167         if (r < 0)
1168                 return r;
1169
1170         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1171         if (!e)
1172                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1173         if (!e)
1174                 e = endswith(p, "/system"); /* even more legacy */
1175         if (e)
1176                 *e = 0;
1177
1178         *path = p;
1179         return 0;
1180 }
1181
1182 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1183         _cleanup_free_ char *rt = NULL;
1184         char *p;
1185         int r;
1186
1187         assert(cgroup);
1188         assert(shifted);
1189
1190         if (!root) {
1191                 /* If the root was specified let's use that, otherwise
1192                  * let's determine it from PID 1 */
1193
1194                 r = cg_get_root_path(&rt);
1195                 if (r < 0)
1196                         return r;
1197
1198                 root = rt;
1199         }
1200
1201         p = path_startswith(cgroup, root);
1202         if (p && p > cgroup)
1203                 *shifted = p - 1;
1204         else
1205                 *shifted = cgroup;
1206
1207         return 0;
1208 }
1209
1210 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1211         _cleanup_free_ char *raw = NULL;
1212         const char *c;
1213         int r;
1214
1215         assert(pid >= 0);
1216         assert(cgroup);
1217
1218         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1219         if (r < 0)
1220                 return r;
1221
1222         r = cg_shift_path(raw, root, &c);
1223         if (r < 0)
1224                 return r;
1225
1226         if (c == raw) {
1227                 *cgroup = raw;
1228                 raw = NULL;
1229         } else {
1230                 char *n;
1231
1232                 n = strdup(c);
1233                 if (!n)
1234                         return -ENOMEM;
1235
1236                 *cgroup = n;
1237         }
1238
1239         return 0;
1240 }
1241
1242 int cg_path_decode_unit(const char *cgroup, char **unit){
1243         char *c, *s;
1244         size_t n;
1245
1246         assert(cgroup);
1247         assert(unit);
1248
1249         n = strcspn(cgroup, "/");
1250         if (n < 3)
1251                 return -ENXIO;
1252
1253         c = strndupa(cgroup, n);
1254         c = cg_unescape(c);
1255
1256         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1257                 return -ENXIO;
1258
1259         s = strdup(c);
1260         if (!s)
1261                 return -ENOMEM;
1262
1263         *unit = s;
1264         return 0;
1265 }
1266
1267 static bool valid_slice_name(const char *p, size_t n) {
1268
1269         if (!p)
1270                 return false;
1271
1272         if (n < strlen("x.slice"))
1273                 return false;
1274
1275         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1276                 char buf[n+1], *c;
1277
1278                 memcpy(buf, p, n);
1279                 buf[n] = 0;
1280
1281                 c = cg_unescape(buf);
1282
1283                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1284         }
1285
1286         return false;
1287 }
1288
/* Advances over all leading slice components of 'p' (and the slashes in front of them).
 * The returned pointer is positioned at the first component that is not a valid slice
 * name, with the slashes preceding it already skipped. */
static const char *skip_slices(const char *p) {
        size_t len = 0;

        assert(p);

        do {
                /* Step over the slice accepted in the previous round (nothing the first time) */
                p += len;
                p += strspn(p, "/");

                len = strcspn(p, "/");
        } while (valid_slice_name(p, len));

        return p;
}
1306
/* Extracts the unit name from a cgroup path: skips all leading slices and decodes the
 * component that follows. Returns 0 and a newly allocated name in *ret, or a negative
 * errno-style value; -ENXIO in particular if the decoded name is itself a slice. */
int cg_path_get_unit(const char *path, char **ret) {
        const char *after_slices;
        char *name;
        int r;

        assert(path);
        assert(ret);

        after_slices = skip_slices(path);

        r = cg_path_decode_unit(after_slices, &name);
        if (r < 0)
                return r;

        /* Slices were skipped above, one showing up here is not acceptable */
        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        free(name);
        return -ENXIO;
}
1330
1331 int cg_pid_get_unit(pid_t pid, char **unit) {
1332         _cleanup_free_ char *cgroup = NULL;
1333         int r;
1334
1335         assert(unit);
1336
1337         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1338         if (r < 0)
1339                 return r;
1340
1341         return cg_path_get_unit(cgroup, unit);
1342 }
1343
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Leading slashes of p are ignored. If the first path component has the form
 * "session-<id>.scope" and <id> passes session_id_valid(), returns a pointer
 * to what follows that component (with the separating slashes skipped, too).
 * In every other case, including an empty or NULL p, returns NULL.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        /* 8 and 6 are the lengths of "session-" and ".scope"; the length check
         * above guarantees both fit without overlapping */
        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, so signal failure with
                 * NULL like all other exits do, not with a boolean. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1380
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns the position after a leading "user@<uid>.service" component (and
 * the slashes following it), or NULL if p is empty or does not start with
 * such a component, or if <uid> is not accepted by parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t len, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        /* What remains between the "user@" prefix and the ".service" suffix */
        uid_len = len - 5 - 8;

        {
                char uid[uid_len + 1];

                memcpy(uid, p + 5, uid_len);
                uid[uid_len] = 0;

                /* User manager service names need no cg_unescape(): they
                 * cannot clash with any of the kernel's own names. */

                if (parse_uid(uid, NULL) < 0)
                        return NULL;
        }

        p += len;
        p += strspn(p, "/");

        return p;
}
1418
/* Skips the part of a cgroup path that leads to the user's own units: any slices first, then
 * either a user manager service or, failing that, a session scope. Returns NULL if neither
 * of the two is found after the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Slices are optional */
        after_slices = skip_slices(path);

        /* Prefer the user manager, fall back to the session scope */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1435
/* Extracts the user unit from a cgroup path. The user prefix (slices plus user manager
 * service or session scope) must be present, otherwise -ENXIO is returned; what follows it
 * is parsed exactly like a system unit path by cg_path_get_unit(). */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1451
1452 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1453         _cleanup_free_ char *cgroup = NULL;
1454         int r;
1455
1456         assert(unit);
1457
1458         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1459         if (r < 0)
1460                 return r;
1461
1462         return cg_path_get_user_unit(cgroup, unit);
1463 }
1464
1465 int cg_path_get_machine_name(const char *path, char **machine) {
1466         _cleanup_free_ char *u = NULL;
1467         const char *sl;
1468         int r;
1469
1470         r = cg_path_get_unit(path, &u);
1471         if (r < 0)
1472                 return r;
1473
1474         sl = strjoina("/run/systemd/machines/unit:", u);
1475         return readlink_malloc(sl, machine);
1476 }
1477
1478 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1479         _cleanup_free_ char *cgroup = NULL;
1480         int r;
1481
1482         assert(machine);
1483
1484         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1485         if (r < 0)
1486                 return r;
1487
1488         return cg_path_get_machine_name(cgroup, machine);
1489 }
1490
1491 int cg_path_get_session(const char *path, char **session) {
1492         _cleanup_free_ char *unit = NULL;
1493         char *start, *end;
1494         int r;
1495
1496         assert(path);
1497
1498         r = cg_path_get_unit(path, &unit);
1499         if (r < 0)
1500                 return r;
1501
1502         start = startswith(unit, "session-");
1503         if (!start)
1504                 return -ENXIO;
1505         end = endswith(start, ".scope");
1506         if (!end)
1507                 return -ENXIO;
1508
1509         *end = 0;
1510         if (!session_id_valid(start))
1511                 return -ENXIO;
1512
1513         if (session) {
1514                 char *rr;
1515
1516                 rr = strdup(start);
1517                 if (!rr)
1518                         return -ENOMEM;
1519
1520                 *session = rr;
1521         }
1522
1523         return 0;
1524 }
1525
1526 int cg_pid_get_session(pid_t pid, char **session) {
1527         _cleanup_free_ char *cgroup = NULL;
1528         int r;
1529
1530         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1531         if (r < 0)
1532                 return r;
1533
1534         return cg_path_get_session(cgroup, session);
1535 }
1536
1537 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1538         _cleanup_free_ char *slice = NULL;
1539         char *start, *end;
1540         int r;
1541
1542         assert(path);
1543
1544         r = cg_path_get_slice(path, &slice);
1545         if (r < 0)
1546                 return r;
1547
1548         start = startswith(slice, "user-");
1549         if (!start)
1550                 return -ENXIO;
1551         end = endswith(start, ".slice");
1552         if (!end)
1553                 return -ENXIO;
1554
1555         *end = 0;
1556         if (parse_uid(start, uid) < 0)
1557                 return -ENXIO;
1558
1559         return 0;
1560 }
1561
1562 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1563         _cleanup_free_ char *cgroup = NULL;
1564         int r;
1565
1566         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1567         if (r < 0)
1568                 return r;
1569
1570         return cg_path_get_owner_uid(cgroup, uid);
1571 }
1572
/* Determines the slice of a cgroup path: walks over the leading run of slice components and
 * returns the last one of them, decoded, in *slice. If the path does not begin with any
 * slice, "-.slice" is returned instead. The string is newly allocated. Returns 0 or a
 * negative errno-style value. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        size_t len;
        char *root;

        assert(p);
        assert(slice);

        p += strspn(p, "/");
        len = strcspn(p, "/");

        /* Remember each slice we pass, stop at the first component that is not one */
        while (valid_slice_name(p, len)) {
                last = p;

                p += len;
                p += strspn(p, "/");
                len = strcspn(p, "/");
        }

        if (last)
                return cg_path_decode_unit(last, slice);

        /* Not in any slice at all */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1608
1609 int cg_pid_get_slice(pid_t pid, char **slice) {
1610         _cleanup_free_ char *cgroup = NULL;
1611         int r;
1612
1613         assert(slice);
1614
1615         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1616         if (r < 0)
1617                 return r;
1618
1619         return cg_path_get_slice(cgroup, slice);
1620 }
1621
/* Determines the user slice of a cgroup path. The user prefix (slices plus user manager
 * service or session scope) must be present, otherwise -ENXIO is returned; what follows it
 * is parsed exactly like a system path by cg_path_get_slice(). */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1635
1636 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1637         _cleanup_free_ char *cgroup = NULL;
1638         int r;
1639
1640         assert(slice);
1641
1642         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1643         if (r < 0)
1644                 return r;
1645
1646         return cg_path_get_user_slice(cgroup, slice);
1647 }
1648
1649 char *cg_escape(const char *p) {
1650         bool need_prefix = false;
1651
1652         /* This implements very minimal escaping for names to be used
1653          * as file names in the cgroup tree: any name which might
1654          * conflict with a kernel name or is prefixed with '_' is
1655          * prefixed with a '_'. That way, when reading cgroup names it
1656          * is sufficient to remove a single prefixing underscore if
1657          * there is one. */
1658
1659         /* The return value of this function (unlike cg_unescape())
1660          * needs free()! */
1661
1662         if (p[0] == 0 ||
1663             p[0] == '_' ||
1664             p[0] == '.' ||
1665             streq(p, "notify_on_release") ||
1666             streq(p, "release_agent") ||
1667             streq(p, "tasks") ||
1668             startswith(p, "cgroup."))
1669                 need_prefix = true;
1670         else {
1671                 const char *dot;
1672
1673                 dot = strrchr(p, '.');
1674                 if (dot) {
1675                         CGroupController c;
1676                         size_t l = dot - p;
1677
1678                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1679                                 const char *n;
1680
1681                                 n = cgroup_controller_to_string(c);
1682
1683                                 if (l != strlen(n))
1684                                         continue;
1685
1686                                 if (memcmp(p, n, l) != 0)
1687                                         continue;
1688
1689                                 need_prefix = true;
1690                                 break;
1691                         }
1692                 }
1693         }
1694
1695         if (need_prefix)
1696                 return strappend("_", p);
1697
1698         return strdup(p);
1699 }
1700
/* Reverses cg_escape() by skipping a single leading '_', if there is one.
 *
 * Unlike cg_escape() this allocates nothing: the returned pointer refers into the string
 * passed in and must not be passed to free(). */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1712
1713 #define CONTROLLER_VALID                        \
1714         DIGITS LETTERS                          \
1715         "_"
1716
1717 bool cg_controller_is_valid(const char *p) {
1718         const char *t, *s;
1719
1720         if (!p)
1721                 return false;
1722
1723         s = startswith(p, "name=");
1724         if (s)
1725                 p = s;
1726
1727         if (*p == 0 || *p == '_')
1728                 return false;
1729
1730         for (t = p; *t; t++)
1731                 if (!strchr(CONTROLLER_VALID, *t))
1732                         return false;
1733
1734         if (t - p > FILENAME_MAX)
1735                 return false;
1736
1737         return true;
1738 }
1739
1740 int cg_slice_to_path(const char *unit, char **ret) {
1741         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1742         const char *dash;
1743         int r;
1744
1745         assert(unit);
1746         assert(ret);
1747
1748         if (streq(unit, "-.slice")) {
1749                 char *x;
1750
1751                 x = strdup("");
1752                 if (!x)
1753                         return -ENOMEM;
1754                 *ret = x;
1755                 return 0;
1756         }
1757
1758         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1759                 return -EINVAL;
1760
1761         if (!endswith(unit, ".slice"))
1762                 return -EINVAL;
1763
1764         r = unit_name_to_prefix(unit, &p);
1765         if (r < 0)
1766                 return r;
1767
1768         dash = strchr(p, '-');
1769
1770         /* Don't allow initial dashes */
1771         if (dash == p)
1772                 return -EINVAL;
1773
1774         while (dash) {
1775                 _cleanup_free_ char *escaped = NULL;
1776                 char n[dash - p + sizeof(".slice")];
1777
1778                 /* Don't allow trailing or double dashes */
1779                 if (dash[1] == 0 || dash[1] == '-')
1780                         return -EINVAL;
1781
1782                 strcpy(stpncpy(n, p, dash - p), ".slice");
1783                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1784                         return -EINVAL;
1785
1786                 escaped = cg_escape(n);
1787                 if (!escaped)
1788                         return -ENOMEM;
1789
1790                 if (!strextend(&s, escaped, "/", NULL))
1791                         return -ENOMEM;
1792
1793                 dash = strchr(dash+1, '-');
1794         }
1795
1796         e = cg_escape(unit);
1797         if (!e)
1798                 return -ENOMEM;
1799
1800         if (!strextend(&s, e, NULL))
1801                 return -ENOMEM;
1802
1803         *ret = s;
1804         s = NULL;
1805
1806         return 0;
1807 }
1808
1809 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1810         _cleanup_free_ char *p = NULL;
1811         int r;
1812
1813         r = cg_get_path(controller, path, attribute, &p);
1814         if (r < 0)
1815                 return r;
1816
1817         return write_string_file(p, value, 0);
1818 }
1819
1820 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1821         _cleanup_free_ char *p = NULL;
1822         int r;
1823
1824         r = cg_get_path(controller, path, attribute, &p);
1825         if (r < 0)
1826                 return r;
1827
1828         return read_one_line_file(p, ret);
1829 }
1830
1831 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1832         CGroupController c;
1833         int r, unified;
1834
1835         /* This one will create a cgroup in our private tree, but also
1836          * duplicate it in the trees specified in mask, and remove it
1837          * in all others */
1838
1839         /* First create the cgroup in our own hierarchy. */
1840         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1841         if (r < 0)
1842                 return r;
1843
1844         /* If we are in the unified hierarchy, we are done now */
1845         unified = cg_unified();
1846         if (unified < 0)
1847                 return unified;
1848         if (unified > 0)
1849                 return 0;
1850
1851         /* Otherwise, do the same in the other hierarchies */
1852         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1853                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1854                 const char *n;
1855
1856                 n = cgroup_controller_to_string(c);
1857
1858                 if (mask & bit)
1859                         (void) cg_create(n, path);
1860                 else if (supported & bit)
1861                         (void) cg_trim(n, path, true);
1862         }
1863
1864         return 0;
1865 }
1866
1867 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1868         CGroupController c;
1869         int r, unified;
1870
1871         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1872         if (r < 0)
1873                 return r;
1874
1875         unified = cg_unified();
1876         if (unified < 0)
1877                 return unified;
1878         if (unified > 0)
1879                 return 0;
1880
1881         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1882                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1883                 const char *p = NULL;
1884
1885                 if (!(supported & bit))
1886                         continue;
1887
1888                 if (path_callback)
1889                         p = path_callback(bit, userdata);
1890
1891                 if (!p)
1892                         p = path;
1893
1894                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1895         }
1896
1897         return 0;
1898 }
1899
1900 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1901         Iterator i;
1902         void *pidp;
1903         int r = 0;
1904
1905         SET_FOREACH(pidp, pids, i) {
1906                 pid_t pid = PTR_TO_PID(pidp);
1907                 int q;
1908
1909                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1910                 if (q < 0 && r >= 0)
1911                         r = q;
1912         }
1913
1914         return r;
1915 }
1916
1917 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1918         CGroupController c;
1919         int r = 0, unified;
1920
1921         if (!path_equal(from, to))  {
1922                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1923                 if (r < 0)
1924                         return r;
1925         }
1926
1927         unified = cg_unified();
1928         if (unified < 0)
1929                 return unified;
1930         if (unified > 0)
1931                 return r;
1932
1933         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1934                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1935                 const char *p = NULL;
1936
1937                 if (!(supported & bit))
1938                         continue;
1939
1940                 if (to_callback)
1941                         p = to_callback(bit, userdata);
1942
1943                 if (!p)
1944                         p = to;
1945
1946                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1947         }
1948
1949         return 0;
1950 }
1951
1952 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1953         CGroupController c;
1954         int r, unified;
1955
1956         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1957         if (r < 0)
1958                 return r;
1959
1960         unified = cg_unified();
1961         if (unified < 0)
1962                 return unified;
1963         if (unified > 0)
1964                 return r;
1965
1966         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1967                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1968
1969                 if (!(supported & bit))
1970                         continue;
1971
1972                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1973         }
1974
1975         return 0;
1976 }
1977
1978 int cg_mask_supported(CGroupMask *ret) {
1979         CGroupMask mask = 0;
1980         int r, unified;
1981
1982         /* Determines the mask of supported cgroup controllers. Only
1983          * includes controllers we can make sense of and that are
1984          * actually accessible. */
1985
1986         unified = cg_unified();
1987         if (unified < 0)
1988                 return unified;
1989         if (unified > 0) {
1990                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
1991                 const char *c;
1992
1993                 /* In the unified hierarchy we can read the supported
1994                  * and accessible controllers from a the top-level
1995                  * cgroup attribute */
1996
1997                 r = cg_get_root_path(&root);
1998                 if (r < 0)
1999                         return r;
2000
2001                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2002                 if (r < 0)
2003                         return r;
2004
2005                 r = read_one_line_file(path, &controllers);
2006                 if (r < 0)
2007                         return r;
2008
2009                 c = controllers;
2010                 for (;;) {
2011                         _cleanup_free_ char *n = NULL;
2012                         CGroupController v;
2013
2014                         r = extract_first_word(&c, &n, NULL, 0);
2015                         if (r < 0)
2016                                 return r;
2017                         if (r == 0)
2018                                 break;
2019
2020                         v = cgroup_controller_from_string(n);
2021                         if (v < 0)
2022                                 continue;
2023
2024                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2025                 }
2026
2027                 /* Currently, we only support the memory and pids
2028                  * controller in the unified hierarchy, mask
2029                  * everything else off. */
2030                 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS;
2031
2032         } else {
2033                 CGroupController c;
2034
2035                 /* In the legacy hierarchy, we check whether which
2036                  * hierarchies are mounted. */
2037
2038                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2039                         const char *n;
2040
2041                         n = cgroup_controller_to_string(c);
2042                         if (controller_is_accessible(n) >= 0)
2043                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2044                 }
2045         }
2046
2047         *ret = mask;
2048         return 0;
2049 }
2050
2051 int cg_kernel_controllers(Set *controllers) {
2052         _cleanup_fclose_ FILE *f = NULL;
2053         char buf[LINE_MAX];
2054         int r;
2055
2056         assert(controllers);
2057
2058         /* Determines the full list of kernel-known controllers. Might
2059          * include controllers we don't actually support, arbitrary
2060          * named hierarchies and controllers that aren't currently
2061          * accessible (because not mounted). */
2062
2063         f = fopen("/proc/cgroups", "re");
2064         if (!f) {
2065                 if (errno == ENOENT)
2066                         return 0;
2067                 return -errno;
2068         }
2069
2070         /* Ignore the header line */
2071         (void) fgets(buf, sizeof(buf), f);
2072
2073         for (;;) {
2074                 char *controller;
2075                 int enabled = 0;
2076
2077                 errno = 0;
2078                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2079
2080                         if (feof(f))
2081                                 break;
2082
2083                         if (ferror(f) && errno != 0)
2084                                 return -errno;
2085
2086                         return -EBADMSG;
2087                 }
2088
2089                 if (!enabled) {
2090                         free(controller);
2091                         continue;
2092                 }
2093
2094                 if (!cg_controller_is_valid(controller)) {
2095                         free(controller);
2096                         return -EBADMSG;
2097                 }
2098
2099                 r = set_consume(controllers, controller);
2100                 if (r < 0)
2101                         return r;
2102         }
2103
2104         return 0;
2105 }
2106
2107 static thread_local int unified_cache = -1;
2108
2109 int cg_unified(void) {
2110         struct statfs fs;
2111
2112         /* Checks if we support the unified hierarchy. Returns an
2113          * error when the cgroup hierarchies aren't mounted yet or we
2114          * have any other trouble determining if the unified hierarchy
2115          * is supported. */
2116
2117         if (unified_cache >= 0)
2118                 return unified_cache;
2119
2120         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2121                 return -errno;
2122
2123         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2124                 unified_cache = true;
2125         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2126                 unified_cache = false;
2127         else
2128                 return -ENOEXEC;
2129
2130         return unified_cache;
2131 }
2132
2133 void cg_unified_flush(void) {
2134         unified_cache = -1;
2135 }
2136
2137 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2138         _cleanup_free_ char *fs = NULL;
2139         CGroupController c;
2140         int r, unified;
2141
2142         assert(p);
2143
2144         if (supported == 0)
2145                 return 0;
2146
2147         unified = cg_unified();
2148         if (unified < 0)
2149                 return unified;
2150         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2151                 return 0;
2152
2153         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2154         if (r < 0)
2155                 return r;
2156
2157         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2158                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2159                 const char *n;
2160
2161                 if (!(supported & bit))
2162                         continue;
2163
2164                 n = cgroup_controller_to_string(c);
2165                 {
2166                         char s[1 + strlen(n) + 1];
2167
2168                         s[0] = mask & bit ? '+' : '-';
2169                         strcpy(s + 1, n);
2170
2171                         r = write_string_file(fs, s, 0);
2172                         if (r < 0)
2173                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2174                 }
2175         }
2176
2177         return 0;
2178 }
2179
2180 bool cg_is_unified_wanted(void) {
2181         static thread_local int wanted = -1;
2182         int r, unified;
2183
2184         /* If the hierarchy is already mounted, then follow whatever
2185          * was chosen for it. */
2186         unified = cg_unified();
2187         if (unified >= 0)
2188                 return unified;
2189
2190         /* Otherwise, let's see what the kernel command line has to
2191          * say. Since checking that is expensive, let's cache the
2192          * result. */
2193         if (wanted >= 0)
2194                 return wanted;
2195
2196         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2197         if (r > 0)
2198                 return (wanted = true);
2199         else {
2200                 _cleanup_free_ char *value = NULL;
2201
2202                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2203                 if (r < 0)
2204                         return false;
2205                 if (r == 0)
2206                         return (wanted = false);
2207
2208                 return (wanted = parse_boolean(value) > 0);
2209         }
2210 }
2211
/* The exact opposite of cg_is_unified_wanted(). */
bool cg_is_legacy_wanted(void) {
        if (cg_is_unified_wanted())
                return false;

        return true;
}
2215
2216 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2217         uint64_t u;
2218         int r;
2219
2220         if (isempty(s)) {
2221                 *ret = CGROUP_CPU_SHARES_INVALID;
2222                 return 0;
2223         }
2224
2225         r = safe_atou64(s, &u);
2226         if (r < 0)
2227                 return r;
2228
2229         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2230                 return -ERANGE;
2231
2232         *ret = u;
2233         return 0;
2234 }
2235
2236 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2237         uint64_t u;
2238         int r;
2239
2240         if (isempty(s)) {
2241                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2242                 return 0;
2243         }
2244
2245         r = safe_atou64(s, &u);
2246         if (r < 0)
2247                 return r;
2248
2249         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2250                 return -ERANGE;
2251
2252         *ret = u;
2253         return 0;
2254 }
2255
2256 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2257         [CGROUP_CONTROLLER_CPU] = "cpu",
2258         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2259         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2260         [CGROUP_CONTROLLER_MEMORY] = "memory",
2261         [CGROUP_CONTROLLER_DEVICES] = "devices",
2262         [CGROUP_CONTROLLER_PIDS] = "pids",
2263         [CGROUP_CONTROLLER_NET_CLS] = "net_cls",
2264 };
2265
2266 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);