src/basic/cgroup-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <errno.h>
  23 #include <unistd.h>
  24 #include <signal.h>
  25 #include <string.h>
  26 #include <stdlib.h>
  27 #include <dirent.h>
  28 #include <sys/stat.h>
  29 #include <sys/types.h>
  30 #include <ftw.h>
  31
  32 #include "set.h"
  33 #include "macro.h"
  34 #include "util.h"
  35 #include "formats-util.h"
  36 #include "process-util.h"
  37 #include "path-util.h"
  38 #include "unit-name.h"
  39 #include "fileio.h"
  40 #include "special.h"
  41 #include "mkdir.h"
  42 #include "login-util.h"
  43 #include "cgroup-util.h"
  44
  45 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  46         _cleanup_free_ char *fs = NULL;
  47         FILE *f;
  48         int r;
  49
  50         assert(_f);
  51
  52         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  53         if (r < 0)
  54                 return r;
  55
  56         f = fopen(fs, "re");
  57         if (!f)
  58                 return -errno;
  59
  60         *_f = f;
  61         return 0;
  62 }
  63
  64 int cg_read_pid(FILE *f, pid_t *_pid) {
  65         unsigned long ul;
  66
  67         /* Note that the cgroup.procs might contain duplicates! See
  68          * cgroups.txt for details. */
  69
  70         assert(f);
  71         assert(_pid);
  72
  73         errno = 0;
  74         if (fscanf(f, "%lu", &ul) != 1) {
  75
  76                 if (feof(f))
  77                         return 0;
  78
  79                 return errno ? -errno : -EIO;
  80         }
  81
  82         if (ul <= 0)
  83                 return -EIO;
  84
  85         *_pid = (pid_t) ul;
  86         return 1;
  87 }
  88
  89 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
  90         _cleanup_free_ char *fs = NULL;
  91         int r;
  92         DIR *d;
  93
  94         assert(_d);
  95
  96         /* This is not recursive! */
  97
  98         r = cg_get_path(controller, path, NULL, &fs);
  99         if (r < 0)
 100                 return r;
 101
 102         d = opendir(fs);
 103         if (!d)
 104                 return -errno;
 105
 106         *_d = d;
 107         return 0;
 108 }
 109
 110 int cg_read_subgroup(DIR *d, char **fn) {
 111         struct dirent *de;
 112
 113         assert(d);
 114         assert(fn);
 115
 116         FOREACH_DIRENT_ALL(de, d, return -errno) {
 117                 char *b;
 118
 119                 if (de->d_type != DT_DIR)
 120                         continue;
 121
 122                 if (streq(de->d_name, ".") ||
 123                     streq(de->d_name, ".."))
 124                         continue;
 125
 126                 b = strdup(de->d_name);
 127                 if (!b)
 128                         return -ENOMEM;
 129
 130                 *fn = b;
 131                 return 1;
 132         }
 133
 134         return 0;
 135 }
 136
 137 int cg_rmdir(const char *controller, const char *path) {
 138         _cleanup_free_ char *p = NULL;
 139         int r;
 140
 141         r = cg_get_path(controller, path, NULL, &p);
 142         if (r < 0)
 143                 return r;
 144
 145         r = rmdir(p);
 146         if (r < 0 && errno != ENOENT)
 147                 return -errno;
 148
 149         return 0;
 150 }
 151
 152 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
 153         _cleanup_set_free_ Set *allocated_set = NULL;
 154         bool done = false;
 155         int r, ret = 0;
 156         pid_t my_pid;
 157
 158         assert(sig >= 0);
 159
 160         /* This goes through the tasks list and kills them all. This
 161          * is repeated until no further processes are added to the
 162          * tasks list, to properly handle forking processes */
 163
 164         if (!s) {
 165                 s = allocated_set = set_new(NULL);
 166                 if (!s)
 167                         return -ENOMEM;
 168         }
 169
 170         my_pid = getpid();
 171
 172         do {
 173                 _cleanup_fclose_ FILE *f = NULL;
 174                 pid_t pid = 0;
 175                 done = true;
 176
 177                 r = cg_enumerate_processes(controller, path, &f);
 178                 if (r < 0) {
 179                         if (ret >= 0 && r != -ENOENT)
 180                                 return r;
 181
 182                         return ret;
 183                 }
 184
 185                 while ((r = cg_read_pid(f, &pid)) > 0) {
 186
 187                         if (ignore_self && pid == my_pid)
 188                                 continue;
 189
 190                         if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
 191                                 continue;
 192
 193                         /* If we haven't killed this process yet, kill
 194                          * it */
 195                         if (kill(pid, sig) < 0) {
 196                                 if (ret >= 0 && errno != ESRCH)
 197                                         ret = -errno;
 198                         } else {
 199                                 if (sigcont && sig != SIGKILL)
 200                                         (void) kill(pid, SIGCONT);
 201
 202                                 if (ret == 0)
 203                                         ret = 1;
 204                         }
 205
 206                         done = false;
 207
 208                         r = set_put(s, LONG_TO_PTR(pid));
 209                         if (r < 0) {
 210                                 if (ret >= 0)
 211                                         return r;
 212
 213                                 return ret;
 214                         }
 215                 }
 216
 217                 if (r < 0) {
 218                         if (ret >= 0)
 219                                 return r;
 220
 221                         return ret;
 222                 }
 223
 224                 /* To avoid racing against processes which fork
 225                  * quicker than we can kill them we repeat this until
 226                  * no new pids need to be killed. */
 227
 228         } while (!done);
 229
 230         return ret;
 231 }
 232
 233 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 234         _cleanup_set_free_ Set *allocated_set = NULL;
 235         _cleanup_closedir_ DIR *d = NULL;
 236         int r, ret;
 237         char *fn;
 238
 239         assert(path);
 240         assert(sig >= 0);
 241
 242         if (!s) {
 243                 s = allocated_set = set_new(NULL);
 244                 if (!s)
 245                         return -ENOMEM;
 246         }
 247
 248         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 249
 250         r = cg_enumerate_subgroups(controller, path, &d);
 251         if (r < 0) {
 252                 if (ret >= 0 && r != -ENOENT)
 253                         return r;
 254
 255                 return ret;
 256         }
 257
 258         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 259                 _cleanup_free_ char *p = NULL;
 260
 261                 p = strjoin(path, "/", fn, NULL);
 262                 free(fn);
 263                 if (!p)
 264                         return -ENOMEM;
 265
 266                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 267                 if (r != 0 && ret >= 0)
 268                         ret = r;
 269         }
 270
 271         if (ret >= 0 && r < 0)
 272                 ret = r;
 273
 274         if (rem) {
 275                 r = cg_rmdir(controller, path);
 276                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 277                         return r;
 278         }
 279
 280         return ret;
 281 }
 282
 283 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
 284         bool done = false;
 285         _cleanup_set_free_ Set *s = NULL;
 286         int r, ret = 0;
 287         pid_t my_pid;
 288
 289         assert(cfrom);
 290         assert(pfrom);
 291         assert(cto);
 292         assert(pto);
 293
 294         s = set_new(NULL);
 295         if (!s)
 296                 return -ENOMEM;
 297
 298         my_pid = getpid();
 299
 300         do {
 301                 _cleanup_fclose_ FILE *f = NULL;
 302                 pid_t pid = 0;
 303                 done = true;
 304
 305                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 306                 if (r < 0) {
 307                         if (ret >= 0 && r != -ENOENT)
 308                                 return r;
 309
 310                         return ret;
 311                 }
 312
 313                 while ((r = cg_read_pid(f, &pid)) > 0) {
 314
 315                         /* This might do weird stuff if we aren't a
 316                          * single-threaded program. However, we
 317                          * luckily know we are not */
 318                         if (ignore_self && pid == my_pid)
 319                                 continue;
 320
 321                         if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
 322                                 continue;
 323
 324                         /* Ignore kernel threads. Since they can only
 325                          * exist in the root cgroup, we only check for
 326                          * them there. */
 327                         if (cfrom &&
 328                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 329                             is_kernel_thread(pid) > 0)
 330                                 continue;
 331
 332                         r = cg_attach(cto, pto, pid);
 333                         if (r < 0) {
 334                                 if (ret >= 0 && r != -ESRCH)
 335                                         ret = r;
 336                         } else if (ret == 0)
 337                                 ret = 1;
 338
 339                         done = false;
 340
 341                         r = set_put(s, LONG_TO_PTR(pid));
 342                         if (r < 0) {
 343                                 if (ret >= 0)
 344                                         return r;
 345
 346                                 return ret;
 347                         }
 348                 }
 349
 350                 if (r < 0) {
 351                         if (ret >= 0)
 352                                 return r;
 353
 354                         return ret;
 355                 }
 356         } while (!done);
 357
 358         return ret;
 359 }
 360
 361 int cg_migrate_recursive(
 362                 const char *cfrom,
 363                 const char *pfrom,
 364                 const char *cto,
 365                 const char *pto,
 366                 bool ignore_self,
 367                 bool rem) {
 368
 369         _cleanup_closedir_ DIR *d = NULL;
 370         int r, ret = 0;
 371         char *fn;
 372
 373         assert(cfrom);
 374         assert(pfrom);
 375         assert(cto);
 376         assert(pto);
 377
 378         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 379
 380         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 381         if (r < 0) {
 382                 if (ret >= 0 && r != -ENOENT)
 383                         return r;
 384
 385                 return ret;
 386         }
 387
 388         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 389                 _cleanup_free_ char *p = NULL;
 390
 391                 p = strjoin(pfrom, "/", fn, NULL);
 392                 free(fn);
 393                 if (!p)
 394                         return -ENOMEM;
 395
 396                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 397                 if (r != 0 && ret >= 0)
 398                         ret = r;
 399         }
 400
 401         if (r < 0 && ret >= 0)
 402                 ret = r;
 403
 404         if (rem) {
 405                 r = cg_rmdir(cfrom, pfrom);
 406                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 407                         return r;
 408         }
 409
 410         return ret;
 411 }
 412
 413 int cg_migrate_recursive_fallback(
 414                 const char *cfrom,
 415                 const char *pfrom,
 416                 const char *cto,
 417                 const char *pto,
 418                 bool ignore_self,
 419                 bool rem) {
 420
 421         int r;
 422
 423         assert(cfrom);
 424         assert(pfrom);
 425         assert(cto);
 426         assert(pto);
 427
 428         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 429         if (r < 0) {
 430                 char prefix[strlen(pto) + 1];
 431
 432                 /* This didn't work? Then let's try all prefixes of the destination */
 433
 434                 PATH_FOREACH_PREFIX(prefix, pto) {
 435                         int q;
 436
 437                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 438                         if (q >= 0)
 439                                 return q;
 440                 }
 441         }
 442
 443         return r;
 444 }
 445
 446 static const char *controller_to_dirname(const char *controller) {
 447         const char *e;
 448
 449         assert(controller);
 450
 451         /* Converts a controller name to the directory name below
 452          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 453          * just cuts off the name= prefixed used for named
 454          * hierarchies, if it is specified. */
 455
 456         e = startswith(controller, "name=");
 457         if (e)
 458                 return e;
 459
 460         return controller;
 461 }
 462
 463 static int join_path_legacy(const char *controller_dn, const char *path, const char *suffix, char **fs) {
 464         char *t = NULL;
 465
 466         assert(fs);
 467         assert(controller_dn);
 468
 469         if (isempty(path) && isempty(suffix))
 470                 t = strappend("/sys/fs/cgroup/", controller_dn);
 471         else if (isempty(path))
 472                 t = strjoin("/sys/fs/cgroup/", controller_dn, "/", suffix, NULL);
 473         else if (isempty(suffix))
 474                 t = strjoin("/sys/fs/cgroup/", controller_dn, "/", path, NULL);
 475         else
 476                 t = strjoin("/sys/fs/cgroup/", controller_dn, "/", path, "/", suffix, NULL);
 477         if (!t)
 478                 return -ENOMEM;
 479
 480         *fs = t;
 481         return 0;
 482 }
 483
 484 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 485         char *t;
 486
 487         assert(fs);
 488
 489         if (isempty(path) && isempty(suffix))
 490                 t = strdup("/sys/fs/cgroup");
 491         else if (isempty(path))
 492                 t = strappend("/sys/fs/cgroup/", suffix);
 493         else if (isempty(suffix))
 494                 t = strappend("/sys/fs/cgroup/", path);
 495         else
 496                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 497         if (!t)
 498                 return -ENOMEM;
 499
 500         *fs = t;
 501         return 0;
 502 }
 503
 504 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 505         int unified, r;
 506
 507         assert(fs);
 508
 509         if (!controller) {
 510                 char *t;
 511
 512                 /* If no controller is specified, we assume only the
 513                  * path below the controller matters */
 514
 515                 if (!path && !suffix)
 516                         return -EINVAL;
 517
 518                 if (isempty(suffix))
 519                         t = strdup(path);
 520                 else if (isempty(path))
 521                         t = strdup(suffix);
 522                 else
 523                         t = strjoin(path, "/", suffix, NULL);
 524                 if (!t)
 525                         return -ENOMEM;
 526
 527                 *fs = path_kill_slashes(t);
 528                 return 0;
 529         }
 530
 531         if (!cg_controller_is_valid(controller))
 532                 return -EINVAL;
 533
 534         unified = cg_unified();
 535         if (unified < 0)
 536                 return unified;
 537
 538         if (unified > 0)
 539                 r = join_path_unified(path, suffix, fs);
 540         else {
 541                 const char *dn;
 542
 543                 if (controller)
 544                         dn = controller_to_dirname(controller);
 545                 else
 546                         dn = NULL;
 547
 548                 r = join_path_legacy(dn, path, suffix, fs);
 549         }
 550
 551         if (r < 0)
 552                 return r;
 553
 554         path_kill_slashes(*fs);
 555         return 0;
 556 }
 557
 558 static int controller_is_accessible(const char *controller) {
 559         int unified;
 560
 561         assert(controller);
 562
 563         /* Checks whether a specific controller is accessible,
 564          * i.e. its hierarchy mounted. In the unified hierarchy all
 565          * controllers are considered accessible, except for the named
 566          * hierarchies */
 567
 568         if (!cg_controller_is_valid(controller))
 569                 return -EINVAL;
 570
 571         unified = cg_unified();
 572         if (unified < 0)
 573                 return unified;
 574         if (unified > 0) {
 575                 /* We don't support named hierarchies if we are using
 576                  * the unified hierarchy. */
 577
 578                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 579                         return 0;
 580
 581                 if (startswith(controller, "name="))
 582                         return -EOPNOTSUPP;
 583
 584         } else {
 585                 const char *cc, *dn;
 586
 587                 dn = controller_to_dirname(controller);
 588                 cc = strjoina("/sys/fs/cgroup/", dn);
 589
 590                 if (laccess(cc, F_OK) < 0)
 591                         return -errno;
 592         }
 593
 594         return 0;
 595 }
 596
 597 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 598         int r;
 599
 600         assert(controller);
 601         assert(fs);
 602
 603         /* Check if the specified controller is actually accessible */
 604         r = controller_is_accessible(controller);
 605         if (r < 0)
 606                 return r;
 607
 608         return cg_get_path(controller, path, suffix, fs);
 609 }
 610
 611 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 612         assert(path);
 613         assert(sb);
 614         assert(ftwbuf);
 615
 616         if (typeflag != FTW_DP)
 617                 return 0;
 618
 619         if (ftwbuf->level < 1)
 620                 return 0;
 621
 622         (void) rmdir(path);
 623         return 0;
 624 }
 625
 626 int cg_trim(const char *controller, const char *path, bool delete_root) {
 627         _cleanup_free_ char *fs = NULL;
 628         int r = 0;
 629
 630         assert(path);
 631
 632         r = cg_get_path(controller, path, NULL, &fs);
 633         if (r < 0)
 634                 return r;
 635
 636         errno = 0;
 637         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 638                 if (errno == ENOENT)
 639                         r = 0;
 640                 else if (errno != 0)
 641                         r = -errno;
 642                 else
 643                         r = -EIO;
 644         }
 645
 646         if (delete_root) {
 647                 if (rmdir(fs) < 0 && errno != ENOENT)
 648                         return -errno;
 649         }
 650
 651         return r;
 652 }
 653
 654 int cg_create(const char *controller, const char *path) {
 655         _cleanup_free_ char *fs = NULL;
 656         int r;
 657
 658         r = cg_get_path_and_check(controller, path, NULL, &fs);
 659         if (r < 0)
 660                 return r;
 661
 662         r = mkdir_parents(fs, 0755);
 663         if (r < 0)
 664                 return r;
 665
 666         if (mkdir(fs, 0755) < 0) {
 667
 668                 if (errno == EEXIST)
 669                         return 0;
 670
 671                 return -errno;
 672         }
 673
 674         return 1;
 675 }
 676
 677 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 678         int r, q;
 679
 680         assert(pid >= 0);
 681
 682         r = cg_create(controller, path);
 683         if (r < 0)
 684                 return r;
 685
 686         q = cg_attach(controller, path, pid);
 687         if (q < 0)
 688                 return q;
 689
 690         /* This does not remove the cgroup on failure */
 691         return r;
 692 }
 693
 694 int cg_attach(const char *controller, const char *path, pid_t pid) {
 695         _cleanup_free_ char *fs = NULL;
 696         char c[DECIMAL_STR_MAX(pid_t) + 2];
 697         int r;
 698
 699         assert(path);
 700         assert(pid >= 0);
 701
 702         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 703         if (r < 0)
 704                 return r;
 705
 706         if (pid == 0)
 707                 pid = getpid();
 708
 709         snprintf(c, sizeof(c), PID_FMT"\n", pid);
 710
 711         return write_string_file(fs, c, 0);
 712 }
 713
 714 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 715         int r;
 716
 717         assert(controller);
 718         assert(path);
 719         assert(pid >= 0);
 720
 721         r = cg_attach(controller, path, pid);
 722         if (r < 0) {
 723                 char prefix[strlen(path) + 1];
 724
 725                 /* This didn't work? Then let's try all prefixes of
 726                  * the destination */
 727
 728                 PATH_FOREACH_PREFIX(prefix, path) {
 729                         int q;
 730
 731                         q = cg_attach(controller, prefix, pid);
 732                         if (q >= 0)
 733                                 return q;
 734                 }
 735         }
 736
 737         return r;
 738 }
 739
 740 int cg_set_group_access(
 741                 const char *controller,
 742                 const char *path,
 743                 mode_t mode,
 744                 uid_t uid,
 745                 gid_t gid) {
 746
 747         _cleanup_free_ char *fs = NULL;
 748         int r;
 749
 750         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 751                 return 0;
 752
 753         if (mode != MODE_INVALID)
 754                 mode &= 0777;
 755
 756         r = cg_get_path(controller, path, NULL, &fs);
 757         if (r < 0)
 758                 return r;
 759
 760         return chmod_and_chown(fs, mode, uid, gid);
 761 }
 762
 763 int cg_set_task_access(
 764                 const char *controller,
 765                 const char *path,
 766                 mode_t mode,
 767                 uid_t uid,
 768                 gid_t gid) {
 769
 770         _cleanup_free_ char *fs = NULL, *procs = NULL;
 771         int r, unified;
 772
 773         assert(path);
 774
 775         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 776                 return 0;
 777
 778         if (mode != MODE_INVALID)
 779                 mode &= 0666;
 780
 781         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 782         if (r < 0)
 783                 return r;
 784
 785         r = chmod_and_chown(fs, mode, uid, gid);
 786         if (r < 0)
 787                 return r;
 788
 789         unified = cg_unified();
 790         if (unified < 0)
 791                 return unified;
 792         if (unified)
 793                 return 0;
 794
 795         /* Compatibility, Always keep values for "tasks" in sync with
 796          * "cgroup.procs" */
 797         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 798                 (void) chmod_and_chown(procs, mode, uid, gid);
 799
 800         return 0;
 801 }
 802
 803 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 804         _cleanup_fclose_ FILE *f = NULL;
 805         char line[LINE_MAX];
 806         const char *fs;
 807         size_t cs = 0;
 808         int unified;
 809
 810         assert(path);
 811         assert(pid >= 0);
 812
 813         unified = cg_unified();
 814         if (unified < 0)
 815                 return unified;
 816         if (unified == 0) {
 817                 if (controller) {
 818                         if (!cg_controller_is_valid(controller))
 819                                 return -EINVAL;
 820                 } else
 821                         controller = SYSTEMD_CGROUP_CONTROLLER;
 822
 823                 cs = strlen(controller);
 824         }
 825
 826         fs = procfs_file_alloca(pid, "cgroup");
 827         f = fopen(fs, "re");
 828         if (!f)
 829                 return errno == ENOENT ? -ESRCH : -errno;
 830
 831         FOREACH_LINE(line, f, return -errno) {
 832                 char *e, *p;
 833
 834                 truncate_nl(line);
 835
 836                 if (unified) {
 837                         e = startswith(line, "0:");
 838                         if (!e)
 839                                 continue;
 840
 841                         e = strchr(e, ':');
 842                         if (!e)
 843                                 continue;
 844                 } else {
 845                         char *l;
 846                         size_t k;
 847                         const char *word, *state;
 848                         bool found = false;
 849
 850                         l = strchr(line, ':');
 851                         if (!l)
 852                                 continue;
 853
 854                         l++;
 855                         e = strchr(l, ':');
 856                         if (!e)
 857                                 continue;
 858
 859                         *e = 0;
 860                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 861                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 862                                         found = true;
 863                                         break;
 864                                 }
 865                         }
 866
 867                         if (!found)
 868                                 continue;
 869                 }
 870
 871                 p = strdup(e + 1);
 872                 if (!p)
 873                         return -ENOMEM;
 874
 875                 *path = p;
 876                 return 0;
 877         }
 878
 879         return -ENOENT;
 880 }
 881
 882 int cg_install_release_agent(const char *controller, const char *agent) {
 883         _cleanup_free_ char *fs = NULL, *contents = NULL;
 884         const char *sc;
 885         int r, unified;
 886
 887         assert(agent);
 888
 889         unified = cg_unified();
 890         if (unified < 0)
 891                 return unified;
 892         if (unified) /* doesn't apply to unified hierarchy */
 893                 return -EOPNOTSUPP;
 894
 895         r = cg_get_path(controller, NULL, "release_agent", &fs);
 896         if (r < 0)
 897                 return r;
 898
 899         r = read_one_line_file(fs, &contents);
 900         if (r < 0)
 901                 return r;
 902
 903         sc = strstrip(contents);
 904         if (isempty(sc)) {
 905                 r = write_string_file(fs, agent, 0);
 906                 if (r < 0)
 907                         return r;
 908         } else if (!streq(sc, agent))
 909                 return -EEXIST;
 910
 911         fs = mfree(fs);
 912         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 913         if (r < 0)
 914                 return r;
 915
 916         contents = mfree(contents);
 917         r = read_one_line_file(fs, &contents);
 918         if (r < 0)
 919                 return r;
 920
 921         sc = strstrip(contents);
 922         if (streq(sc, "0")) {
 923                 r = write_string_file(fs, "1", 0);
 924                 if (r < 0)
 925                         return r;
 926
 927                 return 1;
 928         }
 929
 930         if (!streq(sc, "1"))
 931                 return -EIO;
 932
 933         return 0;
 934 }
 935
 936 int cg_uninstall_release_agent(const char *controller) {
 937         _cleanup_free_ char *fs = NULL;
 938         int r, unified;
 939
 940         unified = cg_unified();
 941         if (unified < 0)
 942                 return unified;
 943         if (unified) /* Doesn't apply to unified hierarchy */
 944                 return -EOPNOTSUPP;
 945
 946         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 947         if (r < 0)
 948                 return r;
 949
 950         r = write_string_file(fs, "0", 0);
 951         if (r < 0)
 952                 return r;
 953
 954         fs = mfree(fs);
 955
 956         r = cg_get_path(controller, NULL, "release_agent", &fs);
 957         if (r < 0)
 958                 return r;
 959
 960         r = write_string_file(fs, "", 0);
 961         if (r < 0)
 962                 return r;
 963
 964         return 0;
 965 }
 966
 967 int cg_is_empty(const char *controller, const char *path) {
 968         _cleanup_fclose_ FILE *f = NULL;
 969         pid_t pid;
 970         int r;
 971
 972         assert(path);
 973
 974         r = cg_enumerate_processes(controller, path, &f);
 975         if (r == -ENOENT)
 976                 return 1;
 977         if (r < 0)
 978                 return r;
 979
 980         r = cg_read_pid(f, &pid);
 981         if (r < 0)
 982                 return r;
 983
 984         return r == 0;
 985 }
 986
 987 int cg_is_empty_recursive(const char *controller, const char *path) {
 988         int unified, r;
 989
 990         assert(path);
 991
 992         /* The root cgroup is always populated */
 993         if (controller && (isempty(path) || path_equal(path, "/")))
 994                 return false;
 995
 996         unified = cg_unified();
 997         if (unified < 0)
 998                 return unified;
 999
1000         if (unified > 0) {
1001                 _cleanup_free_ char *populated = NULL, *t = NULL;
1002
1003                 /* On the unified hierarchy we can check empty state
1004                  * via the "cgroup.populated" attribute. */
1005
1006                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1007                 if (r < 0)
1008                         return r;
1009
1010                 r = read_one_line_file(populated, &t);
1011                 if (r < 0)
1012                         return r;
1013
1014                 return streq(t, "0");
1015         } else {
1016                 _cleanup_closedir_ DIR *d = NULL;
1017                 char *fn;
1018
1019                 r = cg_is_empty(controller, path);
1020                 if (r <= 0)
1021                         return r;
1022
1023                 r = cg_enumerate_subgroups(controller, path, &d);
1024                 if (r == -ENOENT)
1025                         return 1;
1026                 if (r < 0)
1027                         return r;
1028
1029                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1030                         _cleanup_free_ char *p = NULL;
1031
1032                         p = strjoin(path, "/", fn, NULL);
1033                         free(fn);
1034                         if (!p)
1035                                 return -ENOMEM;
1036
1037                         r = cg_is_empty_recursive(controller, p);
1038                         if (r <= 0)
1039                                 return r;
1040                 }
1041                 if (r < 0)
1042                         return r;
1043
1044                 return true;
1045         }
1046 }
1047
1048 int cg_split_spec(const char *spec, char **controller, char **path) {
1049         char *t = NULL, *u = NULL;
1050         const char *e;
1051
1052         assert(spec);
1053
1054         if (*spec == '/') {
1055                 if (!path_is_safe(spec))
1056                         return -EINVAL;
1057
1058                 if (path) {
1059                         t = strdup(spec);
1060                         if (!t)
1061                                 return -ENOMEM;
1062
1063                         *path = path_kill_slashes(t);
1064                 }
1065
1066                 if (controller)
1067                         *controller = NULL;
1068
1069                 return 0;
1070         }
1071
1072         e = strchr(spec, ':');
1073         if (!e) {
1074                 if (!cg_controller_is_valid(spec))
1075                         return -EINVAL;
1076
1077                 if (controller) {
1078                         t = strdup(spec);
1079                         if (!t)
1080                                 return -ENOMEM;
1081
1082                         *controller = t;
1083                 }
1084
1085                 if (path)
1086                         *path = NULL;
1087
1088                 return 0;
1089         }
1090
1091         t = strndup(spec, e-spec);
1092         if (!t)
1093                 return -ENOMEM;
1094         if (!cg_controller_is_valid(t)) {
1095                 free(t);
1096                 return -EINVAL;
1097         }
1098
1099         if (isempty(e+1))
1100                 u = NULL;
1101         else {
1102                 u = strdup(e+1);
1103                 if (!u) {
1104                         free(t);
1105                         return -ENOMEM;
1106                 }
1107
1108                 if (!path_is_safe(u) ||
1109                     !path_is_absolute(u)) {
1110                         free(t);
1111                         free(u);
1112                         return -EINVAL;
1113                 }
1114
1115                 path_kill_slashes(u);
1116         }
1117
1118         if (controller)
1119                 *controller = t;
1120         else
1121                 free(t);
1122
1123         if (path)
1124                 *path = u;
1125         else
1126                 free(u);
1127
1128         return 0;
1129 }
1130
1131 int cg_mangle_path(const char *path, char **result) {
1132         _cleanup_free_ char *c = NULL, *p = NULL;
1133         char *t;
1134         int r;
1135
1136         assert(path);
1137         assert(result);
1138
1139         /* First, check if it already is a filesystem path */
1140         if (path_startswith(path, "/sys/fs/cgroup")) {
1141
1142                 t = strdup(path);
1143                 if (!t)
1144                         return -ENOMEM;
1145
1146                 *result = path_kill_slashes(t);
1147                 return 0;
1148         }
1149
1150         /* Otherwise, treat it as cg spec */
1151         r = cg_split_spec(path, &c, &p);
1152         if (r < 0)
1153                 return r;
1154
1155         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1156 }
1157
1158 int cg_get_root_path(char **path) {
1159         char *p, *e;
1160         int r;
1161
1162         assert(path);
1163
1164         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1165         if (r < 0)
1166                 return r;
1167
1168         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1169         if (!e)
1170                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1171         if (!e)
1172                 e = endswith(p, "/system"); /* even more legacy */
1173         if (e)
1174                 *e = 0;
1175
1176         *path = p;
1177         return 0;
1178 }
1179
1180 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1181         _cleanup_free_ char *rt = NULL;
1182         char *p;
1183         int r;
1184
1185         assert(cgroup);
1186         assert(shifted);
1187
1188         if (!root) {
1189                 /* If the root was specified let's use that, otherwise
1190                  * let's determine it from PID 1 */
1191
1192                 r = cg_get_root_path(&rt);
1193                 if (r < 0)
1194                         return r;
1195
1196                 root = rt;
1197         }
1198
1199         p = path_startswith(cgroup, root);
1200         if (p && p > cgroup)
1201                 *shifted = p - 1;
1202         else
1203                 *shifted = cgroup;
1204
1205         return 0;
1206 }
1207
1208 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1209         _cleanup_free_ char *raw = NULL;
1210         const char *c;
1211         int r;
1212
1213         assert(pid >= 0);
1214         assert(cgroup);
1215
1216         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1217         if (r < 0)
1218                 return r;
1219
1220         r = cg_shift_path(raw, root, &c);
1221         if (r < 0)
1222                 return r;
1223
1224         if (c == raw) {
1225                 *cgroup = raw;
1226                 raw = NULL;
1227         } else {
1228                 char *n;
1229
1230                 n = strdup(c);
1231                 if (!n)
1232                         return -ENOMEM;
1233
1234                 *cgroup = n;
1235         }
1236
1237         return 0;
1238 }
1239
1240 int cg_path_decode_unit(const char *cgroup, char **unit){
1241         char *c, *s;
1242         size_t n;
1243
1244         assert(cgroup);
1245         assert(unit);
1246
1247         n = strcspn(cgroup, "/");
1248         if (n < 3)
1249                 return -ENXIO;
1250
1251         c = strndupa(cgroup, n);
1252         c = cg_unescape(c);
1253
1254         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1255                 return -ENXIO;
1256
1257         s = strdup(c);
1258         if (!s)
1259                 return -ENOMEM;
1260
1261         *unit = s;
1262         return 0;
1263 }
1264
1265 static bool valid_slice_name(const char *p, size_t n) {
1266
1267         if (!p)
1268                 return false;
1269
1270         if (n < strlen("x.slice"))
1271                 return false;
1272
1273         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1274                 char buf[n+1], *c;
1275
1276                 memcpy(buf, p, n);
1277                 buf[n] = 0;
1278
1279                 c = cg_unescape(buf);
1280
1281                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1282         }
1283
1284         return false;
1285 }
1286
/* Skips over all leading slice components of a cgroup path. Returns a
 * pointer to the first component that is not a valid slice name
 * (leading slashes already skipped), possibly the terminating NUL. */
static const char *skip_slices(const char *p) {
        size_t len;

        assert(p);

        p += strspn(p, "/");

        for (len = strcspn(p, "/"); valid_slice_name(p, len); len = strcspn(p, "/")) {
                /* Jump over this slice and the slashes that follow it */
                p += len;
                p += strspn(p, "/");
        }

        return p;
}
1304
1305 int cg_path_get_unit(const char *path, char **ret) {
1306         const char *e;
1307         char *unit;
1308         int r;
1309
1310         assert(path);
1311         assert(ret);
1312
1313         e = skip_slices(path);
1314
1315         r = cg_path_decode_unit(e, &unit);
1316         if (r < 0)
1317                 return r;
1318
1319         /* We skipped over the slices, don't accept any now */
1320         if (endswith(unit, ".slice")) {
1321                 free(unit);
1322                 return -ENXIO;
1323         }
1324
1325         *ret = unit;
1326         return 0;
1327 }
1328
1329 int cg_pid_get_unit(pid_t pid, char **unit) {
1330         _cleanup_free_ char *cgroup = NULL;
1331         int r;
1332
1333         assert(unit);
1334
1335         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1336         if (r < 0)
1337                 return r;
1338
1339         return cg_path_get_unit(cgroup, unit);
1340 }
1341
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Leading slashes are skipped first. Returns a pointer to whatever
 * follows the session scope component (with the slashes after it
 * skipped as well), or NULL if the path is empty or its first component
 * is not a "session-<id>.scope" with a valid session id.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the id between prefix (8 chars) and suffix
                 * (6 chars), plus the trailing NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (!session_id_valid(buf))
                        return NULL; /* this function returns a pointer, not a boolean */

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1378
/**
 * Skip user@*.service, but require it to be there.
 *
 * Leading slashes are skipped first. Returns a pointer to whatever
 * follows the user manager component (slashes after it skipped too), or
 * NULL if the path is empty or its first component is not a
 * "user@<uid>.service" whose instance passes parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        {
                /* The part between "user@" (5 chars) and ".service"
                 * (8 chars), plus the trailing NUL */
                char uid[len - 5 - 8 + 1];

                memcpy(uid, p + 5, len - 5 - 8);
                uid[len - 5 - 8] = 0;

                /* User manager services never need unescaping, as
                 * they cannot conflict with the kernel's own names,
                 * so cg_unescape() is not needed here. */

                if (parse_uid(uid, NULL) < 0)
                        return NULL;
        }

        p += len;
        p += strspn(p, "/");

        return p;
}
1416
/* Skips everything in a cgroup path that precedes the units of a user
 * session: any leading slices, followed by either a user@.service
 * manager component or a session scope. Returns NULL if neither of the
 * latter two is found. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Slices are optional ... */
        after_slices = skip_slices(path);

        /* ... then the user manager is tried first, the session scope second */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1433
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if
 * the path carries no user manager/session prefix, otherwise whatever
 * cg_path_get_unit() returns for the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the prefix things look just like for a system unit, so
         * the same parser is reused. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1449
1450 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1451         _cleanup_free_ char *cgroup = NULL;
1452         int r;
1453
1454         assert(unit);
1455
1456         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1457         if (r < 0)
1458                 return r;
1459
1460         return cg_path_get_user_unit(cgroup, unit);
1461 }
1462
1463 int cg_path_get_machine_name(const char *path, char **machine) {
1464         _cleanup_free_ char *u = NULL;
1465         const char *sl;
1466         int r;
1467
1468         r = cg_path_get_unit(path, &u);
1469         if (r < 0)
1470                 return r;
1471
1472         sl = strjoina("/run/systemd/machines/unit:", u);
1473         return readlink_malloc(sl, machine);
1474 }
1475
1476 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1477         _cleanup_free_ char *cgroup = NULL;
1478         int r;
1479
1480         assert(machine);
1481
1482         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1483         if (r < 0)
1484                 return r;
1485
1486         return cg_path_get_machine_name(cgroup, machine);
1487 }
1488
1489 int cg_path_get_session(const char *path, char **session) {
1490         _cleanup_free_ char *unit = NULL;
1491         char *start, *end;
1492         int r;
1493
1494         assert(path);
1495
1496         r = cg_path_get_unit(path, &unit);
1497         if (r < 0)
1498                 return r;
1499
1500         start = startswith(unit, "session-");
1501         if (!start)
1502                 return -ENXIO;
1503         end = endswith(start, ".scope");
1504         if (!end)
1505                 return -ENXIO;
1506
1507         *end = 0;
1508         if (!session_id_valid(start))
1509                 return -ENXIO;
1510
1511         if (session) {
1512                 char *rr;
1513
1514                 rr = strdup(start);
1515                 if (!rr)
1516                         return -ENOMEM;
1517
1518                 *session = rr;
1519         }
1520
1521         return 0;
1522 }
1523
1524 int cg_pid_get_session(pid_t pid, char **session) {
1525         _cleanup_free_ char *cgroup = NULL;
1526         int r;
1527
1528         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1529         if (r < 0)
1530                 return r;
1531
1532         return cg_path_get_session(cgroup, session);
1533 }
1534
1535 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1536         _cleanup_free_ char *slice = NULL;
1537         char *start, *end;
1538         int r;
1539
1540         assert(path);
1541
1542         r = cg_path_get_slice(path, &slice);
1543         if (r < 0)
1544                 return r;
1545
1546         start = startswith(slice, "user-");
1547         if (!start)
1548                 return -ENXIO;
1549         end = endswith(start, ".slice");
1550         if (!end)
1551                 return -ENXIO;
1552
1553         *end = 0;
1554         if (parse_uid(start, uid) < 0)
1555                 return -ENXIO;
1556
1557         return 0;
1558 }
1559
1560 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1561         _cleanup_free_ char *cgroup = NULL;
1562         int r;
1563
1564         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1565         if (r < 0)
1566                 return r;
1567
1568         return cg_path_get_owner_uid(cgroup, uid);
1569 }
1570
/* Finds the right-most slice unit among the leading slice components
 * of a cgroup path, stopping at the first component that is not a
 * slice. If the path does not start with any slice, "-.slice" is
 * returned. The result is newly allocated. Returns 0 on success,
 * -ENOMEM on allocation failure, or the error of
 * cg_path_decode_unit(). */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        char *root;
        size_t len;

        assert(p);
        assert(slice);

        p += strspn(p, "/");

        for (len = strcspn(p, "/"); valid_slice_name(p, len); len = strcspn(p, "/")) {
                /* Remember this slice, then move on to the next component */
                last = p;
                p += len;
                p += strspn(p, "/");
        }

        if (last)
                return cg_path_decode_unit(last, slice);

        /* Not a single slice in the path */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1606
1607 int cg_pid_get_slice(pid_t pid, char **slice) {
1608         _cleanup_free_ char *cgroup = NULL;
1609         int r;
1610
1611         assert(slice);
1612
1613         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1614         if (r < 0)
1615                 return r;
1616
1617         return cg_path_get_slice(cgroup, slice);
1618 }
1619
/* Determines the user slice a cgroup path belongs to. Returns -ENXIO
 * if the path carries no user manager/session prefix, otherwise
 * whatever cg_path_get_slice() returns for the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the prefix things look just like for a system slice,
         * so the same parser is reused. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1633
1634 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1635         _cleanup_free_ char *cgroup = NULL;
1636         int r;
1637
1638         assert(slice);
1639
1640         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1641         if (r < 0)
1642                 return r;
1643
1644         return cg_path_get_user_slice(cgroup, slice);
1645 }
1646
1647 char *cg_escape(const char *p) {
1648         bool need_prefix = false;
1649
1650         /* This implements very minimal escaping for names to be used
1651          * as file names in the cgroup tree: any name which might
1652          * conflict with a kernel name or is prefixed with '_' is
1653          * prefixed with a '_'. That way, when reading cgroup names it
1654          * is sufficient to remove a single prefixing underscore if
1655          * there is one. */
1656
1657         /* The return value of this function (unlike cg_unescape())
1658          * needs free()! */
1659
1660         if (p[0] == 0 ||
1661             p[0] == '_' ||
1662             p[0] == '.' ||
1663             streq(p, "notify_on_release") ||
1664             streq(p, "release_agent") ||
1665             streq(p, "tasks") ||
1666             startswith(p, "cgroup."))
1667                 need_prefix = true;
1668         else {
1669                 const char *dot;
1670
1671                 dot = strrchr(p, '.');
1672                 if (dot) {
1673                         CGroupController c;
1674                         size_t l = dot - p;
1675
1676                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1677                                 const char *n;
1678
1679                                 n = cgroup_controller_to_string(c);
1680
1681                                 if (l != strlen(n))
1682                                         continue;
1683
1684                                 if (memcmp(p, n, l) != 0)
1685                                         continue;
1686
1687                                 need_prefix = true;
1688                                 break;
1689                         }
1690                 }
1691         }
1692
1693         if (need_prefix)
1694                 return strappend("_", p);
1695
1696         return strdup(p);
1697 }
1698
/* Reverses cg_escape(): skips a single leading underscore, if there is
 * one.
 *
 * Unlike for cg_escape() the return value must NOT be free()d: it
 * points into the string that was passed in. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (*p == '_' ? p + 1 : p);
}
1710
1711 #define CONTROLLER_VALID                        \
1712         DIGITS LETTERS                          \
1713         "_"
1714
1715 bool cg_controller_is_valid(const char *p) {
1716         const char *t, *s;
1717
1718         if (!p)
1719                 return false;
1720
1721         s = startswith(p, "name=");
1722         if (s)
1723                 p = s;
1724
1725         if (*p == 0 || *p == '_')
1726                 return false;
1727
1728         for (t = p; *t; t++)
1729                 if (!strchr(CONTROLLER_VALID, *t))
1730                         return false;
1731
1732         if (t - p > FILENAME_MAX)
1733                 return false;
1734
1735         return true;
1736 }
1737
1738 int cg_slice_to_path(const char *unit, char **ret) {
1739         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1740         const char *dash;
1741         int r;
1742
1743         assert(unit);
1744         assert(ret);
1745
1746         if (streq(unit, "-.slice")) {
1747                 char *x;
1748
1749                 x = strdup("");
1750                 if (!x)
1751                         return -ENOMEM;
1752                 *ret = x;
1753                 return 0;
1754         }
1755
1756         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1757                 return -EINVAL;
1758
1759         if (!endswith(unit, ".slice"))
1760                 return -EINVAL;
1761
1762         r = unit_name_to_prefix(unit, &p);
1763         if (r < 0)
1764                 return r;
1765
1766         dash = strchr(p, '-');
1767
1768         /* Don't allow initial dashes */
1769         if (dash == p)
1770                 return -EINVAL;
1771
1772         while (dash) {
1773                 _cleanup_free_ char *escaped = NULL;
1774                 char n[dash - p + sizeof(".slice")];
1775
1776                 /* Don't allow trailing or double dashes */
1777                 if (dash[1] == 0 || dash[1] == '-')
1778                         return -EINVAL;
1779
1780                 strcpy(stpncpy(n, p, dash - p), ".slice");
1781                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1782                         return -EINVAL;
1783
1784                 escaped = cg_escape(n);
1785                 if (!escaped)
1786                         return -ENOMEM;
1787
1788                 if (!strextend(&s, escaped, "/", NULL))
1789                         return -ENOMEM;
1790
1791                 dash = strchr(dash+1, '-');
1792         }
1793
1794         e = cg_escape(unit);
1795         if (!e)
1796                 return -ENOMEM;
1797
1798         if (!strextend(&s, e, NULL))
1799                 return -ENOMEM;
1800
1801         *ret = s;
1802         s = NULL;
1803
1804         return 0;
1805 }
1806
1807 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1808         _cleanup_free_ char *p = NULL;
1809         int r;
1810
1811         r = cg_get_path(controller, path, attribute, &p);
1812         if (r < 0)
1813                 return r;
1814
1815         return write_string_file(p, value, 0);
1816 }
1817
1818 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1819         _cleanup_free_ char *p = NULL;
1820         int r;
1821
1822         r = cg_get_path(controller, path, attribute, &p);
1823         if (r < 0)
1824                 return r;
1825
1826         return read_one_line_file(p, ret);
1827 }
1828
1829 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1830         CGroupController c;
1831         int r, unified;
1832
1833         /* This one will create a cgroup in our private tree, but also
1834          * duplicate it in the trees specified in mask, and remove it
1835          * in all others */
1836
1837         /* First create the cgroup in our own hierarchy. */
1838         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1839         if (r < 0)
1840                 return r;
1841
1842         /* If we are in the unified hierarchy, we are done now */
1843         unified = cg_unified();
1844         if (unified < 0)
1845                 return unified;
1846         if (unified > 0)
1847                 return 0;
1848
1849         /* Otherwise, do the same in the other hierarchies */
1850         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1851                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1852                 const char *n;
1853
1854                 n = cgroup_controller_to_string(c);
1855
1856                 if (mask & bit)
1857                         (void) cg_create(n, path);
1858                 else if (supported & bit)
1859                         (void) cg_trim(n, path, true);
1860         }
1861
1862         return 0;
1863 }
1864
1865 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1866         CGroupController c;
1867         int r, unified;
1868
1869         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1870         if (r < 0)
1871                 return r;
1872
1873         unified = cg_unified();
1874         if (unified < 0)
1875                 return unified;
1876         if (unified > 0)
1877                 return 0;
1878
1879         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1880                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1881                 const char *p = NULL;
1882
1883                 if (!(supported & bit))
1884                         continue;
1885
1886                 if (path_callback)
1887                         p = path_callback(bit, userdata);
1888
1889                 if (!p)
1890                         p = path;
1891
1892                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1893         }
1894
1895         return 0;
1896 }
1897
1898 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1899         Iterator i;
1900         void *pidp;
1901         int r = 0;
1902
1903         SET_FOREACH(pidp, pids, i) {
1904                 pid_t pid = PTR_TO_LONG(pidp);
1905                 int q;
1906
1907                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1908                 if (q < 0 && r >= 0)
1909                         r = q;
1910         }
1911
1912         return r;
1913 }
1914
1915 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1916         CGroupController c;
1917         int r, unified;
1918
1919         if (!path_equal(from, to))  {
1920                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1921                 if (r < 0)
1922                         return r;
1923         }
1924
1925         unified = cg_unified();
1926         if (unified < 0)
1927                 return unified;
1928         if (unified > 0)
1929                 return r;
1930
1931         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1932                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1933                 const char *p = NULL;
1934
1935                 if (!(supported & bit))
1936                         continue;
1937
1938                 if (to_callback)
1939                         p = to_callback(bit, userdata);
1940
1941                 if (!p)
1942                         p = to;
1943
1944                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1945         }
1946
1947         return 0;
1948 }
1949
1950 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1951         CGroupController c;
1952         int r, unified;
1953
1954         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1955         if (r < 0)
1956                 return r;
1957
1958         unified = cg_unified();
1959         if (unified < 0)
1960                 return unified;
1961         if (unified > 0)
1962                 return r;
1963
1964         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1965                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1966
1967                 if (!(supported & bit))
1968                         continue;
1969
1970                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1971         }
1972
1973         return 0;
1974 }
1975
1976 int cg_mask_supported(CGroupMask *ret) {
1977         CGroupMask mask = 0;
1978         int r, unified;
1979
1980         /* Determines the mask of supported cgroup controllers. Only
1981          * includes controllers we can make sense of and that are
1982          * actually accessible. */
1983
1984         unified = cg_unified();
1985         if (unified < 0)
1986                 return unified;
1987         if (unified > 0) {
1988                 _cleanup_free_ char *controllers = NULL;
1989                 const char *c;
1990
1991                 /* In the unified hierarchy we can read the supported
1992                  * and accessible controllers from a the top-level
1993                  * cgroup attribute */
1994
1995                 r = read_one_line_file("/sys/fs/cgroup/cgroup.controllers", &controllers);
1996                 if (r < 0)
1997                         return r;
1998
1999                 c = controllers;
2000                 for (;;) {
2001                         _cleanup_free_ char *n = NULL;
2002                         CGroupController v;
2003
2004                         r = extract_first_word(&c, &n, NULL, 0);
2005                         if (r < 0)
2006                                 return r;
2007                         if (r == 0)
2008                                 break;
2009
2010                         v = cgroup_controller_from_string(n);
2011                         if (v < 0)
2012                                 continue;
2013
2014                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2015                 }
2016
2017                 /* Currently, we only support the memory controller in
2018                  * the unified hierarchy, mask everything else off. */
2019                 mask &= CGROUP_MASK_MEMORY;
2020
2021         } else {
2022                 CGroupController c;
2023
2024                 /* In the legacy hierarchy, we check whether which
2025                  * hierarchies are mounted. */
2026
2027                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2028                         const char *n;
2029
2030                         n = cgroup_controller_to_string(c);
2031                         if (controller_is_accessible(n) >= 0)
2032                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2033                 }
2034         }
2035
2036         *ret = mask;
2037         return 0;
2038 }
2039
2040 int cg_kernel_controllers(Set *controllers) {
2041         _cleanup_fclose_ FILE *f = NULL;
2042         char buf[LINE_MAX];
2043         int r;
2044
2045         assert(controllers);
2046
2047         /* Determines the full list of kernel-known controllers. Might
2048          * include controllers we don't actually support, arbitrary
2049          * named hierarchies and controllers that aren't currently
2050          * accessible (because not mounted). */
2051
2052         f = fopen("/proc/cgroups", "re");
2053         if (!f) {
2054                 if (errno == ENOENT)
2055                         return 0;
2056                 return -errno;
2057         }
2058
2059         /* Ignore the header line */
2060         (void) fgets(buf, sizeof(buf), f);
2061
2062         for (;;) {
2063                 char *controller;
2064                 int enabled = 0;
2065
2066                 errno = 0;
2067                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2068
2069                         if (feof(f))
2070                                 break;
2071
2072                         if (ferror(f) && errno != 0)
2073                                 return -errno;
2074
2075                         return -EBADMSG;
2076                 }
2077
2078                 if (!enabled) {
2079                         free(controller);
2080                         continue;
2081                 }
2082
2083                 if (!cg_controller_is_valid(controller)) {
2084                         free(controller);
2085                         return -EBADMSG;
2086                 }
2087
2088                 r = set_consume(controllers, controller);
2089                 if (r < 0)
2090                         return r;
2091         }
2092
2093         return 0;
2094 }
2095
2096 static thread_local int unified_cache = -1;
2097
2098 int cg_unified(void) {
2099         struct statfs fs;
2100
2101         /* Checks if we support the unified hierarchy. Returns an
2102          * error when the cgroup hierarchies aren't mounted yet or we
2103          * have any other trouble determining if the unified hierarchy
2104          * is supported. */
2105
2106         if (unified_cache >= 0)
2107                 return unified_cache;
2108
2109         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2110                 return -errno;
2111
2112         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2113                 unified_cache = true;
2114         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2115                 unified_cache = false;
2116         else
2117                 return -ENOEXEC;
2118
2119         return unified_cache;
2120 }
2121
2122 void cg_unified_flush(void) {
2123         unified_cache = -1;
2124 }
2125
2126 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2127         _cleanup_free_ char *fs = NULL;
2128         CGroupController c;
2129         int r, unified;
2130
2131         assert(p);
2132
2133         if (supported == 0)
2134                 return 0;
2135
2136         unified = cg_unified();
2137         if (unified < 0)
2138                 return unified;
2139         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2140                 return 0;
2141
2142         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2143         if (r < 0)
2144                 return r;
2145
2146         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2147                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2148                 const char *n;
2149
2150                 if (!(supported & bit))
2151                         continue;
2152
2153                 n = cgroup_controller_to_string(c);
2154                 {
2155                         char s[1 + strlen(n) + 1];
2156
2157                         s[0] = mask & bit ? '+' : '-';
2158                         strcpy(s + 1, n);
2159
2160                         r = write_string_file(fs, s, 0);
2161                         if (r < 0)
2162                                 log_warning_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2163                 }
2164         }
2165
2166         return 0;
2167 }
2168
2169 bool cg_is_unified_wanted(void) {
2170         static thread_local int wanted = -1;
2171         int r, unified;
2172
2173         /* If the hierarchy is already mounted, then follow whatever
2174          * was chosen for it. */
2175         unified = cg_unified();
2176         if (unified >= 0)
2177                 return unified;
2178
2179         /* Otherwise, let's see what the kernel command line has to
2180          * say. Since checking that is expensive, let's cache the
2181          * result. */
2182         if (wanted >= 0)
2183                 return wanted;
2184
2185         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2186         if (r > 0)
2187                 return (wanted = true);
2188         else {
2189                 _cleanup_free_ char *value = NULL;
2190
2191                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2192                 if (r < 0)
2193                         return false;
2194                 if (r == 0)
2195                         return (wanted = false);
2196
2197                 return (wanted = parse_boolean(value) > 0);
2198         }
2199 }
2200
/* The legacy hierarchy is wanted exactly when the unified one is not. */
bool cg_is_legacy_wanted(void) {
        const bool unified = cg_is_unified_wanted();

        return !unified;
}
2204
2205 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2206         [CGROUP_CONTROLLER_CPU] = "cpu",
2207         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2208         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2209         [CGROUP_CONTROLLER_MEMORY] = "memory",
2210         [CGROUP_CONTROLLER_DEVICE] = "device",
2211 };
2212
2213 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);