src/basic/cgroup-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <errno.h>
  23 #include <unistd.h>
  24 #include <signal.h>
  25 #include <string.h>
  26 #include <stdlib.h>
  27 #include <dirent.h>
  28 #include <sys/stat.h>
  29 #include <sys/types.h>
  30 #include <ftw.h>
  31
  32 #include "set.h"
  33 #include "macro.h"
  34 #include "util.h"
  35 #include "formats-util.h"
  36 #include "process-util.h"
  37 #include "path-util.h"
  38 #include "unit-name.h"
  39 #include "fileio.h"
  40 #include "special.h"
  41 #include "mkdir.h"
  42 #include "login-util.h"
  43 #include "cgroup-util.h"
  44
  45 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  46         _cleanup_free_ char *fs = NULL;
  47         FILE *f;
  48         int r;
  49
  50         assert(_f);
  51
  52         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  53         if (r < 0)
  54                 return r;
  55
  56         f = fopen(fs, "re");
  57         if (!f)
  58                 return -errno;
  59
  60         *_f = f;
  61         return 0;
  62 }
  63
  64 int cg_read_pid(FILE *f, pid_t *_pid) {
  65         unsigned long ul;
  66
  67         /* Note that the cgroup.procs might contain duplicates! See
  68          * cgroups.txt for details. */
  69
  70         assert(f);
  71         assert(_pid);
  72
  73         errno = 0;
  74         if (fscanf(f, "%lu", &ul) != 1) {
  75
  76                 if (feof(f))
  77                         return 0;
  78
  79                 return errno ? -errno : -EIO;
  80         }
  81
  82         if (ul <= 0)
  83                 return -EIO;
  84
  85         *_pid = (pid_t) ul;
  86         return 1;
  87 }
  88
  89 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
  90         _cleanup_free_ char *fs = NULL;
  91         int r;
  92         DIR *d;
  93
  94         assert(_d);
  95
  96         /* This is not recursive! */
  97
  98         r = cg_get_path(controller, path, NULL, &fs);
  99         if (r < 0)
 100                 return r;
 101
 102         d = opendir(fs);
 103         if (!d)
 104                 return -errno;
 105
 106         *_d = d;
 107         return 0;
 108 }
 109
 110 int cg_read_subgroup(DIR *d, char **fn) {
 111         struct dirent *de;
 112
 113         assert(d);
 114         assert(fn);
 115
 116         FOREACH_DIRENT_ALL(de, d, return -errno) {
 117                 char *b;
 118
 119                 if (de->d_type != DT_DIR)
 120                         continue;
 121
 122                 if (streq(de->d_name, ".") ||
 123                     streq(de->d_name, ".."))
 124                         continue;
 125
 126                 b = strdup(de->d_name);
 127                 if (!b)
 128                         return -ENOMEM;
 129
 130                 *fn = b;
 131                 return 1;
 132         }
 133
 134         return 0;
 135 }
 136
 137 int cg_rmdir(const char *controller, const char *path) {
 138         _cleanup_free_ char *p = NULL;
 139         int r;
 140
 141         r = cg_get_path(controller, path, NULL, &p);
 142         if (r < 0)
 143                 return r;
 144
 145         r = rmdir(p);
 146         if (r < 0 && errno != ENOENT)
 147                 return -errno;
 148
 149         return 0;
 150 }
 151
 152 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
 153         _cleanup_set_free_ Set *allocated_set = NULL;
 154         bool done = false;
 155         int r, ret = 0;
 156         pid_t my_pid;
 157
 158         assert(sig >= 0);
 159
 160         /* This goes through the tasks list and kills them all. This
 161          * is repeated until no further processes are added to the
 162          * tasks list, to properly handle forking processes */
 163
 164         if (!s) {
 165                 s = allocated_set = set_new(NULL);
 166                 if (!s)
 167                         return -ENOMEM;
 168         }
 169
 170         my_pid = getpid();
 171
 172         do {
 173                 _cleanup_fclose_ FILE *f = NULL;
 174                 pid_t pid = 0;
 175                 done = true;
 176
 177                 r = cg_enumerate_processes(controller, path, &f);
 178                 if (r < 0) {
 179                         if (ret >= 0 && r != -ENOENT)
 180                                 return r;
 181
 182                         return ret;
 183                 }
 184
 185                 while ((r = cg_read_pid(f, &pid)) > 0) {
 186
 187                         if (ignore_self && pid == my_pid)
 188                                 continue;
 189
 190                         if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
 191                                 continue;
 192
 193                         /* If we haven't killed this process yet, kill
 194                          * it */
 195                         if (kill(pid, sig) < 0) {
 196                                 if (ret >= 0 && errno != ESRCH)
 197                                         ret = -errno;
 198                         } else {
 199                                 if (sigcont && sig != SIGKILL)
 200                                         (void) kill(pid, SIGCONT);
 201
 202                                 if (ret == 0)
 203                                         ret = 1;
 204                         }
 205
 206                         done = false;
 207
 208                         r = set_put(s, LONG_TO_PTR(pid));
 209                         if (r < 0) {
 210                                 if (ret >= 0)
 211                                         return r;
 212
 213                                 return ret;
 214                         }
 215                 }
 216
 217                 if (r < 0) {
 218                         if (ret >= 0)
 219                                 return r;
 220
 221                         return ret;
 222                 }
 223
 224                 /* To avoid racing against processes which fork
 225                  * quicker than we can kill them we repeat this until
 226                  * no new pids need to be killed. */
 227
 228         } while (!done);
 229
 230         return ret;
 231 }
 232
 233 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 234         _cleanup_set_free_ Set *allocated_set = NULL;
 235         _cleanup_closedir_ DIR *d = NULL;
 236         int r, ret;
 237         char *fn;
 238
 239         assert(path);
 240         assert(sig >= 0);
 241
 242         if (!s) {
 243                 s = allocated_set = set_new(NULL);
 244                 if (!s)
 245                         return -ENOMEM;
 246         }
 247
 248         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 249
 250         r = cg_enumerate_subgroups(controller, path, &d);
 251         if (r < 0) {
 252                 if (ret >= 0 && r != -ENOENT)
 253                         return r;
 254
 255                 return ret;
 256         }
 257
 258         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 259                 _cleanup_free_ char *p = NULL;
 260
 261                 p = strjoin(path, "/", fn, NULL);
 262                 free(fn);
 263                 if (!p)
 264                         return -ENOMEM;
 265
 266                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 267                 if (r != 0 && ret >= 0)
 268                         ret = r;
 269         }
 270
 271         if (ret >= 0 && r < 0)
 272                 ret = r;
 273
 274         if (rem) {
 275                 r = cg_rmdir(controller, path);
 276                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 277                         return r;
 278         }
 279
 280         return ret;
 281 }
 282
 283 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
 284         bool done = false;
 285         _cleanup_set_free_ Set *s = NULL;
 286         int r, ret = 0;
 287         pid_t my_pid;
 288
 289         assert(cfrom);
 290         assert(pfrom);
 291         assert(cto);
 292         assert(pto);
 293
 294         s = set_new(NULL);
 295         if (!s)
 296                 return -ENOMEM;
 297
 298         my_pid = getpid();
 299
 300         do {
 301                 _cleanup_fclose_ FILE *f = NULL;
 302                 pid_t pid = 0;
 303                 done = true;
 304
 305                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 306                 if (r < 0) {
 307                         if (ret >= 0 && r != -ENOENT)
 308                                 return r;
 309
 310                         return ret;
 311                 }
 312
 313                 while ((r = cg_read_pid(f, &pid)) > 0) {
 314
 315                         /* This might do weird stuff if we aren't a
 316                          * single-threaded program. However, we
 317                          * luckily know we are not */
 318                         if (ignore_self && pid == my_pid)
 319                                 continue;
 320
 321                         if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
 322                                 continue;
 323
 324                         /* Ignore kernel threads. Since they can only
 325                          * exist in the root cgroup, we only check for
 326                          * them there. */
 327                         if (cfrom &&
 328                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 329                             is_kernel_thread(pid) > 0)
 330                                 continue;
 331
 332                         r = cg_attach(cto, pto, pid);
 333                         if (r < 0) {
 334                                 if (ret >= 0 && r != -ESRCH)
 335                                         ret = r;
 336                         } else if (ret == 0)
 337                                 ret = 1;
 338
 339                         done = false;
 340
 341                         r = set_put(s, LONG_TO_PTR(pid));
 342                         if (r < 0) {
 343                                 if (ret >= 0)
 344                                         return r;
 345
 346                                 return ret;
 347                         }
 348                 }
 349
 350                 if (r < 0) {
 351                         if (ret >= 0)
 352                                 return r;
 353
 354                         return ret;
 355                 }
 356         } while (!done);
 357
 358         return ret;
 359 }
 360
 361 int cg_migrate_recursive(
 362                 const char *cfrom,
 363                 const char *pfrom,
 364                 const char *cto,
 365                 const char *pto,
 366                 bool ignore_self,
 367                 bool rem) {
 368
 369         _cleanup_closedir_ DIR *d = NULL;
 370         int r, ret = 0;
 371         char *fn;
 372
 373         assert(cfrom);
 374         assert(pfrom);
 375         assert(cto);
 376         assert(pto);
 377
 378         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 379
 380         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 381         if (r < 0) {
 382                 if (ret >= 0 && r != -ENOENT)
 383                         return r;
 384
 385                 return ret;
 386         }
 387
 388         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 389                 _cleanup_free_ char *p = NULL;
 390
 391                 p = strjoin(pfrom, "/", fn, NULL);
 392                 free(fn);
 393                 if (!p)
 394                         return -ENOMEM;
 395
 396                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 397                 if (r != 0 && ret >= 0)
 398                         ret = r;
 399         }
 400
 401         if (r < 0 && ret >= 0)
 402                 ret = r;
 403
 404         if (rem) {
 405                 r = cg_rmdir(cfrom, pfrom);
 406                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 407                         return r;
 408         }
 409
 410         return ret;
 411 }
 412
 413 int cg_migrate_recursive_fallback(
 414                 const char *cfrom,
 415                 const char *pfrom,
 416                 const char *cto,
 417                 const char *pto,
 418                 bool ignore_self,
 419                 bool rem) {
 420
 421         int r;
 422
 423         assert(cfrom);
 424         assert(pfrom);
 425         assert(cto);
 426         assert(pto);
 427
 428         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 429         if (r < 0) {
 430                 char prefix[strlen(pto) + 1];
 431
 432                 /* This didn't work? Then let's try all prefixes of the destination */
 433
 434                 PATH_FOREACH_PREFIX(prefix, pto) {
 435                         int q;
 436
 437                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 438                         if (q >= 0)
 439                                 return q;
 440                 }
 441         }
 442
 443         return r;
 444 }
 445
 446 static const char *controller_to_dirname(const char *controller) {
 447         const char *e;
 448
 449         assert(controller);
 450
 451         /* Converts a controller name to the directory name below
 452          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 453          * just cuts off the name= prefixed used for named
 454          * hierarchies, if it is specified. */
 455
 456         e = startswith(controller, "name=");
 457         if (e)
 458                 return e;
 459
 460         return controller;
 461 }
 462
 463 static int join_path_legacy(const char *controller_dn, const char *path, const char *suffix, char **fs) {
 464         char *t = NULL;
 465
 466         assert(fs);
 467         assert(controller_dn);
 468
 469         if (isempty(path) && isempty(suffix))
 470                 t = strappend("/sys/fs/cgroup/", controller_dn);
 471         else if (isempty(path))
 472                 t = strjoin("/sys/fs/cgroup/", controller_dn, "/", suffix, NULL);
 473         else if (isempty(suffix))
 474                 t = strjoin("/sys/fs/cgroup/", controller_dn, "/", path, NULL);
 475         else
 476                 t = strjoin("/sys/fs/cgroup/", controller_dn, "/", path, "/", suffix, NULL);
 477         if (!t)
 478                 return -ENOMEM;
 479
 480         *fs = t;
 481         return 0;
 482 }
 483
 484 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 485         char *t;
 486
 487         assert(fs);
 488
 489         if (isempty(path) && isempty(suffix))
 490                 t = strdup("/sys/fs/cgroup");
 491         else if (isempty(path))
 492                 t = strappend("/sys/fs/cgroup/", suffix);
 493         else if (isempty(suffix))
 494                 t = strappend("/sys/fs/cgroup/", path);
 495         else
 496                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 497         if (!t)
 498                 return -ENOMEM;
 499
 500         *fs = t;
 501         return 0;
 502 }
 503
 504 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 505         int unified, r;
 506
 507         assert(fs);
 508
 509         if (!controller) {
 510                 char *t;
 511
 512                 /* If no controller is specified, we assume only the
 513                  * path below the controller matters */
 514
 515                 if (!path && !suffix)
 516                         return -EINVAL;
 517
 518                 if (isempty(suffix))
 519                         t = strdup(path);
 520                 else if (isempty(path))
 521                         t = strdup(suffix);
 522                 else
 523                         t = strjoin(path, "/", suffix, NULL);
 524                 if (!t)
 525                         return -ENOMEM;
 526
 527                 *fs = path_kill_slashes(t);
 528                 return 0;
 529         }
 530
 531         if (!cg_controller_is_valid(controller))
 532                 return -EINVAL;
 533
 534         unified = cg_unified();
 535         if (unified < 0)
 536                 return unified;
 537
 538         if (unified > 0)
 539                 r = join_path_unified(path, suffix, fs);
 540         else {
 541                 const char *dn;
 542
 543                 dn = controller_to_dirname(controller);
 544
 545                 r = join_path_legacy(dn, path, suffix, fs);
 546         }
 547
 548         if (r < 0)
 549                 return r;
 550
 551         path_kill_slashes(*fs);
 552         return 0;
 553 }
 554
 555 static int controller_is_accessible(const char *controller) {
 556         int unified;
 557
 558         assert(controller);
 559
 560         /* Checks whether a specific controller is accessible,
 561          * i.e. its hierarchy mounted. In the unified hierarchy all
 562          * controllers are considered accessible, except for the named
 563          * hierarchies */
 564
 565         if (!cg_controller_is_valid(controller))
 566                 return -EINVAL;
 567
 568         unified = cg_unified();
 569         if (unified < 0)
 570                 return unified;
 571         if (unified > 0) {
 572                 /* We don't support named hierarchies if we are using
 573                  * the unified hierarchy. */
 574
 575                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 576                         return 0;
 577
 578                 if (startswith(controller, "name="))
 579                         return -EOPNOTSUPP;
 580
 581         } else {
 582                 const char *cc, *dn;
 583
 584                 dn = controller_to_dirname(controller);
 585                 cc = strjoina("/sys/fs/cgroup/", dn);
 586
 587                 if (laccess(cc, F_OK) < 0)
 588                         return -errno;
 589         }
 590
 591         return 0;
 592 }
 593
 594 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 595         int r;
 596
 597         assert(controller);
 598         assert(fs);
 599
 600         /* Check if the specified controller is actually accessible */
 601         r = controller_is_accessible(controller);
 602         if (r < 0)
 603                 return r;
 604
 605         return cg_get_path(controller, path, suffix, fs);
 606 }
 607
 608 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 609         assert(path);
 610         assert(sb);
 611         assert(ftwbuf);
 612
 613         if (typeflag != FTW_DP)
 614                 return 0;
 615
 616         if (ftwbuf->level < 1)
 617                 return 0;
 618
 619         (void) rmdir(path);
 620         return 0;
 621 }
 622
 623 int cg_trim(const char *controller, const char *path, bool delete_root) {
 624         _cleanup_free_ char *fs = NULL;
 625         int r = 0;
 626
 627         assert(path);
 628
 629         r = cg_get_path(controller, path, NULL, &fs);
 630         if (r < 0)
 631                 return r;
 632
 633         errno = 0;
 634         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 635                 if (errno == ENOENT)
 636                         r = 0;
 637                 else if (errno != 0)
 638                         r = -errno;
 639                 else
 640                         r = -EIO;
 641         }
 642
 643         if (delete_root) {
 644                 if (rmdir(fs) < 0 && errno != ENOENT)
 645                         return -errno;
 646         }
 647
 648         return r;
 649 }
 650
 651 int cg_create(const char *controller, const char *path) {
 652         _cleanup_free_ char *fs = NULL;
 653         int r;
 654
 655         r = cg_get_path_and_check(controller, path, NULL, &fs);
 656         if (r < 0)
 657                 return r;
 658
 659         r = mkdir_parents(fs, 0755);
 660         if (r < 0)
 661                 return r;
 662
 663         if (mkdir(fs, 0755) < 0) {
 664
 665                 if (errno == EEXIST)
 666                         return 0;
 667
 668                 return -errno;
 669         }
 670
 671         return 1;
 672 }
 673
 674 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 675         int r, q;
 676
 677         assert(pid >= 0);
 678
 679         r = cg_create(controller, path);
 680         if (r < 0)
 681                 return r;
 682
 683         q = cg_attach(controller, path, pid);
 684         if (q < 0)
 685                 return q;
 686
 687         /* This does not remove the cgroup on failure */
 688         return r;
 689 }
 690
 691 int cg_attach(const char *controller, const char *path, pid_t pid) {
 692         _cleanup_free_ char *fs = NULL;
 693         char c[DECIMAL_STR_MAX(pid_t) + 2];
 694         int r;
 695
 696         assert(path);
 697         assert(pid >= 0);
 698
 699         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 700         if (r < 0)
 701                 return r;
 702
 703         if (pid == 0)
 704                 pid = getpid();
 705
 706         snprintf(c, sizeof(c), PID_FMT"\n", pid);
 707
 708         return write_string_file(fs, c, 0);
 709 }
 710
 711 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 712         int r;
 713
 714         assert(controller);
 715         assert(path);
 716         assert(pid >= 0);
 717
 718         r = cg_attach(controller, path, pid);
 719         if (r < 0) {
 720                 char prefix[strlen(path) + 1];
 721
 722                 /* This didn't work? Then let's try all prefixes of
 723                  * the destination */
 724
 725                 PATH_FOREACH_PREFIX(prefix, path) {
 726                         int q;
 727
 728                         q = cg_attach(controller, prefix, pid);
 729                         if (q >= 0)
 730                                 return q;
 731                 }
 732         }
 733
 734         return r;
 735 }
 736
 737 int cg_set_group_access(
 738                 const char *controller,
 739                 const char *path,
 740                 mode_t mode,
 741                 uid_t uid,
 742                 gid_t gid) {
 743
 744         _cleanup_free_ char *fs = NULL;
 745         int r;
 746
 747         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 748                 return 0;
 749
 750         if (mode != MODE_INVALID)
 751                 mode &= 0777;
 752
 753         r = cg_get_path(controller, path, NULL, &fs);
 754         if (r < 0)
 755                 return r;
 756
 757         return chmod_and_chown(fs, mode, uid, gid);
 758 }
 759
 760 int cg_set_task_access(
 761                 const char *controller,
 762                 const char *path,
 763                 mode_t mode,
 764                 uid_t uid,
 765                 gid_t gid) {
 766
 767         _cleanup_free_ char *fs = NULL, *procs = NULL;
 768         int r, unified;
 769
 770         assert(path);
 771
 772         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 773                 return 0;
 774
 775         if (mode != MODE_INVALID)
 776                 mode &= 0666;
 777
 778         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 779         if (r < 0)
 780                 return r;
 781
 782         r = chmod_and_chown(fs, mode, uid, gid);
 783         if (r < 0)
 784                 return r;
 785
 786         unified = cg_unified();
 787         if (unified < 0)
 788                 return unified;
 789         if (unified)
 790                 return 0;
 791
 792         /* Compatibility, Always keep values for "tasks" in sync with
 793          * "cgroup.procs" */
 794         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 795                 (void) chmod_and_chown(procs, mode, uid, gid);
 796
 797         return 0;
 798 }
 799
 800 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 801         _cleanup_fclose_ FILE *f = NULL;
 802         char line[LINE_MAX];
 803         const char *fs;
 804         size_t cs = 0;
 805         int unified;
 806
 807         assert(path);
 808         assert(pid >= 0);
 809
 810         unified = cg_unified();
 811         if (unified < 0)
 812                 return unified;
 813         if (unified == 0) {
 814                 if (controller) {
 815                         if (!cg_controller_is_valid(controller))
 816                                 return -EINVAL;
 817                 } else
 818                         controller = SYSTEMD_CGROUP_CONTROLLER;
 819
 820                 cs = strlen(controller);
 821         }
 822
 823         fs = procfs_file_alloca(pid, "cgroup");
 824         f = fopen(fs, "re");
 825         if (!f)
 826                 return errno == ENOENT ? -ESRCH : -errno;
 827
 828         FOREACH_LINE(line, f, return -errno) {
 829                 char *e, *p;
 830
 831                 truncate_nl(line);
 832
 833                 if (unified) {
 834                         e = startswith(line, "0:");
 835                         if (!e)
 836                                 continue;
 837
 838                         e = strchr(e, ':');
 839                         if (!e)
 840                                 continue;
 841                 } else {
 842                         char *l;
 843                         size_t k;
 844                         const char *word, *state;
 845                         bool found = false;
 846
 847                         l = strchr(line, ':');
 848                         if (!l)
 849                                 continue;
 850
 851                         l++;
 852                         e = strchr(l, ':');
 853                         if (!e)
 854                                 continue;
 855
 856                         *e = 0;
 857                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 858                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 859                                         found = true;
 860                                         break;
 861                                 }
 862                         }
 863
 864                         if (!found)
 865                                 continue;
 866                 }
 867
 868                 p = strdup(e + 1);
 869                 if (!p)
 870                         return -ENOMEM;
 871
 872                 *path = p;
 873                 return 0;
 874         }
 875
 876         return -ENOENT;
 877 }
 878
 879 int cg_install_release_agent(const char *controller, const char *agent) {
 880         _cleanup_free_ char *fs = NULL, *contents = NULL;
 881         const char *sc;
 882         int r, unified;
 883
 884         assert(agent);
 885
 886         unified = cg_unified();
 887         if (unified < 0)
 888                 return unified;
 889         if (unified) /* doesn't apply to unified hierarchy */
 890                 return -EOPNOTSUPP;
 891
 892         r = cg_get_path(controller, NULL, "release_agent", &fs);
 893         if (r < 0)
 894                 return r;
 895
 896         r = read_one_line_file(fs, &contents);
 897         if (r < 0)
 898                 return r;
 899
 900         sc = strstrip(contents);
 901         if (isempty(sc)) {
 902                 r = write_string_file(fs, agent, 0);
 903                 if (r < 0)
 904                         return r;
 905         } else if (!streq(sc, agent))
 906                 return -EEXIST;
 907
 908         fs = mfree(fs);
 909         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 910         if (r < 0)
 911                 return r;
 912
 913         contents = mfree(contents);
 914         r = read_one_line_file(fs, &contents);
 915         if (r < 0)
 916                 return r;
 917
 918         sc = strstrip(contents);
 919         if (streq(sc, "0")) {
 920                 r = write_string_file(fs, "1", 0);
 921                 if (r < 0)
 922                         return r;
 923
 924                 return 1;
 925         }
 926
 927         if (!streq(sc, "1"))
 928                 return -EIO;
 929
 930         return 0;
 931 }
 932
 933 int cg_uninstall_release_agent(const char *controller) {
 934         _cleanup_free_ char *fs = NULL;
 935         int r, unified;
 936
 937         unified = cg_unified();
 938         if (unified < 0)
 939                 return unified;
 940         if (unified) /* Doesn't apply to unified hierarchy */
 941                 return -EOPNOTSUPP;
 942
 943         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 944         if (r < 0)
 945                 return r;
 946
 947         r = write_string_file(fs, "0", 0);
 948         if (r < 0)
 949                 return r;
 950
 951         fs = mfree(fs);
 952
 953         r = cg_get_path(controller, NULL, "release_agent", &fs);
 954         if (r < 0)
 955                 return r;
 956
 957         r = write_string_file(fs, "", 0);
 958         if (r < 0)
 959                 return r;
 960
 961         return 0;
 962 }
 963
 964 int cg_is_empty(const char *controller, const char *path) {
 965         _cleanup_fclose_ FILE *f = NULL;
 966         pid_t pid;
 967         int r;
 968
 969         assert(path);
 970
 971         r = cg_enumerate_processes(controller, path, &f);
 972         if (r == -ENOENT)
 973                 return 1;
 974         if (r < 0)
 975                 return r;
 976
 977         r = cg_read_pid(f, &pid);
 978         if (r < 0)
 979                 return r;
 980
 981         return r == 0;
 982 }
 983
 984 int cg_is_empty_recursive(const char *controller, const char *path) {
 985         int unified, r;
 986
 987         assert(path);
 988
 989         /* The root cgroup is always populated */
 990         if (controller && (isempty(path) || path_equal(path, "/")))
 991                 return false;
 992
 993         unified = cg_unified();
 994         if (unified < 0)
 995                 return unified;
 996
 997         if (unified > 0) {
 998                 _cleanup_free_ char *populated = NULL, *t = NULL;
 999
1000                 /* On the unified hierarchy we can check empty state
1001                  * via the "cgroup.populated" attribute. */
1002
1003                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1004                 if (r < 0)
1005                         return r;
1006
1007                 r = read_one_line_file(populated, &t);
1008                 if (r < 0)
1009                         return r;
1010
1011                 return streq(t, "0");
1012         } else {
1013                 _cleanup_closedir_ DIR *d = NULL;
1014                 char *fn;
1015
1016                 r = cg_is_empty(controller, path);
1017                 if (r <= 0)
1018                         return r;
1019
1020                 r = cg_enumerate_subgroups(controller, path, &d);
1021                 if (r == -ENOENT)
1022                         return 1;
1023                 if (r < 0)
1024                         return r;
1025
1026                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1027                         _cleanup_free_ char *p = NULL;
1028
1029                         p = strjoin(path, "/", fn, NULL);
1030                         free(fn);
1031                         if (!p)
1032                                 return -ENOMEM;
1033
1034                         r = cg_is_empty_recursive(controller, p);
1035                         if (r <= 0)
1036                                 return r;
1037                 }
1038                 if (r < 0)
1039                         return r;
1040
1041                 return true;
1042         }
1043 }
1044
/* Splits a cgroup specification into its controller and path part.
 * Three forms are understood:
 *
 *   "/some/path"       -> controller = NULL, path = "/some/path"
 *   "controller"       -> controller set,    path = NULL
 *   "controller:/path" -> both set (path is NULL if nothing follows the colon)
 *
 * Either output pointer may be NULL if the caller is not interested
 * in that part. Returned strings are newly allocated. Returns 0 on
 * success, -EINVAL on malformed input, -ENOMEM on allocation failure. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctrl = NULL, *p = NULL;
        const char *colon;

        assert(spec);

        /* Form 1: just an absolute path */
        if (*spec == '/') {
                if (!path_is_safe(spec))
                        return -EINVAL;

                if (path) {
                        ctrl = strdup(spec);
                        if (!ctrl)
                                return -ENOMEM;

                        *path = path_kill_slashes(ctrl);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');

        /* Form 2: just a controller name */
        if (!colon) {
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        ctrl = strdup(spec);
                        if (!ctrl)
                                return -ENOMEM;

                        *controller = ctrl;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Form 3: controller and path, separated by a colon */
        ctrl = strndup(spec, colon-spec);
        if (!ctrl)
                return -ENOMEM;
        if (!cg_controller_is_valid(ctrl)) {
                free(ctrl);
                return -EINVAL;
        }

        if (!isempty(colon+1)) {
                p = strdup(colon+1);
                if (!p) {
                        free(ctrl);
                        return -ENOMEM;
                }

                if (!(path_is_safe(p) && path_is_absolute(p))) {
                        free(ctrl);
                        free(p);
                        return -EINVAL;
                }

                path_kill_slashes(p);
        }

        /* Hand the pieces to the caller, or drop what isn't wanted */
        if (controller)
                *controller = ctrl;
        else
                free(ctrl);

        if (path)
                *path = p;
        else
                free(p);

        return 0;
}
1127
1128 int cg_mangle_path(const char *path, char **result) {
1129         _cleanup_free_ char *c = NULL, *p = NULL;
1130         char *t;
1131         int r;
1132
1133         assert(path);
1134         assert(result);
1135
1136         /* First, check if it already is a filesystem path */
1137         if (path_startswith(path, "/sys/fs/cgroup")) {
1138
1139                 t = strdup(path);
1140                 if (!t)
1141                         return -ENOMEM;
1142
1143                 *result = path_kill_slashes(t);
1144                 return 0;
1145         }
1146
1147         /* Otherwise, treat it as cg spec */
1148         r = cg_split_spec(path, &c, &p);
1149         if (r < 0)
1150                 return r;
1151
1152         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1153 }
1154
1155 int cg_get_root_path(char **path) {
1156         char *p, *e;
1157         int r;
1158
1159         assert(path);
1160
1161         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1162         if (r < 0)
1163                 return r;
1164
1165         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1166         if (!e)
1167                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1168         if (!e)
1169                 e = endswith(p, "/system"); /* even more legacy */
1170         if (e)
1171                 *e = 0;
1172
1173         *path = p;
1174         return 0;
1175 }
1176
1177 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1178         _cleanup_free_ char *rt = NULL;
1179         char *p;
1180         int r;
1181
1182         assert(cgroup);
1183         assert(shifted);
1184
1185         if (!root) {
1186                 /* If the root was specified let's use that, otherwise
1187                  * let's determine it from PID 1 */
1188
1189                 r = cg_get_root_path(&rt);
1190                 if (r < 0)
1191                         return r;
1192
1193                 root = rt;
1194         }
1195
1196         p = path_startswith(cgroup, root);
1197         if (p && p > cgroup)
1198                 *shifted = p - 1;
1199         else
1200                 *shifted = cgroup;
1201
1202         return 0;
1203 }
1204
1205 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1206         _cleanup_free_ char *raw = NULL;
1207         const char *c;
1208         int r;
1209
1210         assert(pid >= 0);
1211         assert(cgroup);
1212
1213         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1214         if (r < 0)
1215                 return r;
1216
1217         r = cg_shift_path(raw, root, &c);
1218         if (r < 0)
1219                 return r;
1220
1221         if (c == raw) {
1222                 *cgroup = raw;
1223                 raw = NULL;
1224         } else {
1225                 char *n;
1226
1227                 n = strdup(c);
1228                 if (!n)
1229                         return -ENOMEM;
1230
1231                 *cgroup = n;
1232         }
1233
1234         return 0;
1235 }
1236
1237 int cg_path_decode_unit(const char *cgroup, char **unit){
1238         char *c, *s;
1239         size_t n;
1240
1241         assert(cgroup);
1242         assert(unit);
1243
1244         n = strcspn(cgroup, "/");
1245         if (n < 3)
1246                 return -ENXIO;
1247
1248         c = strndupa(cgroup, n);
1249         c = cg_unescape(c);
1250
1251         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1252                 return -ENXIO;
1253
1254         s = strdup(c);
1255         if (!s)
1256                 return -ENOMEM;
1257
1258         *unit = s;
1259         return 0;
1260 }
1261
1262 static bool valid_slice_name(const char *p, size_t n) {
1263
1264         if (!p)
1265                 return false;
1266
1267         if (n < strlen("x.slice"))
1268                 return false;
1269
1270         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1271                 char buf[n+1], *c;
1272
1273                 memcpy(buf, p, n);
1274                 buf[n] = 0;
1275
1276                 c = cg_unescape(buf);
1277
1278                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1279         }
1280
1281         return false;
1282 }
1283
/* Skips over all leading slice components of a cgroup path and
 * returns a pointer to the first component that is not a valid slice
 * name (leading slashes of that component are skipped, too). */
static const char *skip_slices(const char *p) {
        assert(p);

        for (p += strspn(p, "/");; p += strspn(p, "/")) {
                size_t len;

                len = strcspn(p, "/");
                if (!valid_slice_name(p, len))
                        break;

                p += len;
        }

        return p;
}
1301
/* Determines the unit a cgroup path belongs to: the first component
 * following all slice components. The name is returned newly
 * allocated in *ret. Returns -ENXIO if that component is no valid
 * unit name or is itself a slice, other negative errno-style values
 * on failure, 0 on success. */
int cg_path_get_unit(const char *path, char **ret) {
        const char *rest;
        char *name;
        int q;

        assert(path);
        assert(ret);

        rest = skip_slices(path);

        q = cg_path_decode_unit(rest, &name);
        if (q < 0)
                return q;

        /* All slices have been skipped already, whatever follows
         * must not be one. */
        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        free(name);
        return -ENXIO;
}
1325
1326 int cg_pid_get_unit(pid_t pid, char **unit) {
1327         _cleanup_free_ char *cgroup = NULL;
1328         int r;
1329
1330         assert(unit);
1331
1332         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1333         if (r < 0)
1334                 return r;
1335
1336         return cg_path_get_unit(cgroup, unit);
1337 }
1338
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer to the part of the path following the session
 * scope component (with leading slashes skipped), or NULL if the
 * path is empty, its first component is not of the form
 * "session-<id>.scope", or <id> is not a valid session identifier.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        /* The length check guarantees that there is at least one
         * character between prefix and suffix, so that the size
         * arithmetic below cannot underflow. */
        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                /* Extract the session identifier between the
                 * "session-" prefix and the ".scope" suffix */
                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1375
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to whatever follows the user manager service
 * component (leading slashes skipped), or NULL if the first
 * component is not "user@<uid>.service" with a parsable UID.
 */
static const char *skip_user_manager(const char *p) {
        size_t len, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        /* What remains between prefix and suffix is the UID */
        uid_len = len - 5 - 8;

        {
                char uid_str[uid_len + 1];

                memcpy(uid_str, p + 5, uid_len);
                uid_str[uid_len] = 0;

                /* User manager services never need unescaping, as
                 * their names cannot clash with the kernel's own
                 * names, hence no cg_unescape() call here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;
        }

        p += len;
        p += strspn(p, "/");

        return p;
}
1413
/* Skips everything in a cgroup path that precedes the user-level
 * part: any leading slices, followed by either a user manager
 * service or a session scope. Returns NULL if neither of the latter
 * two is found. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Prefer the user manager if it's in the path now, and fall
         * back to the user session otherwise. */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1430
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO
 * if the path does not carry a user manager or session prefix,
 * otherwise whatever cg_path_get_unit() returns for the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Once the user prefix is gone, what is left is parsed just
         * like a system unit path. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1446
1447 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1448         _cleanup_free_ char *cgroup = NULL;
1449         int r;
1450
1451         assert(unit);
1452
1453         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1454         if (r < 0)
1455                 return r;
1456
1457         return cg_path_get_user_unit(cgroup, unit);
1458 }
1459
1460 int cg_path_get_machine_name(const char *path, char **machine) {
1461         _cleanup_free_ char *u = NULL;
1462         const char *sl;
1463         int r;
1464
1465         r = cg_path_get_unit(path, &u);
1466         if (r < 0)
1467                 return r;
1468
1469         sl = strjoina("/run/systemd/machines/unit:", u);
1470         return readlink_malloc(sl, machine);
1471 }
1472
1473 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1474         _cleanup_free_ char *cgroup = NULL;
1475         int r;
1476
1477         assert(machine);
1478
1479         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1480         if (r < 0)
1481                 return r;
1482
1483         return cg_path_get_machine_name(cgroup, machine);
1484 }
1485
1486 int cg_path_get_session(const char *path, char **session) {
1487         _cleanup_free_ char *unit = NULL;
1488         char *start, *end;
1489         int r;
1490
1491         assert(path);
1492
1493         r = cg_path_get_unit(path, &unit);
1494         if (r < 0)
1495                 return r;
1496
1497         start = startswith(unit, "session-");
1498         if (!start)
1499                 return -ENXIO;
1500         end = endswith(start, ".scope");
1501         if (!end)
1502                 return -ENXIO;
1503
1504         *end = 0;
1505         if (!session_id_valid(start))
1506                 return -ENXIO;
1507
1508         if (session) {
1509                 char *rr;
1510
1511                 rr = strdup(start);
1512                 if (!rr)
1513                         return -ENOMEM;
1514
1515                 *session = rr;
1516         }
1517
1518         return 0;
1519 }
1520
1521 int cg_pid_get_session(pid_t pid, char **session) {
1522         _cleanup_free_ char *cgroup = NULL;
1523         int r;
1524
1525         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1526         if (r < 0)
1527                 return r;
1528
1529         return cg_path_get_session(cgroup, session);
1530 }
1531
1532 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1533         _cleanup_free_ char *slice = NULL;
1534         char *start, *end;
1535         int r;
1536
1537         assert(path);
1538
1539         r = cg_path_get_slice(path, &slice);
1540         if (r < 0)
1541                 return r;
1542
1543         start = startswith(slice, "user-");
1544         if (!start)
1545                 return -ENXIO;
1546         end = endswith(start, ".slice");
1547         if (!end)
1548                 return -ENXIO;
1549
1550         *end = 0;
1551         if (parse_uid(start, uid) < 0)
1552                 return -ENXIO;
1553
1554         return 0;
1555 }
1556
1557 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1558         _cleanup_free_ char *cgroup = NULL;
1559         int r;
1560
1561         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1562         if (r < 0)
1563                 return r;
1564
1565         return cg_path_get_owner_uid(cgroup, uid);
1566 }
1567
/* Determines the slice a cgroup path belongs to, and returns its
 * name newly allocated in *slice. If the path does not start with
 * any slice component, "-.slice" is returned. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        char *root;

        assert(p);
        assert(slice);

        /* Walk over the leading slice components, remembering the
         * most recent one, and stop at the first component that is
         * not a slice. */

        for (;;) {
                size_t len;

                p += strspn(p, "/");

                len = strcspn(p, "/");
                if (!valid_slice_name(p, len))
                        break;

                last = p;
                p += len;
        }

        if (last)
                return cg_path_decode_unit(last, slice);

        /* No slice seen at all */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1603
1604 int cg_pid_get_slice(pid_t pid, char **slice) {
1605         _cleanup_free_ char *cgroup = NULL;
1606         int r;
1607
1608         assert(slice);
1609
1610         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1611         if (r < 0)
1612                 return r;
1613
1614         return cg_path_get_slice(cgroup, slice);
1615 }
1616
/* Determines the user-level slice of a cgroup path. Returns -ENXIO
 * if the path does not carry a user manager or session prefix,
 * otherwise whatever cg_path_get_slice() returns for the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Behind the user prefix things look just like a system
         * slice path, hence the same parser does the job. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1630
1631 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1632         _cleanup_free_ char *cgroup = NULL;
1633         int r;
1634
1635         assert(slice);
1636
1637         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1638         if (r < 0)
1639                 return r;
1640
1641         return cg_path_get_user_slice(cgroup, slice);
1642 }
1643
1644 char *cg_escape(const char *p) {
1645         bool need_prefix = false;
1646
1647         /* This implements very minimal escaping for names to be used
1648          * as file names in the cgroup tree: any name which might
1649          * conflict with a kernel name or is prefixed with '_' is
1650          * prefixed with a '_'. That way, when reading cgroup names it
1651          * is sufficient to remove a single prefixing underscore if
1652          * there is one. */
1653
1654         /* The return value of this function (unlike cg_unescape())
1655          * needs free()! */
1656
1657         if (p[0] == 0 ||
1658             p[0] == '_' ||
1659             p[0] == '.' ||
1660             streq(p, "notify_on_release") ||
1661             streq(p, "release_agent") ||
1662             streq(p, "tasks") ||
1663             startswith(p, "cgroup."))
1664                 need_prefix = true;
1665         else {
1666                 const char *dot;
1667
1668                 dot = strrchr(p, '.');
1669                 if (dot) {
1670                         CGroupController c;
1671                         size_t l = dot - p;
1672
1673                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1674                                 const char *n;
1675
1676                                 n = cgroup_controller_to_string(c);
1677
1678                                 if (l != strlen(n))
1679                                         continue;
1680
1681                                 if (memcmp(p, n, l) != 0)
1682                                         continue;
1683
1684                                 need_prefix = true;
1685                                 break;
1686                         }
1687                 }
1688         }
1689
1690         if (need_prefix)
1691                 return strappend("_", p);
1692
1693         return strdup(p);
1694 }
1695
/* Undoes cg_escape(): skips one leading underscore, if there is one.
 *
 * The return value of this function (unlike cg_escape()) points
 * into the string passed in and hence doesn't need free()! */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1707
1708 #define CONTROLLER_VALID                        \
1709         DIGITS LETTERS                          \
1710         "_"
1711
1712 bool cg_controller_is_valid(const char *p) {
1713         const char *t, *s;
1714
1715         if (!p)
1716                 return false;
1717
1718         s = startswith(p, "name=");
1719         if (s)
1720                 p = s;
1721
1722         if (*p == 0 || *p == '_')
1723                 return false;
1724
1725         for (t = p; *t; t++)
1726                 if (!strchr(CONTROLLER_VALID, *t))
1727                         return false;
1728
1729         if (t - p > FILENAME_MAX)
1730                 return false;
1731
1732         return true;
1733 }
1734
1735 int cg_slice_to_path(const char *unit, char **ret) {
1736         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1737         const char *dash;
1738         int r;
1739
1740         assert(unit);
1741         assert(ret);
1742
1743         if (streq(unit, "-.slice")) {
1744                 char *x;
1745
1746                 x = strdup("");
1747                 if (!x)
1748                         return -ENOMEM;
1749                 *ret = x;
1750                 return 0;
1751         }
1752
1753         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1754                 return -EINVAL;
1755
1756         if (!endswith(unit, ".slice"))
1757                 return -EINVAL;
1758
1759         r = unit_name_to_prefix(unit, &p);
1760         if (r < 0)
1761                 return r;
1762
1763         dash = strchr(p, '-');
1764
1765         /* Don't allow initial dashes */
1766         if (dash == p)
1767                 return -EINVAL;
1768
1769         while (dash) {
1770                 _cleanup_free_ char *escaped = NULL;
1771                 char n[dash - p + sizeof(".slice")];
1772
1773                 /* Don't allow trailing or double dashes */
1774                 if (dash[1] == 0 || dash[1] == '-')
1775                         return -EINVAL;
1776
1777                 strcpy(stpncpy(n, p, dash - p), ".slice");
1778                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1779                         return -EINVAL;
1780
1781                 escaped = cg_escape(n);
1782                 if (!escaped)
1783                         return -ENOMEM;
1784
1785                 if (!strextend(&s, escaped, "/", NULL))
1786                         return -ENOMEM;
1787
1788                 dash = strchr(dash+1, '-');
1789         }
1790
1791         e = cg_escape(unit);
1792         if (!e)
1793                 return -ENOMEM;
1794
1795         if (!strextend(&s, e, NULL))
1796                 return -ENOMEM;
1797
1798         *ret = s;
1799         s = NULL;
1800
1801         return 0;
1802 }
1803
1804 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1805         _cleanup_free_ char *p = NULL;
1806         int r;
1807
1808         r = cg_get_path(controller, path, attribute, &p);
1809         if (r < 0)
1810                 return r;
1811
1812         return write_string_file(p, value, 0);
1813 }
1814
1815 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1816         _cleanup_free_ char *p = NULL;
1817         int r;
1818
1819         r = cg_get_path(controller, path, attribute, &p);
1820         if (r < 0)
1821                 return r;
1822
1823         return read_one_line_file(p, ret);
1824 }
1825
1826 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1827         CGroupController c;
1828         int r, unified;
1829
1830         /* This one will create a cgroup in our private tree, but also
1831          * duplicate it in the trees specified in mask, and remove it
1832          * in all others */
1833
1834         /* First create the cgroup in our own hierarchy. */
1835         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1836         if (r < 0)
1837                 return r;
1838
1839         /* If we are in the unified hierarchy, we are done now */
1840         unified = cg_unified();
1841         if (unified < 0)
1842                 return unified;
1843         if (unified > 0)
1844                 return 0;
1845
1846         /* Otherwise, do the same in the other hierarchies */
1847         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1848                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1849                 const char *n;
1850
1851                 n = cgroup_controller_to_string(c);
1852
1853                 if (mask & bit)
1854                         (void) cg_create(n, path);
1855                 else if (supported & bit)
1856                         (void) cg_trim(n, path, true);
1857         }
1858
1859         return 0;
1860 }
1861
1862 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1863         CGroupController c;
1864         int r, unified;
1865
1866         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1867         if (r < 0)
1868                 return r;
1869
1870         unified = cg_unified();
1871         if (unified < 0)
1872                 return unified;
1873         if (unified > 0)
1874                 return 0;
1875
1876         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1877                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1878                 const char *p = NULL;
1879
1880                 if (!(supported & bit))
1881                         continue;
1882
1883                 if (path_callback)
1884                         p = path_callback(bit, userdata);
1885
1886                 if (!p)
1887                         p = path;
1888
1889                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1890         }
1891
1892         return 0;
1893 }
1894
1895 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1896         Iterator i;
1897         void *pidp;
1898         int r = 0;
1899
1900         SET_FOREACH(pidp, pids, i) {
1901                 pid_t pid = PTR_TO_LONG(pidp);
1902                 int q;
1903
1904                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1905                 if (q < 0 && r >= 0)
1906                         r = q;
1907         }
1908
1909         return r;
1910 }
1911
1912 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1913         CGroupController c;
1914         int r, unified;
1915
1916         if (!path_equal(from, to))  {
1917                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1918                 if (r < 0)
1919                         return r;
1920         }
1921
1922         unified = cg_unified();
1923         if (unified < 0)
1924                 return unified;
1925         if (unified > 0)
1926                 return r;
1927
1928         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1929                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1930                 const char *p = NULL;
1931
1932                 if (!(supported & bit))
1933                         continue;
1934
1935                 if (to_callback)
1936                         p = to_callback(bit, userdata);
1937
1938                 if (!p)
1939                         p = to;
1940
1941                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1942         }
1943
1944         return 0;
1945 }
1946
1947 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1948         CGroupController c;
1949         int r, unified;
1950
1951         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1952         if (r < 0)
1953                 return r;
1954
1955         unified = cg_unified();
1956         if (unified < 0)
1957                 return unified;
1958         if (unified > 0)
1959                 return r;
1960
1961         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1962                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1963
1964                 if (!(supported & bit))
1965                         continue;
1966
1967                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1968         }
1969
1970         return 0;
1971 }
1972
1973 int cg_mask_supported(CGroupMask *ret) {
1974         CGroupMask mask = 0;
1975         int r, unified;
1976
1977         /* Determines the mask of supported cgroup controllers. Only
1978          * includes controllers we can make sense of and that are
1979          * actually accessible. */
1980
1981         unified = cg_unified();
1982         if (unified < 0)
1983                 return unified;
1984         if (unified > 0) {
1985                 _cleanup_free_ char *controllers = NULL;
1986                 const char *c;
1987
1988                 /* In the unified hierarchy we can read the supported
1989                  * and accessible controllers from a the top-level
1990                  * cgroup attribute */
1991
1992                 r = read_one_line_file("/sys/fs/cgroup/cgroup.controllers", &controllers);
1993                 if (r < 0)
1994                         return r;
1995
1996                 c = controllers;
1997                 for (;;) {
1998                         _cleanup_free_ char *n = NULL;
1999                         CGroupController v;
2000
2001                         r = extract_first_word(&c, &n, NULL, 0);
2002                         if (r < 0)
2003                                 return r;
2004                         if (r == 0)
2005                                 break;
2006
2007                         v = cgroup_controller_from_string(n);
2008                         if (v < 0)
2009                                 continue;
2010
2011                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2012                 }
2013
2014                 /* Currently, we only support the memory controller in
2015                  * the unified hierarchy, mask everything else off. */
2016                 mask &= CGROUP_MASK_MEMORY;
2017
2018         } else {
2019                 CGroupController c;
2020
2021                 /* In the legacy hierarchy, we check whether which
2022                  * hierarchies are mounted. */
2023
2024                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2025                         const char *n;
2026
2027                         n = cgroup_controller_to_string(c);
2028                         if (controller_is_accessible(n) >= 0)
2029                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2030                 }
2031         }
2032
2033         *ret = mask;
2034         return 0;
2035 }
2036
2037 int cg_kernel_controllers(Set *controllers) {
2038         _cleanup_fclose_ FILE *f = NULL;
2039         char buf[LINE_MAX];
2040         int r;
2041
2042         assert(controllers);
2043
2044         /* Determines the full list of kernel-known controllers. Might
2045          * include controllers we don't actually support, arbitrary
2046          * named hierarchies and controllers that aren't currently
2047          * accessible (because not mounted). */
2048
2049         f = fopen("/proc/cgroups", "re");
2050         if (!f) {
2051                 if (errno == ENOENT)
2052                         return 0;
2053                 return -errno;
2054         }
2055
2056         /* Ignore the header line */
2057         (void) fgets(buf, sizeof(buf), f);
2058
2059         for (;;) {
2060                 char *controller;
2061                 int enabled = 0;
2062
2063                 errno = 0;
2064                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2065
2066                         if (feof(f))
2067                                 break;
2068
2069                         if (ferror(f) && errno != 0)
2070                                 return -errno;
2071
2072                         return -EBADMSG;
2073                 }
2074
2075                 if (!enabled) {
2076                         free(controller);
2077                         continue;
2078                 }
2079
2080                 if (!cg_controller_is_valid(controller)) {
2081                         free(controller);
2082                         return -EBADMSG;
2083                 }
2084
2085                 r = set_consume(controllers, controller);
2086                 if (r < 0)
2087                         return r;
2088         }
2089
2090         return 0;
2091 }
2092
2093 static thread_local int unified_cache = -1;
2094
2095 int cg_unified(void) {
2096         struct statfs fs;
2097
2098         /* Checks if we support the unified hierarchy. Returns an
2099          * error when the cgroup hierarchies aren't mounted yet or we
2100          * have any other trouble determining if the unified hierarchy
2101          * is supported. */
2102
2103         if (unified_cache >= 0)
2104                 return unified_cache;
2105
2106         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2107                 return -errno;
2108
2109         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2110                 unified_cache = true;
2111         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2112                 unified_cache = false;
2113         else
2114                 return -ENOEXEC;
2115
2116         return unified_cache;
2117 }
2118
2119 void cg_unified_flush(void) {
2120         unified_cache = -1;
2121 }
2122
2123 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2124         _cleanup_free_ char *fs = NULL;
2125         CGroupController c;
2126         int r, unified;
2127
2128         assert(p);
2129
2130         if (supported == 0)
2131                 return 0;
2132
2133         unified = cg_unified();
2134         if (unified < 0)
2135                 return unified;
2136         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2137                 return 0;
2138
2139         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2140         if (r < 0)
2141                 return r;
2142
2143         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2144                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2145                 const char *n;
2146
2147                 if (!(supported & bit))
2148                         continue;
2149
2150                 n = cgroup_controller_to_string(c);
2151                 {
2152                         char s[1 + strlen(n) + 1];
2153
2154                         s[0] = mask & bit ? '+' : '-';
2155                         strcpy(s + 1, n);
2156
2157                         r = write_string_file(fs, s, 0);
2158                         if (r < 0)
2159                                 log_warning_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2160                 }
2161         }
2162
2163         return 0;
2164 }
2165
2166 bool cg_is_unified_wanted(void) {
2167         static thread_local int wanted = -1;
2168         int r, unified;
2169
2170         /* If the hierarchy is already mounted, then follow whatever
2171          * was chosen for it. */
2172         unified = cg_unified();
2173         if (unified >= 0)
2174                 return unified;
2175
2176         /* Otherwise, let's see what the kernel command line has to
2177          * say. Since checking that is expensive, let's cache the
2178          * result. */
2179         if (wanted >= 0)
2180                 return wanted;
2181
2182         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2183         if (r > 0)
2184                 return (wanted = true);
2185         else {
2186                 _cleanup_free_ char *value = NULL;
2187
2188                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2189                 if (r < 0)
2190                         return false;
2191                 if (r == 0)
2192                         return (wanted = false);
2193
2194                 return (wanted = parse_boolean(value) > 0);
2195         }
2196 }
2197
/* The exact negation of cg_is_unified_wanted(). */
bool cg_is_legacy_wanted(void) {
        bool unified_wanted = cg_is_unified_wanted();

        return !unified_wanted;
}
2201
2202 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2203         [CGROUP_CONTROLLER_CPU] = "cpu",
2204         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2205         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2206         [CGROUP_CONTROLLER_MEMORY] = "memory",
2207         [CGROUP_CONTROLLER_DEVICE] = "device",
2208 };
2209
2210 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);