src/basic/cgroup-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <errno.h>
  23 #include <unistd.h>
  24 #include <signal.h>
  25 #include <string.h>
  26 #include <stdlib.h>
  27 #include <dirent.h>
  28 #include <sys/stat.h>
  29 #include <sys/types.h>
  30 #include <ftw.h>
  31
  32 #include "set.h"
  33 #include "macro.h"
  34 #include "util.h"
  35 #include "formats-util.h"
  36 #include "process-util.h"
  37 #include "path-util.h"
  38 #include "unit-name.h"
  39 #include "fileio.h"
  40 #include "special.h"
  41 #include "mkdir.h"
  42 #include "login-util.h"
  43 #include "cgroup-util.h"
  44
  45 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  46         _cleanup_free_ char *fs = NULL;
  47         FILE *f;
  48         int r;
  49
  50         assert(_f);
  51
  52         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  53         if (r < 0)
  54                 return r;
  55
  56         f = fopen(fs, "re");
  57         if (!f)
  58                 return -errno;
  59
  60         *_f = f;
  61         return 0;
  62 }
  63
  64 int cg_read_pid(FILE *f, pid_t *_pid) {
  65         unsigned long ul;
  66
  67         /* Note that the cgroup.procs might contain duplicates! See
  68          * cgroups.txt for details. */
  69
  70         assert(f);
  71         assert(_pid);
  72
  73         errno = 0;
  74         if (fscanf(f, "%lu", &ul) != 1) {
  75
  76                 if (feof(f))
  77                         return 0;
  78
  79                 return errno ? -errno : -EIO;
  80         }
  81
  82         if (ul <= 0)
  83                 return -EIO;
  84
  85         *_pid = (pid_t) ul;
  86         return 1;
  87 }
  88
  89 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
  90         _cleanup_free_ char *fs = NULL;
  91         int r;
  92         DIR *d;
  93
  94         assert(_d);
  95
  96         /* This is not recursive! */
  97
  98         r = cg_get_path(controller, path, NULL, &fs);
  99         if (r < 0)
 100                 return r;
 101
 102         d = opendir(fs);
 103         if (!d)
 104                 return -errno;
 105
 106         *_d = d;
 107         return 0;
 108 }
 109
 110 int cg_read_subgroup(DIR *d, char **fn) {
 111         struct dirent *de;
 112
 113         assert(d);
 114         assert(fn);
 115
 116         FOREACH_DIRENT_ALL(de, d, return -errno) {
 117                 char *b;
 118
 119                 if (de->d_type != DT_DIR)
 120                         continue;
 121
 122                 if (streq(de->d_name, ".") ||
 123                     streq(de->d_name, ".."))
 124                         continue;
 125
 126                 b = strdup(de->d_name);
 127                 if (!b)
 128                         return -ENOMEM;
 129
 130                 *fn = b;
 131                 return 1;
 132         }
 133
 134         return 0;
 135 }
 136
 137 int cg_rmdir(const char *controller, const char *path) {
 138         _cleanup_free_ char *p = NULL;
 139         int r;
 140
 141         r = cg_get_path(controller, path, NULL, &p);
 142         if (r < 0)
 143                 return r;
 144
 145         r = rmdir(p);
 146         if (r < 0 && errno != ENOENT)
 147                 return -errno;
 148
 149         return 0;
 150 }
 151
 152 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
 153         _cleanup_set_free_ Set *allocated_set = NULL;
 154         bool done = false;
 155         int r, ret = 0;
 156         pid_t my_pid;
 157
 158         assert(sig >= 0);
 159
 160         /* This goes through the tasks list and kills them all. This
 161          * is repeated until no further processes are added to the
 162          * tasks list, to properly handle forking processes */
 163
 164         if (!s) {
 165                 s = allocated_set = set_new(NULL);
 166                 if (!s)
 167                         return -ENOMEM;
 168         }
 169
 170         my_pid = getpid();
 171
 172         do {
 173                 _cleanup_fclose_ FILE *f = NULL;
 174                 pid_t pid = 0;
 175                 done = true;
 176
 177                 r = cg_enumerate_processes(controller, path, &f);
 178                 if (r < 0) {
 179                         if (ret >= 0 && r != -ENOENT)
 180                                 return r;
 181
 182                         return ret;
 183                 }
 184
 185                 while ((r = cg_read_pid(f, &pid)) > 0) {
 186
 187                         if (ignore_self && pid == my_pid)
 188                                 continue;
 189
 190                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 191                                 continue;
 192
 193                         /* If we haven't killed this process yet, kill
 194                          * it */
 195                         if (kill(pid, sig) < 0) {
 196                                 if (ret >= 0 && errno != ESRCH)
 197                                         ret = -errno;
 198                         } else {
 199                                 if (sigcont && sig != SIGKILL)
 200                                         (void) kill(pid, SIGCONT);
 201
 202                                 if (ret == 0)
 203                                         ret = 1;
 204                         }
 205
 206                         done = false;
 207
 208                         r = set_put(s, PID_TO_PTR(pid));
 209                         if (r < 0) {
 210                                 if (ret >= 0)
 211                                         return r;
 212
 213                                 return ret;
 214                         }
 215                 }
 216
 217                 if (r < 0) {
 218                         if (ret >= 0)
 219                                 return r;
 220
 221                         return ret;
 222                 }
 223
 224                 /* To avoid racing against processes which fork
 225                  * quicker than we can kill them we repeat this until
 226                  * no new pids need to be killed. */
 227
 228         } while (!done);
 229
 230         return ret;
 231 }
 232
 233 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 234         _cleanup_set_free_ Set *allocated_set = NULL;
 235         _cleanup_closedir_ DIR *d = NULL;
 236         int r, ret;
 237         char *fn;
 238
 239         assert(path);
 240         assert(sig >= 0);
 241
 242         if (!s) {
 243                 s = allocated_set = set_new(NULL);
 244                 if (!s)
 245                         return -ENOMEM;
 246         }
 247
 248         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 249
 250         r = cg_enumerate_subgroups(controller, path, &d);
 251         if (r < 0) {
 252                 if (ret >= 0 && r != -ENOENT)
 253                         return r;
 254
 255                 return ret;
 256         }
 257
 258         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 259                 _cleanup_free_ char *p = NULL;
 260
 261                 p = strjoin(path, "/", fn, NULL);
 262                 free(fn);
 263                 if (!p)
 264                         return -ENOMEM;
 265
 266                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 267                 if (r != 0 && ret >= 0)
 268                         ret = r;
 269         }
 270
 271         if (ret >= 0 && r < 0)
 272                 ret = r;
 273
 274         if (rem) {
 275                 r = cg_rmdir(controller, path);
 276                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 277                         return r;
 278         }
 279
 280         return ret;
 281 }
 282
 283 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
 284         bool done = false;
 285         _cleanup_set_free_ Set *s = NULL;
 286         int r, ret = 0;
 287         pid_t my_pid;
 288
 289         assert(cfrom);
 290         assert(pfrom);
 291         assert(cto);
 292         assert(pto);
 293
 294         s = set_new(NULL);
 295         if (!s)
 296                 return -ENOMEM;
 297
 298         my_pid = getpid();
 299
 300         do {
 301                 _cleanup_fclose_ FILE *f = NULL;
 302                 pid_t pid = 0;
 303                 done = true;
 304
 305                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 306                 if (r < 0) {
 307                         if (ret >= 0 && r != -ENOENT)
 308                                 return r;
 309
 310                         return ret;
 311                 }
 312
 313                 while ((r = cg_read_pid(f, &pid)) > 0) {
 314
 315                         /* This might do weird stuff if we aren't a
 316                          * single-threaded program. However, we
 317                          * luckily know we are not */
 318                         if (ignore_self && pid == my_pid)
 319                                 continue;
 320
 321                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 322                                 continue;
 323
 324                         /* Ignore kernel threads. Since they can only
 325                          * exist in the root cgroup, we only check for
 326                          * them there. */
 327                         if (cfrom &&
 328                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 329                             is_kernel_thread(pid) > 0)
 330                                 continue;
 331
 332                         r = cg_attach(cto, pto, pid);
 333                         if (r < 0) {
 334                                 if (ret >= 0 && r != -ESRCH)
 335                                         ret = r;
 336                         } else if (ret == 0)
 337                                 ret = 1;
 338
 339                         done = false;
 340
 341                         r = set_put(s, PID_TO_PTR(pid));
 342                         if (r < 0) {
 343                                 if (ret >= 0)
 344                                         return r;
 345
 346                                 return ret;
 347                         }
 348                 }
 349
 350                 if (r < 0) {
 351                         if (ret >= 0)
 352                                 return r;
 353
 354                         return ret;
 355                 }
 356         } while (!done);
 357
 358         return ret;
 359 }
 360
 361 int cg_migrate_recursive(
 362                 const char *cfrom,
 363                 const char *pfrom,
 364                 const char *cto,
 365                 const char *pto,
 366                 bool ignore_self,
 367                 bool rem) {
 368
 369         _cleanup_closedir_ DIR *d = NULL;
 370         int r, ret = 0;
 371         char *fn;
 372
 373         assert(cfrom);
 374         assert(pfrom);
 375         assert(cto);
 376         assert(pto);
 377
 378         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 379
 380         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 381         if (r < 0) {
 382                 if (ret >= 0 && r != -ENOENT)
 383                         return r;
 384
 385                 return ret;
 386         }
 387
 388         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 389                 _cleanup_free_ char *p = NULL;
 390
 391                 p = strjoin(pfrom, "/", fn, NULL);
 392                 free(fn);
 393                 if (!p)
 394                         return -ENOMEM;
 395
 396                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 397                 if (r != 0 && ret >= 0)
 398                         ret = r;
 399         }
 400
 401         if (r < 0 && ret >= 0)
 402                 ret = r;
 403
 404         if (rem) {
 405                 r = cg_rmdir(cfrom, pfrom);
 406                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 407                         return r;
 408         }
 409
 410         return ret;
 411 }
 412
 413 int cg_migrate_recursive_fallback(
 414                 const char *cfrom,
 415                 const char *pfrom,
 416                 const char *cto,
 417                 const char *pto,
 418                 bool ignore_self,
 419                 bool rem) {
 420
 421         int r;
 422
 423         assert(cfrom);
 424         assert(pfrom);
 425         assert(cto);
 426         assert(pto);
 427
 428         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 429         if (r < 0) {
 430                 char prefix[strlen(pto) + 1];
 431
 432                 /* This didn't work? Then let's try all prefixes of the destination */
 433
 434                 PATH_FOREACH_PREFIX(prefix, pto) {
 435                         int q;
 436
 437                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 438                         if (q >= 0)
 439                                 return q;
 440                 }
 441         }
 442
 443         return r;
 444 }
 445
 446 static const char *controller_to_dirname(const char *controller) {
 447         const char *e;
 448
 449         assert(controller);
 450
 451         /* Converts a controller name to the directory name below
 452          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 453          * just cuts off the name= prefixed used for named
 454          * hierarchies, if it is specified. */
 455
 456         e = startswith(controller, "name=");
 457         if (e)
 458                 return e;
 459
 460         return controller;
 461 }
 462
 463 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 464         const char *dn;
 465         char *t = NULL;
 466
 467         assert(fs);
 468         assert(controller);
 469
 470         dn = controller_to_dirname(controller);
 471
 472         if (isempty(path) && isempty(suffix))
 473                 t = strappend("/sys/fs/cgroup/", dn);
 474         else if (isempty(path))
 475                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 476         else if (isempty(suffix))
 477                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 478         else
 479                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 480         if (!t)
 481                 return -ENOMEM;
 482
 483         *fs = t;
 484         return 0;
 485 }
 486
 487 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 488         char *t;
 489
 490         assert(fs);
 491
 492         if (isempty(path) && isempty(suffix))
 493                 t = strdup("/sys/fs/cgroup");
 494         else if (isempty(path))
 495                 t = strappend("/sys/fs/cgroup/", suffix);
 496         else if (isempty(suffix))
 497                 t = strappend("/sys/fs/cgroup/", path);
 498         else
 499                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 500         if (!t)
 501                 return -ENOMEM;
 502
 503         *fs = t;
 504         return 0;
 505 }
 506
 507 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 508         int unified, r;
 509
 510         assert(fs);
 511
 512         if (!controller) {
 513                 char *t;
 514
 515                 /* If no controller is specified, we return the path
 516                  * *below* the controllers, without any prefix. */
 517
 518                 if (!path && !suffix)
 519                         return -EINVAL;
 520
 521                 if (!suffix)
 522                         t = strdup(path);
 523                 else if (!path)
 524                         t = strdup(suffix);
 525                 else
 526                         t = strjoin(path, "/", suffix, NULL);
 527                 if (!t)
 528                         return -ENOMEM;
 529
 530                 *fs = path_kill_slashes(t);
 531                 return 0;
 532         }
 533
 534         if (!cg_controller_is_valid(controller))
 535                 return -EINVAL;
 536
 537         unified = cg_unified();
 538         if (unified < 0)
 539                 return unified;
 540
 541         if (unified > 0)
 542                 r = join_path_unified(path, suffix, fs);
 543         else
 544                 r = join_path_legacy(controller, path, suffix, fs);
 545         if (r < 0)
 546                 return r;
 547
 548         path_kill_slashes(*fs);
 549         return 0;
 550 }
 551
 552 static int controller_is_accessible(const char *controller) {
 553         int unified;
 554
 555         assert(controller);
 556
 557         /* Checks whether a specific controller is accessible,
 558          * i.e. its hierarchy mounted. In the unified hierarchy all
 559          * controllers are considered accessible, except for the named
 560          * hierarchies */
 561
 562         if (!cg_controller_is_valid(controller))
 563                 return -EINVAL;
 564
 565         unified = cg_unified();
 566         if (unified < 0)
 567                 return unified;
 568         if (unified > 0) {
 569                 /* We don't support named hierarchies if we are using
 570                  * the unified hierarchy. */
 571
 572                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 573                         return 0;
 574
 575                 if (startswith(controller, "name="))
 576                         return -EOPNOTSUPP;
 577
 578         } else {
 579                 const char *cc, *dn;
 580
 581                 dn = controller_to_dirname(controller);
 582                 cc = strjoina("/sys/fs/cgroup/", dn);
 583
 584                 if (laccess(cc, F_OK) < 0)
 585                         return -errno;
 586         }
 587
 588         return 0;
 589 }
 590
 591 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 592         int r;
 593
 594         assert(controller);
 595         assert(fs);
 596
 597         /* Check if the specified controller is actually accessible */
 598         r = controller_is_accessible(controller);
 599         if (r < 0)
 600                 return r;
 601
 602         return cg_get_path(controller, path, suffix, fs);
 603 }
 604
 605 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 606         assert(path);
 607         assert(sb);
 608         assert(ftwbuf);
 609
 610         if (typeflag != FTW_DP)
 611                 return 0;
 612
 613         if (ftwbuf->level < 1)
 614                 return 0;
 615
 616         (void) rmdir(path);
 617         return 0;
 618 }
 619
 620 int cg_trim(const char *controller, const char *path, bool delete_root) {
 621         _cleanup_free_ char *fs = NULL;
 622         int r = 0;
 623
 624         assert(path);
 625
 626         r = cg_get_path(controller, path, NULL, &fs);
 627         if (r < 0)
 628                 return r;
 629
 630         errno = 0;
 631         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 632                 if (errno == ENOENT)
 633                         r = 0;
 634                 else if (errno != 0)
 635                         r = -errno;
 636                 else
 637                         r = -EIO;
 638         }
 639
 640         if (delete_root) {
 641                 if (rmdir(fs) < 0 && errno != ENOENT)
 642                         return -errno;
 643         }
 644
 645         return r;
 646 }
 647
 648 int cg_create(const char *controller, const char *path) {
 649         _cleanup_free_ char *fs = NULL;
 650         int r;
 651
 652         r = cg_get_path_and_check(controller, path, NULL, &fs);
 653         if (r < 0)
 654                 return r;
 655
 656         r = mkdir_parents(fs, 0755);
 657         if (r < 0)
 658                 return r;
 659
 660         if (mkdir(fs, 0755) < 0) {
 661
 662                 if (errno == EEXIST)
 663                         return 0;
 664
 665                 return -errno;
 666         }
 667
 668         return 1;
 669 }
 670
 671 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 672         int r, q;
 673
 674         assert(pid >= 0);
 675
 676         r = cg_create(controller, path);
 677         if (r < 0)
 678                 return r;
 679
 680         q = cg_attach(controller, path, pid);
 681         if (q < 0)
 682                 return q;
 683
 684         /* This does not remove the cgroup on failure */
 685         return r;
 686 }
 687
 688 int cg_attach(const char *controller, const char *path, pid_t pid) {
 689         _cleanup_free_ char *fs = NULL;
 690         char c[DECIMAL_STR_MAX(pid_t) + 2];
 691         int r;
 692
 693         assert(path);
 694         assert(pid >= 0);
 695
 696         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 697         if (r < 0)
 698                 return r;
 699
 700         if (pid == 0)
 701                 pid = getpid();
 702
 703         snprintf(c, sizeof(c), PID_FMT"\n", pid);
 704
 705         return write_string_file(fs, c, 0);
 706 }
 707
 708 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 709         int r;
 710
 711         assert(controller);
 712         assert(path);
 713         assert(pid >= 0);
 714
 715         r = cg_attach(controller, path, pid);
 716         if (r < 0) {
 717                 char prefix[strlen(path) + 1];
 718
 719                 /* This didn't work? Then let's try all prefixes of
 720                  * the destination */
 721
 722                 PATH_FOREACH_PREFIX(prefix, path) {
 723                         int q;
 724
 725                         q = cg_attach(controller, prefix, pid);
 726                         if (q >= 0)
 727                                 return q;
 728                 }
 729         }
 730
 731         return r;
 732 }
 733
 734 int cg_set_group_access(
 735                 const char *controller,
 736                 const char *path,
 737                 mode_t mode,
 738                 uid_t uid,
 739                 gid_t gid) {
 740
 741         _cleanup_free_ char *fs = NULL;
 742         int r;
 743
 744         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 745                 return 0;
 746
 747         if (mode != MODE_INVALID)
 748                 mode &= 0777;
 749
 750         r = cg_get_path(controller, path, NULL, &fs);
 751         if (r < 0)
 752                 return r;
 753
 754         return chmod_and_chown(fs, mode, uid, gid);
 755 }
 756
 757 int cg_set_task_access(
 758                 const char *controller,
 759                 const char *path,
 760                 mode_t mode,
 761                 uid_t uid,
 762                 gid_t gid) {
 763
 764         _cleanup_free_ char *fs = NULL, *procs = NULL;
 765         int r, unified;
 766
 767         assert(path);
 768
 769         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 770                 return 0;
 771
 772         if (mode != MODE_INVALID)
 773                 mode &= 0666;
 774
 775         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 776         if (r < 0)
 777                 return r;
 778
 779         r = chmod_and_chown(fs, mode, uid, gid);
 780         if (r < 0)
 781                 return r;
 782
 783         unified = cg_unified();
 784         if (unified < 0)
 785                 return unified;
 786         if (unified)
 787                 return 0;
 788
 789         /* Compatibility, Always keep values for "tasks" in sync with
 790          * "cgroup.procs" */
 791         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 792                 (void) chmod_and_chown(procs, mode, uid, gid);
 793
 794         return 0;
 795 }
 796
 797 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 798         _cleanup_fclose_ FILE *f = NULL;
 799         char line[LINE_MAX];
 800         const char *fs;
 801         size_t cs = 0;
 802         int unified;
 803
 804         assert(path);
 805         assert(pid >= 0);
 806
 807         unified = cg_unified();
 808         if (unified < 0)
 809                 return unified;
 810         if (unified == 0) {
 811                 if (controller) {
 812                         if (!cg_controller_is_valid(controller))
 813                                 return -EINVAL;
 814                 } else
 815                         controller = SYSTEMD_CGROUP_CONTROLLER;
 816
 817                 cs = strlen(controller);
 818         }
 819
 820         fs = procfs_file_alloca(pid, "cgroup");
 821         f = fopen(fs, "re");
 822         if (!f)
 823                 return errno == ENOENT ? -ESRCH : -errno;
 824
 825         FOREACH_LINE(line, f, return -errno) {
 826                 char *e, *p;
 827
 828                 truncate_nl(line);
 829
 830                 if (unified) {
 831                         e = startswith(line, "0:");
 832                         if (!e)
 833                                 continue;
 834
 835                         e = strchr(e, ':');
 836                         if (!e)
 837                                 continue;
 838                 } else {
 839                         char *l;
 840                         size_t k;
 841                         const char *word, *state;
 842                         bool found = false;
 843
 844                         l = strchr(line, ':');
 845                         if (!l)
 846                                 continue;
 847
 848                         l++;
 849                         e = strchr(l, ':');
 850                         if (!e)
 851                                 continue;
 852
 853                         *e = 0;
 854                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 855                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 856                                         found = true;
 857                                         break;
 858                                 }
 859                         }
 860
 861                         if (!found)
 862                                 continue;
 863                 }
 864
 865                 p = strdup(e + 1);
 866                 if (!p)
 867                         return -ENOMEM;
 868
 869                 *path = p;
 870                 return 0;
 871         }
 872
 873         return -ENODATA;
 874 }
 875
 876 int cg_install_release_agent(const char *controller, const char *agent) {
 877         _cleanup_free_ char *fs = NULL, *contents = NULL;
 878         const char *sc;
 879         int r, unified;
 880
 881         assert(agent);
 882
 883         unified = cg_unified();
 884         if (unified < 0)
 885                 return unified;
 886         if (unified) /* doesn't apply to unified hierarchy */
 887                 return -EOPNOTSUPP;
 888
 889         r = cg_get_path(controller, NULL, "release_agent", &fs);
 890         if (r < 0)
 891                 return r;
 892
 893         r = read_one_line_file(fs, &contents);
 894         if (r < 0)
 895                 return r;
 896
 897         sc = strstrip(contents);
 898         if (isempty(sc)) {
 899                 r = write_string_file(fs, agent, 0);
 900                 if (r < 0)
 901                         return r;
 902         } else if (!path_equal(sc, agent))
 903                 return -EEXIST;
 904
 905         fs = mfree(fs);
 906         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 907         if (r < 0)
 908                 return r;
 909
 910         contents = mfree(contents);
 911         r = read_one_line_file(fs, &contents);
 912         if (r < 0)
 913                 return r;
 914
 915         sc = strstrip(contents);
 916         if (streq(sc, "0")) {
 917                 r = write_string_file(fs, "1", 0);
 918                 if (r < 0)
 919                         return r;
 920
 921                 return 1;
 922         }
 923
 924         if (!streq(sc, "1"))
 925                 return -EIO;
 926
 927         return 0;
 928 }
 929
 930 int cg_uninstall_release_agent(const char *controller) {
 931         _cleanup_free_ char *fs = NULL;
 932         int r, unified;
 933
 934         unified = cg_unified();
 935         if (unified < 0)
 936                 return unified;
 937         if (unified) /* Doesn't apply to unified hierarchy */
 938                 return -EOPNOTSUPP;
 939
 940         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 941         if (r < 0)
 942                 return r;
 943
 944         r = write_string_file(fs, "0", 0);
 945         if (r < 0)
 946                 return r;
 947
 948         fs = mfree(fs);
 949
 950         r = cg_get_path(controller, NULL, "release_agent", &fs);
 951         if (r < 0)
 952                 return r;
 953
 954         r = write_string_file(fs, "", 0);
 955         if (r < 0)
 956                 return r;
 957
 958         return 0;
 959 }
 960
 961 int cg_is_empty(const char *controller, const char *path) {
 962         _cleanup_fclose_ FILE *f = NULL;
 963         pid_t pid;
 964         int r;
 965
 966         assert(path);
 967
 968         r = cg_enumerate_processes(controller, path, &f);
 969         if (r == -ENOENT)
 970                 return 1;
 971         if (r < 0)
 972                 return r;
 973
 974         r = cg_read_pid(f, &pid);
 975         if (r < 0)
 976                 return r;
 977
 978         return r == 0;
 979 }
 980
 981 int cg_is_empty_recursive(const char *controller, const char *path) {
 982         int unified, r;
 983
 984         assert(path);
 985
 986         /* The root cgroup is always populated */
 987         if (controller && (isempty(path) || path_equal(path, "/")))
 988                 return false;
 989
 990         unified = cg_unified();
 991         if (unified < 0)
 992                 return unified;
 993
 994         if (unified > 0) {
 995                 _cleanup_free_ char *populated = NULL, *t = NULL;
 996
 997                 /* On the unified hierarchy we can check empty state
 998                  * via the "cgroup.populated" attribute. */
 999
1000                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1001                 if (r < 0)
1002                         return r;
1003
1004                 r = read_one_line_file(populated, &t);
1005                 if (r == -ENOENT)
1006                         return 1;
1007                 if (r < 0)
1008                         return r;
1009
1010                 return streq(t, "0");
1011         } else {
1012                 _cleanup_closedir_ DIR *d = NULL;
1013                 char *fn;
1014
1015                 r = cg_is_empty(controller, path);
1016                 if (r <= 0)
1017                         return r;
1018
1019                 r = cg_enumerate_subgroups(controller, path, &d);
1020                 if (r == -ENOENT)
1021                         return 1;
1022                 if (r < 0)
1023                         return r;
1024
1025                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1026                         _cleanup_free_ char *p = NULL;
1027
1028                         p = strjoin(path, "/", fn, NULL);
1029                         free(fn);
1030                         if (!p)
1031                                 return -ENOMEM;
1032
1033                         r = cg_is_empty_recursive(controller, p);
1034                         if (r <= 0)
1035                                 return r;
1036                 }
1037                 if (r < 0)
1038                         return r;
1039
1040                 return true;
1041         }
1042 }
1043
1044 int cg_split_spec(const char *spec, char **controller, char **path) {
1045         char *t = NULL, *u = NULL;
1046         const char *e;
1047
1048         assert(spec);
1049
1050         if (*spec == '/') {
1051                 if (!path_is_safe(spec))
1052                         return -EINVAL;
1053
1054                 if (path) {
1055                         t = strdup(spec);
1056                         if (!t)
1057                                 return -ENOMEM;
1058
1059                         *path = path_kill_slashes(t);
1060                 }
1061
1062                 if (controller)
1063                         *controller = NULL;
1064
1065                 return 0;
1066         }
1067
1068         e = strchr(spec, ':');
1069         if (!e) {
1070                 if (!cg_controller_is_valid(spec))
1071                         return -EINVAL;
1072
1073                 if (controller) {
1074                         t = strdup(spec);
1075                         if (!t)
1076                                 return -ENOMEM;
1077
1078                         *controller = t;
1079                 }
1080
1081                 if (path)
1082                         *path = NULL;
1083
1084                 return 0;
1085         }
1086
1087         t = strndup(spec, e-spec);
1088         if (!t)
1089                 return -ENOMEM;
1090         if (!cg_controller_is_valid(t)) {
1091                 free(t);
1092                 return -EINVAL;
1093         }
1094
1095         if (isempty(e+1))
1096                 u = NULL;
1097         else {
1098                 u = strdup(e+1);
1099                 if (!u) {
1100                         free(t);
1101                         return -ENOMEM;
1102                 }
1103
1104                 if (!path_is_safe(u) ||
1105                     !path_is_absolute(u)) {
1106                         free(t);
1107                         free(u);
1108                         return -EINVAL;
1109                 }
1110
1111                 path_kill_slashes(u);
1112         }
1113
1114         if (controller)
1115                 *controller = t;
1116         else
1117                 free(t);
1118
1119         if (path)
1120                 *path = u;
1121         else
1122                 free(u);
1123
1124         return 0;
1125 }
1126
1127 int cg_mangle_path(const char *path, char **result) {
1128         _cleanup_free_ char *c = NULL, *p = NULL;
1129         char *t;
1130         int r;
1131
1132         assert(path);
1133         assert(result);
1134
1135         /* First, check if it already is a filesystem path */
1136         if (path_startswith(path, "/sys/fs/cgroup")) {
1137
1138                 t = strdup(path);
1139                 if (!t)
1140                         return -ENOMEM;
1141
1142                 *result = path_kill_slashes(t);
1143                 return 0;
1144         }
1145
1146         /* Otherwise, treat it as cg spec */
1147         r = cg_split_spec(path, &c, &p);
1148         if (r < 0)
1149                 return r;
1150
1151         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1152 }
1153
1154 int cg_get_root_path(char **path) {
1155         char *p, *e;
1156         int r;
1157
1158         assert(path);
1159
1160         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1161         if (r < 0)
1162                 return r;
1163
1164         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1165         if (!e)
1166                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1167         if (!e)
1168                 e = endswith(p, "/system"); /* even more legacy */
1169         if (e)
1170                 *e = 0;
1171
1172         *path = p;
1173         return 0;
1174 }
1175
1176 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1177         _cleanup_free_ char *rt = NULL;
1178         char *p;
1179         int r;
1180
1181         assert(cgroup);
1182         assert(shifted);
1183
1184         if (!root) {
1185                 /* If the root was specified let's use that, otherwise
1186                  * let's determine it from PID 1 */
1187
1188                 r = cg_get_root_path(&rt);
1189                 if (r < 0)
1190                         return r;
1191
1192                 root = rt;
1193         }
1194
1195         p = path_startswith(cgroup, root);
1196         if (p && p > cgroup)
1197                 *shifted = p - 1;
1198         else
1199                 *shifted = cgroup;
1200
1201         return 0;
1202 }
1203
1204 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1205         _cleanup_free_ char *raw = NULL;
1206         const char *c;
1207         int r;
1208
1209         assert(pid >= 0);
1210         assert(cgroup);
1211
1212         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1213         if (r < 0)
1214                 return r;
1215
1216         r = cg_shift_path(raw, root, &c);
1217         if (r < 0)
1218                 return r;
1219
1220         if (c == raw) {
1221                 *cgroup = raw;
1222                 raw = NULL;
1223         } else {
1224                 char *n;
1225
1226                 n = strdup(c);
1227                 if (!n)
1228                         return -ENOMEM;
1229
1230                 *cgroup = n;
1231         }
1232
1233         return 0;
1234 }
1235
1236 int cg_path_decode_unit(const char *cgroup, char **unit){
1237         char *c, *s;
1238         size_t n;
1239
1240         assert(cgroup);
1241         assert(unit);
1242
1243         n = strcspn(cgroup, "/");
1244         if (n < 3)
1245                 return -ENXIO;
1246
1247         c = strndupa(cgroup, n);
1248         c = cg_unescape(c);
1249
1250         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1251                 return -ENXIO;
1252
1253         s = strdup(c);
1254         if (!s)
1255                 return -ENOMEM;
1256
1257         *unit = s;
1258         return 0;
1259 }
1260
1261 static bool valid_slice_name(const char *p, size_t n) {
1262
1263         if (!p)
1264                 return false;
1265
1266         if (n < strlen("x.slice"))
1267                 return false;
1268
1269         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1270                 char buf[n+1], *c;
1271
1272                 memcpy(buf, p, n);
1273                 buf[n] = 0;
1274
1275                 c = cg_unescape(buf);
1276
1277                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1278         }
1279
1280         return false;
1281 }
1282
/* Skips over all leading slice components of a cgroup path and returns
 * a pointer (into @p) to the first component that is not a valid slice
 * name. Leading slashes of that component are skipped as well. */
static const char *skip_slices(const char *p) {
        size_t len;

        assert(p);

        p += strspn(p, "/");

        while (valid_slice_name(p, (len = strcspn(p, "/")))) {
                /* Jump over the slice and the slashes following it */
                p += len;
                p += strspn(p, "/");
        }

        return p;
}
1300
1301 int cg_path_get_unit(const char *path, char **ret) {
1302         const char *e;
1303         char *unit;
1304         int r;
1305
1306         assert(path);
1307         assert(ret);
1308
1309         e = skip_slices(path);
1310
1311         r = cg_path_decode_unit(e, &unit);
1312         if (r < 0)
1313                 return r;
1314
1315         /* We skipped over the slices, don't accept any now */
1316         if (endswith(unit, ".slice")) {
1317                 free(unit);
1318                 return -ENXIO;
1319         }
1320
1321         *ret = unit;
1322         return 0;
1323 }
1324
1325 int cg_pid_get_unit(pid_t pid, char **unit) {
1326         _cleanup_free_ char *cgroup = NULL;
1327         int r;
1328
1329         assert(unit);
1330
1331         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1332         if (r < 0)
1333                 return r;
1334
1335         return cg_path_get_unit(cgroup, unit);
1336 }
1337
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer (into @p) to whatever follows the session scope
 * component and its trailing slashes, or NULL if @p is empty or its
 * first component is not a "session-<id>.scope" name with a valid
 * session ID.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the session ID between prefix and suffix,
                 * plus the terminating NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal
                 * failure with NULL like all the other error paths. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1374
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer (into @p) to what follows the user manager
 * component and its trailing slashes, or NULL if @p is empty or does
 * not begin with a "user@<uid>.service" component with a parsable UID.
 */
static const char *skip_user_manager(const char *p) {
        static const char prefix[] = "user@", suffix[] = ".service";
        const size_t prefix_len = sizeof(prefix) - 1, suffix_len = sizeof(suffix) - 1;
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, prefix, prefix_len) != 0 ||
            memcmp(p + len - suffix_len, suffix, suffix_len) != 0)
                return NULL;

        {
                /* The part between "user@" and ".service" */
                char uid_str[len - prefix_len - suffix_len + 1];

                memcpy(uid_str, p + prefix_len, len - prefix_len - suffix_len);
                uid_str[len - prefix_len - suffix_len] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;
        }

        p += len;
        p += strspn(p, "/");

        return p;
}
1412
/* Skips everything that precedes a user unit in a cgroup path: any
 * number of slices, followed by either the user manager service or a
 * session scope. Returns NULL if neither of the two is present. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Prefer the user manager if it is in the path now; otherwise
         * fall back to skipping the user session. */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1429
/* Extracts the user unit name from a cgroup path. Returns -ENXIO if
 * the path carries no user manager / session prefix, otherwise the
 * result of cg_path_get_unit() on the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the prefix things look pretty much the same as for a
         * system unit, hence the same parser is used from there on. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1445
1446 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1447         _cleanup_free_ char *cgroup = NULL;
1448         int r;
1449
1450         assert(unit);
1451
1452         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1453         if (r < 0)
1454                 return r;
1455
1456         return cg_path_get_user_unit(cgroup, unit);
1457 }
1458
1459 int cg_path_get_machine_name(const char *path, char **machine) {
1460         _cleanup_free_ char *u = NULL;
1461         const char *sl;
1462         int r;
1463
1464         r = cg_path_get_unit(path, &u);
1465         if (r < 0)
1466                 return r;
1467
1468         sl = strjoina("/run/systemd/machines/unit:", u);
1469         return readlink_malloc(sl, machine);
1470 }
1471
1472 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1473         _cleanup_free_ char *cgroup = NULL;
1474         int r;
1475
1476         assert(machine);
1477
1478         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1479         if (r < 0)
1480                 return r;
1481
1482         return cg_path_get_machine_name(cgroup, machine);
1483 }
1484
1485 int cg_path_get_session(const char *path, char **session) {
1486         _cleanup_free_ char *unit = NULL;
1487         char *start, *end;
1488         int r;
1489
1490         assert(path);
1491
1492         r = cg_path_get_unit(path, &unit);
1493         if (r < 0)
1494                 return r;
1495
1496         start = startswith(unit, "session-");
1497         if (!start)
1498                 return -ENXIO;
1499         end = endswith(start, ".scope");
1500         if (!end)
1501                 return -ENXIO;
1502
1503         *end = 0;
1504         if (!session_id_valid(start))
1505                 return -ENXIO;
1506
1507         if (session) {
1508                 char *rr;
1509
1510                 rr = strdup(start);
1511                 if (!rr)
1512                         return -ENOMEM;
1513
1514                 *session = rr;
1515         }
1516
1517         return 0;
1518 }
1519
1520 int cg_pid_get_session(pid_t pid, char **session) {
1521         _cleanup_free_ char *cgroup = NULL;
1522         int r;
1523
1524         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1525         if (r < 0)
1526                 return r;
1527
1528         return cg_path_get_session(cgroup, session);
1529 }
1530
1531 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1532         _cleanup_free_ char *slice = NULL;
1533         char *start, *end;
1534         int r;
1535
1536         assert(path);
1537
1538         r = cg_path_get_slice(path, &slice);
1539         if (r < 0)
1540                 return r;
1541
1542         start = startswith(slice, "user-");
1543         if (!start)
1544                 return -ENXIO;
1545         end = endswith(start, ".slice");
1546         if (!end)
1547                 return -ENXIO;
1548
1549         *end = 0;
1550         if (parse_uid(start, uid) < 0)
1551                 return -ENXIO;
1552
1553         return 0;
1554 }
1555
1556 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1557         _cleanup_free_ char *cgroup = NULL;
1558         int r;
1559
1560         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1561         if (r < 0)
1562                 return r;
1563
1564         return cg_path_get_owner_uid(cgroup, uid);
1565 }
1566
/* Finds the right-most slice unit in the leading run of slice
 * components of a cgroup path, i.e. scanning stops at the first
 * component that is not a valid slice name.
 *
 * If the path does not start with a slice at all, "-.slice" is
 * returned. Returns 0 and a newly allocated string in *slice on
 * success, a negative errno-style value otherwise. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *innermost = NULL;
        char *root;

        assert(p);
        assert(slice);

        /* Walk over the slices, remembering the last one seen */
        for (;;) {
                size_t len;

                p += strspn(p, "/");

                len = strcspn(p, "/");
                if (!valid_slice_name(p, len))
                        break;

                innermost = p;
                p += len;
        }

        if (innermost)
                return cg_path_decode_unit(innermost, slice);

        /* No slice in the path */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1602
1603 int cg_pid_get_slice(pid_t pid, char **slice) {
1604         _cleanup_free_ char *cgroup = NULL;
1605         int r;
1606
1607         assert(slice);
1608
1609         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1610         if (r < 0)
1611                 return r;
1612
1613         return cg_path_get_slice(cgroup, slice);
1614 }
1615
/* Extracts the user slice from a cgroup path. Returns -ENXIO if the
 * path carries no user manager / session prefix, otherwise the result
 * of cg_path_get_slice() on the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the prefix things look pretty much the same as for a
         * system slice, hence the same parser is used from there on. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1629
1630 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1631         _cleanup_free_ char *cgroup = NULL;
1632         int r;
1633
1634         assert(slice);
1635
1636         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1637         if (r < 0)
1638                 return r;
1639
1640         return cg_path_get_user_slice(cgroup, slice);
1641 }
1642
1643 char *cg_escape(const char *p) {
1644         bool need_prefix = false;
1645
1646         /* This implements very minimal escaping for names to be used
1647          * as file names in the cgroup tree: any name which might
1648          * conflict with a kernel name or is prefixed with '_' is
1649          * prefixed with a '_'. That way, when reading cgroup names it
1650          * is sufficient to remove a single prefixing underscore if
1651          * there is one. */
1652
1653         /* The return value of this function (unlike cg_unescape())
1654          * needs free()! */
1655
1656         if (p[0] == 0 ||
1657             p[0] == '_' ||
1658             p[0] == '.' ||
1659             streq(p, "notify_on_release") ||
1660             streq(p, "release_agent") ||
1661             streq(p, "tasks") ||
1662             startswith(p, "cgroup."))
1663                 need_prefix = true;
1664         else {
1665                 const char *dot;
1666
1667                 dot = strrchr(p, '.');
1668                 if (dot) {
1669                         CGroupController c;
1670                         size_t l = dot - p;
1671
1672                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1673                                 const char *n;
1674
1675                                 n = cgroup_controller_to_string(c);
1676
1677                                 if (l != strlen(n))
1678                                         continue;
1679
1680                                 if (memcmp(p, n, l) != 0)
1681                                         continue;
1682
1683                                 need_prefix = true;
1684                                 break;
1685                         }
1686                 }
1687         }
1688
1689         if (need_prefix)
1690                 return strappend("_", p);
1691
1692         return strdup(p);
1693 }
1694
/* Undoes cg_escape(): skips one leading underscore, if there is one.
 *
 * The return value of this function (unlike cg_escape()) doesn't need
 * free()! It points into the string that was passed in. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1706
1707 #define CONTROLLER_VALID                        \
1708         DIGITS LETTERS                          \
1709         "_"
1710
1711 bool cg_controller_is_valid(const char *p) {
1712         const char *t, *s;
1713
1714         if (!p)
1715                 return false;
1716
1717         s = startswith(p, "name=");
1718         if (s)
1719                 p = s;
1720
1721         if (*p == 0 || *p == '_')
1722                 return false;
1723
1724         for (t = p; *t; t++)
1725                 if (!strchr(CONTROLLER_VALID, *t))
1726                         return false;
1727
1728         if (t - p > FILENAME_MAX)
1729                 return false;
1730
1731         return true;
1732 }
1733
1734 int cg_slice_to_path(const char *unit, char **ret) {
1735         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1736         const char *dash;
1737         int r;
1738
1739         assert(unit);
1740         assert(ret);
1741
1742         if (streq(unit, "-.slice")) {
1743                 char *x;
1744
1745                 x = strdup("");
1746                 if (!x)
1747                         return -ENOMEM;
1748                 *ret = x;
1749                 return 0;
1750         }
1751
1752         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1753                 return -EINVAL;
1754
1755         if (!endswith(unit, ".slice"))
1756                 return -EINVAL;
1757
1758         r = unit_name_to_prefix(unit, &p);
1759         if (r < 0)
1760                 return r;
1761
1762         dash = strchr(p, '-');
1763
1764         /* Don't allow initial dashes */
1765         if (dash == p)
1766                 return -EINVAL;
1767
1768         while (dash) {
1769                 _cleanup_free_ char *escaped = NULL;
1770                 char n[dash - p + sizeof(".slice")];
1771
1772                 /* Don't allow trailing or double dashes */
1773                 if (dash[1] == 0 || dash[1] == '-')
1774                         return -EINVAL;
1775
1776                 strcpy(stpncpy(n, p, dash - p), ".slice");
1777                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1778                         return -EINVAL;
1779
1780                 escaped = cg_escape(n);
1781                 if (!escaped)
1782                         return -ENOMEM;
1783
1784                 if (!strextend(&s, escaped, "/", NULL))
1785                         return -ENOMEM;
1786
1787                 dash = strchr(dash+1, '-');
1788         }
1789
1790         e = cg_escape(unit);
1791         if (!e)
1792                 return -ENOMEM;
1793
1794         if (!strextend(&s, e, NULL))
1795                 return -ENOMEM;
1796
1797         *ret = s;
1798         s = NULL;
1799
1800         return 0;
1801 }
1802
1803 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1804         _cleanup_free_ char *p = NULL;
1805         int r;
1806
1807         r = cg_get_path(controller, path, attribute, &p);
1808         if (r < 0)
1809                 return r;
1810
1811         return write_string_file(p, value, 0);
1812 }
1813
1814 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1815         _cleanup_free_ char *p = NULL;
1816         int r;
1817
1818         r = cg_get_path(controller, path, attribute, &p);
1819         if (r < 0)
1820                 return r;
1821
1822         return read_one_line_file(p, ret);
1823 }
1824
1825 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1826         CGroupController c;
1827         int r, unified;
1828
1829         /* This one will create a cgroup in our private tree, but also
1830          * duplicate it in the trees specified in mask, and remove it
1831          * in all others */
1832
1833         /* First create the cgroup in our own hierarchy. */
1834         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1835         if (r < 0)
1836                 return r;
1837
1838         /* If we are in the unified hierarchy, we are done now */
1839         unified = cg_unified();
1840         if (unified < 0)
1841                 return unified;
1842         if (unified > 0)
1843                 return 0;
1844
1845         /* Otherwise, do the same in the other hierarchies */
1846         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1847                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1848                 const char *n;
1849
1850                 n = cgroup_controller_to_string(c);
1851
1852                 if (mask & bit)
1853                         (void) cg_create(n, path);
1854                 else if (supported & bit)
1855                         (void) cg_trim(n, path, true);
1856         }
1857
1858         return 0;
1859 }
1860
1861 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1862         CGroupController c;
1863         int r, unified;
1864
1865         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1866         if (r < 0)
1867                 return r;
1868
1869         unified = cg_unified();
1870         if (unified < 0)
1871                 return unified;
1872         if (unified > 0)
1873                 return 0;
1874
1875         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1876                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1877                 const char *p = NULL;
1878
1879                 if (!(supported & bit))
1880                         continue;
1881
1882                 if (path_callback)
1883                         p = path_callback(bit, userdata);
1884
1885                 if (!p)
1886                         p = path;
1887
1888                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1889         }
1890
1891         return 0;
1892 }
1893
1894 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1895         Iterator i;
1896         void *pidp;
1897         int r = 0;
1898
1899         SET_FOREACH(pidp, pids, i) {
1900                 pid_t pid = PTR_TO_PID(pidp);
1901                 int q;
1902
1903                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1904                 if (q < 0 && r >= 0)
1905                         r = q;
1906         }
1907
1908         return r;
1909 }
1910
1911 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1912         CGroupController c;
1913         int r = 0, unified;
1914
1915         if (!path_equal(from, to))  {
1916                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1917                 if (r < 0)
1918                         return r;
1919         }
1920
1921         unified = cg_unified();
1922         if (unified < 0)
1923                 return unified;
1924         if (unified > 0)
1925                 return r;
1926
1927         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1928                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1929                 const char *p = NULL;
1930
1931                 if (!(supported & bit))
1932                         continue;
1933
1934                 if (to_callback)
1935                         p = to_callback(bit, userdata);
1936
1937                 if (!p)
1938                         p = to;
1939
1940                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1941         }
1942
1943         return 0;
1944 }
1945
1946 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1947         CGroupController c;
1948         int r, unified;
1949
1950         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1951         if (r < 0)
1952                 return r;
1953
1954         unified = cg_unified();
1955         if (unified < 0)
1956                 return unified;
1957         if (unified > 0)
1958                 return r;
1959
1960         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1961                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1962
1963                 if (!(supported & bit))
1964                         continue;
1965
1966                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1967         }
1968
1969         return 0;
1970 }
1971
1972 int cg_mask_supported(CGroupMask *ret) {
1973         CGroupMask mask = 0;
1974         int r, unified;
1975
1976         /* Determines the mask of supported cgroup controllers. Only
1977          * includes controllers we can make sense of and that are
1978          * actually accessible. */
1979
1980         unified = cg_unified();
1981         if (unified < 0)
1982                 return unified;
1983         if (unified > 0) {
1984                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
1985                 const char *c;
1986
1987                 /* In the unified hierarchy we can read the supported
1988                  * and accessible controllers from a the top-level
1989                  * cgroup attribute */
1990
1991                 r = cg_get_root_path(&root);
1992                 if (r < 0)
1993                         return r;
1994
1995                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
1996                 if (r < 0)
1997                         return r;
1998
1999                 r = read_one_line_file(path, &controllers);
2000                 if (r < 0)
2001                         return r;
2002
2003                 c = controllers;
2004                 for (;;) {
2005                         _cleanup_free_ char *n = NULL;
2006                         CGroupController v;
2007
2008                         r = extract_first_word(&c, &n, NULL, 0);
2009                         if (r < 0)
2010                                 return r;
2011                         if (r == 0)
2012                                 break;
2013
2014                         v = cgroup_controller_from_string(n);
2015                         if (v < 0)
2016                                 continue;
2017
2018                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2019                 }
2020
2021                 /* Currently, we only support the memory and pids
2022                  * controller in the unified hierarchy, mask
2023                  * everything else off. */
2024                 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS;
2025
2026         } else {
2027                 CGroupController c;
2028
2029                 /* In the legacy hierarchy, we check whether which
2030                  * hierarchies are mounted. */
2031
2032                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2033                         const char *n;
2034
2035                         n = cgroup_controller_to_string(c);
2036                         if (controller_is_accessible(n) >= 0)
2037                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2038                 }
2039         }
2040
2041         *ret = mask;
2042         return 0;
2043 }
2044
2045 int cg_kernel_controllers(Set *controllers) {
2046         _cleanup_fclose_ FILE *f = NULL;
2047         char buf[LINE_MAX];
2048         int r;
2049
2050         assert(controllers);
2051
2052         /* Determines the full list of kernel-known controllers. Might
2053          * include controllers we don't actually support, arbitrary
2054          * named hierarchies and controllers that aren't currently
2055          * accessible (because not mounted). */
2056
2057         f = fopen("/proc/cgroups", "re");
2058         if (!f) {
2059                 if (errno == ENOENT)
2060                         return 0;
2061                 return -errno;
2062         }
2063
2064         /* Ignore the header line */
2065         (void) fgets(buf, sizeof(buf), f);
2066
2067         for (;;) {
2068                 char *controller;
2069                 int enabled = 0;
2070
2071                 errno = 0;
2072                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2073
2074                         if (feof(f))
2075                                 break;
2076
2077                         if (ferror(f) && errno != 0)
2078                                 return -errno;
2079
2080                         return -EBADMSG;
2081                 }
2082
2083                 if (!enabled) {
2084                         free(controller);
2085                         continue;
2086                 }
2087
2088                 if (!cg_controller_is_valid(controller)) {
2089                         free(controller);
2090                         return -EBADMSG;
2091                 }
2092
2093                 r = set_consume(controllers, controller);
2094                 if (r < 0)
2095                         return r;
2096         }
2097
2098         return 0;
2099 }
2100
2101 static thread_local int unified_cache = -1;
2102
2103 int cg_unified(void) {
2104         struct statfs fs;
2105
2106         /* Checks if we support the unified hierarchy. Returns an
2107          * error when the cgroup hierarchies aren't mounted yet or we
2108          * have any other trouble determining if the unified hierarchy
2109          * is supported. */
2110
2111         if (unified_cache >= 0)
2112                 return unified_cache;
2113
2114         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2115                 return -errno;
2116
2117         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2118                 unified_cache = true;
2119         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2120                 unified_cache = false;
2121         else
2122                 return -ENOEXEC;
2123
2124         return unified_cache;
2125 }
2126
2127 void cg_unified_flush(void) {
2128         unified_cache = -1;
2129 }
2130
2131 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2132         _cleanup_free_ char *fs = NULL;
2133         CGroupController c;
2134         int r, unified;
2135
2136         assert(p);
2137
2138         if (supported == 0)
2139                 return 0;
2140
2141         unified = cg_unified();
2142         if (unified < 0)
2143                 return unified;
2144         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2145                 return 0;
2146
2147         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2148         if (r < 0)
2149                 return r;
2150
2151         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2152                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2153                 const char *n;
2154
2155                 if (!(supported & bit))
2156                         continue;
2157
2158                 n = cgroup_controller_to_string(c);
2159                 {
2160                         char s[1 + strlen(n) + 1];
2161
2162                         s[0] = mask & bit ? '+' : '-';
2163                         strcpy(s + 1, n);
2164
2165                         r = write_string_file(fs, s, 0);
2166                         if (r < 0)
2167                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2168                 }
2169         }
2170
2171         return 0;
2172 }
2173
2174 bool cg_is_unified_wanted(void) {
2175         static thread_local int wanted = -1;
2176         int r, unified;
2177
2178         /* If the hierarchy is already mounted, then follow whatever
2179          * was chosen for it. */
2180         unified = cg_unified();
2181         if (unified >= 0)
2182                 return unified;
2183
2184         /* Otherwise, let's see what the kernel command line has to
2185          * say. Since checking that is expensive, let's cache the
2186          * result. */
2187         if (wanted >= 0)
2188                 return wanted;
2189
2190         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2191         if (r > 0)
2192                 return (wanted = true);
2193         else {
2194                 _cleanup_free_ char *value = NULL;
2195
2196                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2197                 if (r < 0)
2198                         return false;
2199                 if (r == 0)
2200                         return (wanted = false);
2201
2202                 return (wanted = parse_boolean(value) > 0);
2203         }
2204 }
2205
/* Exact opposite of cg_is_unified_wanted(): true whenever the unified
 * hierarchy is not wanted. */
bool cg_is_legacy_wanted(void) {
        bool unified_wanted = cg_is_unified_wanted();

        return !unified_wanted;
}
2209
2210 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2211         [CGROUP_CONTROLLER_CPU] = "cpu",
2212         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2213         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2214         [CGROUP_CONTROLLER_MEMORY] = "memory",
2215         [CGROUP_CONTROLLER_DEVICES] = "devices",
2216         [CGROUP_CONTROLLER_PIDS] = "pids",
2217 };
2218
2219 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);