src/basic/cgroup-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <dirent.h>
  23 #include <errno.h>
  24 #include <ftw.h>
  25 #include <signal.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <sys/stat.h>
  29 #include <sys/types.h>
  30 #include <unistd.h>
  31
  32 #include "cgroup-util.h"
  33 #include "extract-word.h"
  34 #include "fd-util.h"
  35 #include "fileio.h"
  36 #include "formats-util.h"
  37 #include "login-util.h"
  38 #include "macro.h"
  39 #include "mkdir.h"
  40 #include "parse-util.h"
  41 #include "path-util.h"
  42 #include "process-util.h"
  43 #include "set.h"
  44 #include "special.h"
  45 #include "string-util.h"
  46 #include "unit-name.h"
  47 #include "user-util.h"
  48 #include "util.h"
  49
  50 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  51         _cleanup_free_ char *fs = NULL;
  52         FILE *f;
  53         int r;
  54
  55         assert(_f);
  56
  57         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  58         if (r < 0)
  59                 return r;
  60
  61         f = fopen(fs, "re");
  62         if (!f)
  63                 return -errno;
  64
  65         *_f = f;
  66         return 0;
  67 }
  68
  69 int cg_read_pid(FILE *f, pid_t *_pid) {
  70         unsigned long ul;
  71
  72         /* Note that the cgroup.procs might contain duplicates! See
  73          * cgroups.txt for details. */
  74
  75         assert(f);
  76         assert(_pid);
  77
  78         errno = 0;
  79         if (fscanf(f, "%lu", &ul) != 1) {
  80
  81                 if (feof(f))
  82                         return 0;
  83
  84                 return errno ? -errno : -EIO;
  85         }
  86
  87         if (ul <= 0)
  88                 return -EIO;
  89
  90         *_pid = (pid_t) ul;
  91         return 1;
  92 }
  93
  94 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
  95         _cleanup_free_ char *fs = NULL;
  96         int r;
  97         DIR *d;
  98
  99         assert(_d);
 100
 101         /* This is not recursive! */
 102
 103         r = cg_get_path(controller, path, NULL, &fs);
 104         if (r < 0)
 105                 return r;
 106
 107         d = opendir(fs);
 108         if (!d)
 109                 return -errno;
 110
 111         *_d = d;
 112         return 0;
 113 }
 114
 115 int cg_read_subgroup(DIR *d, char **fn) {
 116         struct dirent *de;
 117
 118         assert(d);
 119         assert(fn);
 120
 121         FOREACH_DIRENT_ALL(de, d, return -errno) {
 122                 char *b;
 123
 124                 if (de->d_type != DT_DIR)
 125                         continue;
 126
 127                 if (streq(de->d_name, ".") ||
 128                     streq(de->d_name, ".."))
 129                         continue;
 130
 131                 b = strdup(de->d_name);
 132                 if (!b)
 133                         return -ENOMEM;
 134
 135                 *fn = b;
 136                 return 1;
 137         }
 138
 139         return 0;
 140 }
 141
 142 int cg_rmdir(const char *controller, const char *path) {
 143         _cleanup_free_ char *p = NULL;
 144         int r;
 145
 146         r = cg_get_path(controller, path, NULL, &p);
 147         if (r < 0)
 148                 return r;
 149
 150         r = rmdir(p);
 151         if (r < 0 && errno != ENOENT)
 152                 return -errno;
 153
 154         return 0;
 155 }
 156
 157 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
 158         _cleanup_set_free_ Set *allocated_set = NULL;
 159         bool done = false;
 160         int r, ret = 0;
 161         pid_t my_pid;
 162
 163         assert(sig >= 0);
 164
 165         /* This goes through the tasks list and kills them all. This
 166          * is repeated until no further processes are added to the
 167          * tasks list, to properly handle forking processes */
 168
 169         if (!s) {
 170                 s = allocated_set = set_new(NULL);
 171                 if (!s)
 172                         return -ENOMEM;
 173         }
 174
 175         my_pid = getpid();
 176
 177         do {
 178                 _cleanup_fclose_ FILE *f = NULL;
 179                 pid_t pid = 0;
 180                 done = true;
 181
 182                 r = cg_enumerate_processes(controller, path, &f);
 183                 if (r < 0) {
 184                         if (ret >= 0 && r != -ENOENT)
 185                                 return r;
 186
 187                         return ret;
 188                 }
 189
 190                 while ((r = cg_read_pid(f, &pid)) > 0) {
 191
 192                         if (ignore_self && pid == my_pid)
 193                                 continue;
 194
 195                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 196                                 continue;
 197
 198                         /* If we haven't killed this process yet, kill
 199                          * it */
 200                         if (kill(pid, sig) < 0) {
 201                                 if (ret >= 0 && errno != ESRCH)
 202                                         ret = -errno;
 203                         } else {
 204                                 if (sigcont && sig != SIGKILL)
 205                                         (void) kill(pid, SIGCONT);
 206
 207                                 if (ret == 0)
 208                                         ret = 1;
 209                         }
 210
 211                         done = false;
 212
 213                         r = set_put(s, PID_TO_PTR(pid));
 214                         if (r < 0) {
 215                                 if (ret >= 0)
 216                                         return r;
 217
 218                                 return ret;
 219                         }
 220                 }
 221
 222                 if (r < 0) {
 223                         if (ret >= 0)
 224                                 return r;
 225
 226                         return ret;
 227                 }
 228
 229                 /* To avoid racing against processes which fork
 230                  * quicker than we can kill them we repeat this until
 231                  * no new pids need to be killed. */
 232
 233         } while (!done);
 234
 235         return ret;
 236 }
 237
 238 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 239         _cleanup_set_free_ Set *allocated_set = NULL;
 240         _cleanup_closedir_ DIR *d = NULL;
 241         int r, ret;
 242         char *fn;
 243
 244         assert(path);
 245         assert(sig >= 0);
 246
 247         if (!s) {
 248                 s = allocated_set = set_new(NULL);
 249                 if (!s)
 250                         return -ENOMEM;
 251         }
 252
 253         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 254
 255         r = cg_enumerate_subgroups(controller, path, &d);
 256         if (r < 0) {
 257                 if (ret >= 0 && r != -ENOENT)
 258                         return r;
 259
 260                 return ret;
 261         }
 262
 263         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 264                 _cleanup_free_ char *p = NULL;
 265
 266                 p = strjoin(path, "/", fn, NULL);
 267                 free(fn);
 268                 if (!p)
 269                         return -ENOMEM;
 270
 271                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 272                 if (r != 0 && ret >= 0)
 273                         ret = r;
 274         }
 275
 276         if (ret >= 0 && r < 0)
 277                 ret = r;
 278
 279         if (rem) {
 280                 r = cg_rmdir(controller, path);
 281                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 282                         return r;
 283         }
 284
 285         return ret;
 286 }
 287
 288 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
 289         bool done = false;
 290         _cleanup_set_free_ Set *s = NULL;
 291         int r, ret = 0;
 292         pid_t my_pid;
 293
 294         assert(cfrom);
 295         assert(pfrom);
 296         assert(cto);
 297         assert(pto);
 298
 299         s = set_new(NULL);
 300         if (!s)
 301                 return -ENOMEM;
 302
 303         my_pid = getpid();
 304
 305         do {
 306                 _cleanup_fclose_ FILE *f = NULL;
 307                 pid_t pid = 0;
 308                 done = true;
 309
 310                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 311                 if (r < 0) {
 312                         if (ret >= 0 && r != -ENOENT)
 313                                 return r;
 314
 315                         return ret;
 316                 }
 317
 318                 while ((r = cg_read_pid(f, &pid)) > 0) {
 319
 320                         /* This might do weird stuff if we aren't a
 321                          * single-threaded program. However, we
 322                          * luckily know we are not */
 323                         if (ignore_self && pid == my_pid)
 324                                 continue;
 325
 326                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 327                                 continue;
 328
 329                         /* Ignore kernel threads. Since they can only
 330                          * exist in the root cgroup, we only check for
 331                          * them there. */
 332                         if (cfrom &&
 333                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 334                             is_kernel_thread(pid) > 0)
 335                                 continue;
 336
 337                         r = cg_attach(cto, pto, pid);
 338                         if (r < 0) {
 339                                 if (ret >= 0 && r != -ESRCH)
 340                                         ret = r;
 341                         } else if (ret == 0)
 342                                 ret = 1;
 343
 344                         done = false;
 345
 346                         r = set_put(s, PID_TO_PTR(pid));
 347                         if (r < 0) {
 348                                 if (ret >= 0)
 349                                         return r;
 350
 351                                 return ret;
 352                         }
 353                 }
 354
 355                 if (r < 0) {
 356                         if (ret >= 0)
 357                                 return r;
 358
 359                         return ret;
 360                 }
 361         } while (!done);
 362
 363         return ret;
 364 }
 365
 366 int cg_migrate_recursive(
 367                 const char *cfrom,
 368                 const char *pfrom,
 369                 const char *cto,
 370                 const char *pto,
 371                 bool ignore_self,
 372                 bool rem) {
 373
 374         _cleanup_closedir_ DIR *d = NULL;
 375         int r, ret = 0;
 376         char *fn;
 377
 378         assert(cfrom);
 379         assert(pfrom);
 380         assert(cto);
 381         assert(pto);
 382
 383         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 384
 385         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 386         if (r < 0) {
 387                 if (ret >= 0 && r != -ENOENT)
 388                         return r;
 389
 390                 return ret;
 391         }
 392
 393         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 394                 _cleanup_free_ char *p = NULL;
 395
 396                 p = strjoin(pfrom, "/", fn, NULL);
 397                 free(fn);
 398                 if (!p)
 399                         return -ENOMEM;
 400
 401                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 402                 if (r != 0 && ret >= 0)
 403                         ret = r;
 404         }
 405
 406         if (r < 0 && ret >= 0)
 407                 ret = r;
 408
 409         if (rem) {
 410                 r = cg_rmdir(cfrom, pfrom);
 411                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 412                         return r;
 413         }
 414
 415         return ret;
 416 }
 417
 418 int cg_migrate_recursive_fallback(
 419                 const char *cfrom,
 420                 const char *pfrom,
 421                 const char *cto,
 422                 const char *pto,
 423                 bool ignore_self,
 424                 bool rem) {
 425
 426         int r;
 427
 428         assert(cfrom);
 429         assert(pfrom);
 430         assert(cto);
 431         assert(pto);
 432
 433         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 434         if (r < 0) {
 435                 char prefix[strlen(pto) + 1];
 436
 437                 /* This didn't work? Then let's try all prefixes of the destination */
 438
 439                 PATH_FOREACH_PREFIX(prefix, pto) {
 440                         int q;
 441
 442                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 443                         if (q >= 0)
 444                                 return q;
 445                 }
 446         }
 447
 448         return r;
 449 }
 450
 451 static const char *controller_to_dirname(const char *controller) {
 452         const char *e;
 453
 454         assert(controller);
 455
 456         /* Converts a controller name to the directory name below
 457          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 458          * just cuts off the name= prefixed used for named
 459          * hierarchies, if it is specified. */
 460
 461         e = startswith(controller, "name=");
 462         if (e)
 463                 return e;
 464
 465         return controller;
 466 }
 467
 468 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 469         const char *dn;
 470         char *t = NULL;
 471
 472         assert(fs);
 473         assert(controller);
 474
 475         dn = controller_to_dirname(controller);
 476
 477         if (isempty(path) && isempty(suffix))
 478                 t = strappend("/sys/fs/cgroup/", dn);
 479         else if (isempty(path))
 480                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 481         else if (isempty(suffix))
 482                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 483         else
 484                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 485         if (!t)
 486                 return -ENOMEM;
 487
 488         *fs = t;
 489         return 0;
 490 }
 491
 492 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 493         char *t;
 494
 495         assert(fs);
 496
 497         if (isempty(path) && isempty(suffix))
 498                 t = strdup("/sys/fs/cgroup");
 499         else if (isempty(path))
 500                 t = strappend("/sys/fs/cgroup/", suffix);
 501         else if (isempty(suffix))
 502                 t = strappend("/sys/fs/cgroup/", path);
 503         else
 504                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 505         if (!t)
 506                 return -ENOMEM;
 507
 508         *fs = t;
 509         return 0;
 510 }
 511
 512 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 513         int unified, r;
 514
 515         assert(fs);
 516
 517         if (!controller) {
 518                 char *t;
 519
 520                 /* If no controller is specified, we return the path
 521                  * *below* the controllers, without any prefix. */
 522
 523                 if (!path && !suffix)
 524                         return -EINVAL;
 525
 526                 if (!suffix)
 527                         t = strdup(path);
 528                 else if (!path)
 529                         t = strdup(suffix);
 530                 else
 531                         t = strjoin(path, "/", suffix, NULL);
 532                 if (!t)
 533                         return -ENOMEM;
 534
 535                 *fs = path_kill_slashes(t);
 536                 return 0;
 537         }
 538
 539         if (!cg_controller_is_valid(controller))
 540                 return -EINVAL;
 541
 542         unified = cg_unified();
 543         if (unified < 0)
 544                 return unified;
 545
 546         if (unified > 0)
 547                 r = join_path_unified(path, suffix, fs);
 548         else
 549                 r = join_path_legacy(controller, path, suffix, fs);
 550         if (r < 0)
 551                 return r;
 552
 553         path_kill_slashes(*fs);
 554         return 0;
 555 }
 556
 557 static int controller_is_accessible(const char *controller) {
 558         int unified;
 559
 560         assert(controller);
 561
 562         /* Checks whether a specific controller is accessible,
 563          * i.e. its hierarchy mounted. In the unified hierarchy all
 564          * controllers are considered accessible, except for the named
 565          * hierarchies */
 566
 567         if (!cg_controller_is_valid(controller))
 568                 return -EINVAL;
 569
 570         unified = cg_unified();
 571         if (unified < 0)
 572                 return unified;
 573         if (unified > 0) {
 574                 /* We don't support named hierarchies if we are using
 575                  * the unified hierarchy. */
 576
 577                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 578                         return 0;
 579
 580                 if (startswith(controller, "name="))
 581                         return -EOPNOTSUPP;
 582
 583         } else {
 584                 const char *cc, *dn;
 585
 586                 dn = controller_to_dirname(controller);
 587                 cc = strjoina("/sys/fs/cgroup/", dn);
 588
 589                 if (laccess(cc, F_OK) < 0)
 590                         return -errno;
 591         }
 592
 593         return 0;
 594 }
 595
 596 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 597         int r;
 598
 599         assert(controller);
 600         assert(fs);
 601
 602         /* Check if the specified controller is actually accessible */
 603         r = controller_is_accessible(controller);
 604         if (r < 0)
 605                 return r;
 606
 607         return cg_get_path(controller, path, suffix, fs);
 608 }
 609
 610 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 611         assert(path);
 612         assert(sb);
 613         assert(ftwbuf);
 614
 615         if (typeflag != FTW_DP)
 616                 return 0;
 617
 618         if (ftwbuf->level < 1)
 619                 return 0;
 620
 621         (void) rmdir(path);
 622         return 0;
 623 }
 624
 625 int cg_trim(const char *controller, const char *path, bool delete_root) {
 626         _cleanup_free_ char *fs = NULL;
 627         int r = 0;
 628
 629         assert(path);
 630
 631         r = cg_get_path(controller, path, NULL, &fs);
 632         if (r < 0)
 633                 return r;
 634
 635         errno = 0;
 636         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 637                 if (errno == ENOENT)
 638                         r = 0;
 639                 else if (errno != 0)
 640                         r = -errno;
 641                 else
 642                         r = -EIO;
 643         }
 644
 645         if (delete_root) {
 646                 if (rmdir(fs) < 0 && errno != ENOENT)
 647                         return -errno;
 648         }
 649
 650         return r;
 651 }
 652
 653 int cg_create(const char *controller, const char *path) {
 654         _cleanup_free_ char *fs = NULL;
 655         int r;
 656
 657         r = cg_get_path_and_check(controller, path, NULL, &fs);
 658         if (r < 0)
 659                 return r;
 660
 661         r = mkdir_parents(fs, 0755);
 662         if (r < 0)
 663                 return r;
 664
 665         if (mkdir(fs, 0755) < 0) {
 666
 667                 if (errno == EEXIST)
 668                         return 0;
 669
 670                 return -errno;
 671         }
 672
 673         return 1;
 674 }
 675
 676 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 677         int r, q;
 678
 679         assert(pid >= 0);
 680
 681         r = cg_create(controller, path);
 682         if (r < 0)
 683                 return r;
 684
 685         q = cg_attach(controller, path, pid);
 686         if (q < 0)
 687                 return q;
 688
 689         /* This does not remove the cgroup on failure */
 690         return r;
 691 }
 692
 693 int cg_attach(const char *controller, const char *path, pid_t pid) {
 694         _cleanup_free_ char *fs = NULL;
 695         char c[DECIMAL_STR_MAX(pid_t) + 2];
 696         int r;
 697
 698         assert(path);
 699         assert(pid >= 0);
 700
 701         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 702         if (r < 0)
 703                 return r;
 704
 705         if (pid == 0)
 706                 pid = getpid();
 707
 708         snprintf(c, sizeof(c), PID_FMT"\n", pid);
 709
 710         return write_string_file(fs, c, 0);
 711 }
 712
 713 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 714         int r;
 715
 716         assert(controller);
 717         assert(path);
 718         assert(pid >= 0);
 719
 720         r = cg_attach(controller, path, pid);
 721         if (r < 0) {
 722                 char prefix[strlen(path) + 1];
 723
 724                 /* This didn't work? Then let's try all prefixes of
 725                  * the destination */
 726
 727                 PATH_FOREACH_PREFIX(prefix, path) {
 728                         int q;
 729
 730                         q = cg_attach(controller, prefix, pid);
 731                         if (q >= 0)
 732                                 return q;
 733                 }
 734         }
 735
 736         return r;
 737 }
 738
 739 int cg_set_group_access(
 740                 const char *controller,
 741                 const char *path,
 742                 mode_t mode,
 743                 uid_t uid,
 744                 gid_t gid) {
 745
 746         _cleanup_free_ char *fs = NULL;
 747         int r;
 748
 749         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 750                 return 0;
 751
 752         if (mode != MODE_INVALID)
 753                 mode &= 0777;
 754
 755         r = cg_get_path(controller, path, NULL, &fs);
 756         if (r < 0)
 757                 return r;
 758
 759         return chmod_and_chown(fs, mode, uid, gid);
 760 }
 761
 762 int cg_set_task_access(
 763                 const char *controller,
 764                 const char *path,
 765                 mode_t mode,
 766                 uid_t uid,
 767                 gid_t gid) {
 768
 769         _cleanup_free_ char *fs = NULL, *procs = NULL;
 770         int r, unified;
 771
 772         assert(path);
 773
 774         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 775                 return 0;
 776
 777         if (mode != MODE_INVALID)
 778                 mode &= 0666;
 779
 780         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 781         if (r < 0)
 782                 return r;
 783
 784         r = chmod_and_chown(fs, mode, uid, gid);
 785         if (r < 0)
 786                 return r;
 787
 788         unified = cg_unified();
 789         if (unified < 0)
 790                 return unified;
 791         if (unified)
 792                 return 0;
 793
 794         /* Compatibility, Always keep values for "tasks" in sync with
 795          * "cgroup.procs" */
 796         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 797                 (void) chmod_and_chown(procs, mode, uid, gid);
 798
 799         return 0;
 800 }
 801
 802 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 803         _cleanup_fclose_ FILE *f = NULL;
 804         char line[LINE_MAX];
 805         const char *fs;
 806         size_t cs = 0;
 807         int unified;
 808
 809         assert(path);
 810         assert(pid >= 0);
 811
 812         unified = cg_unified();
 813         if (unified < 0)
 814                 return unified;
 815         if (unified == 0) {
 816                 if (controller) {
 817                         if (!cg_controller_is_valid(controller))
 818                                 return -EINVAL;
 819                 } else
 820                         controller = SYSTEMD_CGROUP_CONTROLLER;
 821
 822                 cs = strlen(controller);
 823         }
 824
 825         fs = procfs_file_alloca(pid, "cgroup");
 826         f = fopen(fs, "re");
 827         if (!f)
 828                 return errno == ENOENT ? -ESRCH : -errno;
 829
 830         FOREACH_LINE(line, f, return -errno) {
 831                 char *e, *p;
 832
 833                 truncate_nl(line);
 834
 835                 if (unified) {
 836                         e = startswith(line, "0:");
 837                         if (!e)
 838                                 continue;
 839
 840                         e = strchr(e, ':');
 841                         if (!e)
 842                                 continue;
 843                 } else {
 844                         char *l;
 845                         size_t k;
 846                         const char *word, *state;
 847                         bool found = false;
 848
 849                         l = strchr(line, ':');
 850                         if (!l)
 851                                 continue;
 852
 853                         l++;
 854                         e = strchr(l, ':');
 855                         if (!e)
 856                                 continue;
 857
 858                         *e = 0;
 859                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 860                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 861                                         found = true;
 862                                         break;
 863                                 }
 864                         }
 865
 866                         if (!found)
 867                                 continue;
 868                 }
 869
 870                 p = strdup(e + 1);
 871                 if (!p)
 872                         return -ENOMEM;
 873
 874                 *path = p;
 875                 return 0;
 876         }
 877
 878         return -ENODATA;
 879 }
 880
 881 int cg_install_release_agent(const char *controller, const char *agent) {
 882         _cleanup_free_ char *fs = NULL, *contents = NULL;
 883         const char *sc;
 884         int r, unified;
 885
 886         assert(agent);
 887
 888         unified = cg_unified();
 889         if (unified < 0)
 890                 return unified;
 891         if (unified) /* doesn't apply to unified hierarchy */
 892                 return -EOPNOTSUPP;
 893
 894         r = cg_get_path(controller, NULL, "release_agent", &fs);
 895         if (r < 0)
 896                 return r;
 897
 898         r = read_one_line_file(fs, &contents);
 899         if (r < 0)
 900                 return r;
 901
 902         sc = strstrip(contents);
 903         if (isempty(sc)) {
 904                 r = write_string_file(fs, agent, 0);
 905                 if (r < 0)
 906                         return r;
 907         } else if (!path_equal(sc, agent))
 908                 return -EEXIST;
 909
 910         fs = mfree(fs);
 911         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 912         if (r < 0)
 913                 return r;
 914
 915         contents = mfree(contents);
 916         r = read_one_line_file(fs, &contents);
 917         if (r < 0)
 918                 return r;
 919
 920         sc = strstrip(contents);
 921         if (streq(sc, "0")) {
 922                 r = write_string_file(fs, "1", 0);
 923                 if (r < 0)
 924                         return r;
 925
 926                 return 1;
 927         }
 928
 929         if (!streq(sc, "1"))
 930                 return -EIO;
 931
 932         return 0;
 933 }
 934
 935 int cg_uninstall_release_agent(const char *controller) {
 936         _cleanup_free_ char *fs = NULL;
 937         int r, unified;
 938
 939         unified = cg_unified();
 940         if (unified < 0)
 941                 return unified;
 942         if (unified) /* Doesn't apply to unified hierarchy */
 943                 return -EOPNOTSUPP;
 944
 945         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 946         if (r < 0)
 947                 return r;
 948
 949         r = write_string_file(fs, "0", 0);
 950         if (r < 0)
 951                 return r;
 952
 953         fs = mfree(fs);
 954
 955         r = cg_get_path(controller, NULL, "release_agent", &fs);
 956         if (r < 0)
 957                 return r;
 958
 959         r = write_string_file(fs, "", 0);
 960         if (r < 0)
 961                 return r;
 962
 963         return 0;
 964 }
 965
 966 int cg_is_empty(const char *controller, const char *path) {
 967         _cleanup_fclose_ FILE *f = NULL;
 968         pid_t pid;
 969         int r;
 970
 971         assert(path);
 972
 973         r = cg_enumerate_processes(controller, path, &f);
 974         if (r == -ENOENT)
 975                 return 1;
 976         if (r < 0)
 977                 return r;
 978
 979         r = cg_read_pid(f, &pid);
 980         if (r < 0)
 981                 return r;
 982
 983         return r == 0;
 984 }
 985
 986 int cg_is_empty_recursive(const char *controller, const char *path) {
 987         int unified, r;
 988
 989         assert(path);
 990
 991         /* The root cgroup is always populated */
 992         if (controller && (isempty(path) || path_equal(path, "/")))
 993                 return false;
 994
 995         unified = cg_unified();
 996         if (unified < 0)
 997                 return unified;
 998
 999         if (unified > 0) {
1000                 _cleanup_free_ char *populated = NULL, *t = NULL;
1001
1002                 /* On the unified hierarchy we can check empty state
1003                  * via the "cgroup.populated" attribute. */
1004
1005                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1006                 if (r < 0)
1007                         return r;
1008
1009                 r = read_one_line_file(populated, &t);
1010                 if (r == -ENOENT)
1011                         return 1;
1012                 if (r < 0)
1013                         return r;
1014
1015                 return streq(t, "0");
1016         } else {
1017                 _cleanup_closedir_ DIR *d = NULL;
1018                 char *fn;
1019
1020                 r = cg_is_empty(controller, path);
1021                 if (r <= 0)
1022                         return r;
1023
1024                 r = cg_enumerate_subgroups(controller, path, &d);
1025                 if (r == -ENOENT)
1026                         return 1;
1027                 if (r < 0)
1028                         return r;
1029
1030                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1031                         _cleanup_free_ char *p = NULL;
1032
1033                         p = strjoin(path, "/", fn, NULL);
1034                         free(fn);
1035                         if (!p)
1036                                 return -ENOMEM;
1037
1038                         r = cg_is_empty_recursive(controller, p);
1039                         if (r <= 0)
1040                                 return r;
1041                 }
1042                 if (r < 0)
1043                         return r;
1044
1045                 return true;
1046         }
1047 }
1048
/* Splits a cgroup specification into its controller and path parts.
 * Three forms are understood:
 *
 *   "/some/path"        -> controller is NULL, path is set
 *   "controller"        -> controller is set, path is NULL
 *   "controller:/path"  -> both are set (path is NULL if nothing
 *                          follows the colon)
 *
 * Either output pointer may be NULL if the caller is not interested
 * in that part. Returned strings are newly allocated. Returns 0 on
 * success, -EINVAL if a part fails validation, -ENOMEM on allocation
 * failure. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *ctrl = NULL, *pth = NULL;
        const char *colon;

        assert(spec);

        /* Form 1: a plain absolute path */
        if (spec[0] == '/') {
                if (!path_is_safe(spec))
                        return -EINVAL;

                if (path) {
                        pth = strdup(spec);
                        if (!pth)
                                return -ENOMEM;

                        *path = path_kill_slashes(pth);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');

        /* Form 2: only a controller name */
        if (!colon) {
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        ctrl = strdup(spec);
                        if (!ctrl)
                                return -ENOMEM;

                        *controller = ctrl;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Form 3: controller and path, separated by a colon */
        ctrl = strndup(spec, colon - spec);
        if (!ctrl)
                return -ENOMEM;

        if (!cg_controller_is_valid(ctrl)) {
                free(ctrl);
                return -EINVAL;
        }

        if (!isempty(colon + 1)) {
                pth = strdup(colon + 1);
                if (!pth) {
                        free(ctrl);
                        return -ENOMEM;
                }

                if (!(path_is_safe(pth) && path_is_absolute(pth))) {
                        free(ctrl);
                        free(pth);
                        return -EINVAL;
                }

                path_kill_slashes(pth);
        }

        /* Hand each part to the caller, or drop it if unwanted */
        if (!controller)
                free(ctrl);
        else
                *controller = ctrl;

        if (!path)
                free(pth);
        else
                *path = pth;

        return 0;
}
1131
1132 int cg_mangle_path(const char *path, char **result) {
1133         _cleanup_free_ char *c = NULL, *p = NULL;
1134         char *t;
1135         int r;
1136
1137         assert(path);
1138         assert(result);
1139
1140         /* First, check if it already is a filesystem path */
1141         if (path_startswith(path, "/sys/fs/cgroup")) {
1142
1143                 t = strdup(path);
1144                 if (!t)
1145                         return -ENOMEM;
1146
1147                 *result = path_kill_slashes(t);
1148                 return 0;
1149         }
1150
1151         /* Otherwise, treat it as cg spec */
1152         r = cg_split_spec(path, &c, &p);
1153         if (r < 0)
1154                 return r;
1155
1156         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1157 }
1158
1159 int cg_get_root_path(char **path) {
1160         char *p, *e;
1161         int r;
1162
1163         assert(path);
1164
1165         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1166         if (r < 0)
1167                 return r;
1168
1169         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1170         if (!e)
1171                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1172         if (!e)
1173                 e = endswith(p, "/system"); /* even more legacy */
1174         if (e)
1175                 *e = 0;
1176
1177         *path = p;
1178         return 0;
1179 }
1180
1181 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1182         _cleanup_free_ char *rt = NULL;
1183         char *p;
1184         int r;
1185
1186         assert(cgroup);
1187         assert(shifted);
1188
1189         if (!root) {
1190                 /* If the root was specified let's use that, otherwise
1191                  * let's determine it from PID 1 */
1192
1193                 r = cg_get_root_path(&rt);
1194                 if (r < 0)
1195                         return r;
1196
1197                 root = rt;
1198         }
1199
1200         p = path_startswith(cgroup, root);
1201         if (p && p > cgroup)
1202                 *shifted = p - 1;
1203         else
1204                 *shifted = cgroup;
1205
1206         return 0;
1207 }
1208
1209 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1210         _cleanup_free_ char *raw = NULL;
1211         const char *c;
1212         int r;
1213
1214         assert(pid >= 0);
1215         assert(cgroup);
1216
1217         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1218         if (r < 0)
1219                 return r;
1220
1221         r = cg_shift_path(raw, root, &c);
1222         if (r < 0)
1223                 return r;
1224
1225         if (c == raw) {
1226                 *cgroup = raw;
1227                 raw = NULL;
1228         } else {
1229                 char *n;
1230
1231                 n = strdup(c);
1232                 if (!n)
1233                         return -ENOMEM;
1234
1235                 *cgroup = n;
1236         }
1237
1238         return 0;
1239 }
1240
1241 int cg_path_decode_unit(const char *cgroup, char **unit){
1242         char *c, *s;
1243         size_t n;
1244
1245         assert(cgroup);
1246         assert(unit);
1247
1248         n = strcspn(cgroup, "/");
1249         if (n < 3)
1250                 return -ENXIO;
1251
1252         c = strndupa(cgroup, n);
1253         c = cg_unescape(c);
1254
1255         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1256                 return -ENXIO;
1257
1258         s = strdup(c);
1259         if (!s)
1260                 return -ENOMEM;
1261
1262         *unit = s;
1263         return 0;
1264 }
1265
1266 static bool valid_slice_name(const char *p, size_t n) {
1267
1268         if (!p)
1269                 return false;
1270
1271         if (n < strlen("x.slice"))
1272                 return false;
1273
1274         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1275                 char buf[n+1], *c;
1276
1277                 memcpy(buf, p, n);
1278                 buf[n] = 0;
1279
1280                 c = cg_unescape(buf);
1281
1282                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1283         }
1284
1285         return false;
1286 }
1287
/* Skips over all leading slice components of the cgroup path "p",
 * including the slashes in front of them. Returns a pointer to the
 * first component that is not a valid slice name, which may be the
 * terminating NUL. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *start = p + strspn(p, "/");
                size_t len = strcspn(start, "/");

                if (!valid_slice_name(start, len))
                        return start;

                p = start + len;
        }
}
1305
/* Determines the unit a cgroup path belongs to: skips all leading
 * slices and decodes the component that follows.
 *
 * On success returns 0 and stores a newly allocated unit name in
 * *ret. Returns -ENXIO if no non-slice unit is found there, or
 * another negative errno-style code from cg_path_decode_unit(). */
int cg_path_get_unit(const char *path, char **ret) {
        char *name;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &name);
        if (r < 0)
                return r;

        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        /* All slices were skipped above, so refuse one showing up here */
        free(name);
        return -ENXIO;
}
1329
1330 int cg_pid_get_unit(pid_t pid, char **unit) {
1331         _cleanup_free_ char *cgroup = NULL;
1332         int r;
1333
1334         assert(unit);
1335
1336         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1337         if (r < 0)
1338                 return r;
1339
1340         return cg_path_get_unit(cgroup, unit);
1341 }
1342
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Leading slashes are skipped first. Returns a pointer to whatever
 * follows the session scope component (with the slashes after it
 * skipped too), or NULL if "p" is NULL or empty, or if its first
 * component is not a "session-<id>.scope" name with a valid
 * session id.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* The length check above guarantees that at least one
                 * character is left between prefix and suffix. */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal
                 * failure with NULL rather than a boolean. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1379
/**
 * Skip user@*.service, but require it to be there.
 *
 * Leading slashes are skipped first. Returns a pointer to whatever
 * follows the user manager component (with the slashes after it
 * skipped too), or NULL if "p" is NULL or empty, or if its first
 * component is not a "user@<uid>.service" name with a parsable UID.
 */
static const char *skip_user_manager(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) == 0 && memcmp(p + n - 8, ".service", 8) == 0) {
                size_t uid_len = n - 5 - 8;
                char uid_str[uid_len + 1];

                /* Cut out what sits between "user@" and ".service" */
                memcpy(uid_str, p + 5, uid_len);
                uid_str[uid_len] = 0;

                /* User manager services never need unescaping, since
                 * their names cannot clash with the kernel's own
                 * names, hence cg_unescape() is not needed here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;

                p += n;

                return p + strspn(p, "/");
        }

        return NULL;
}
1417
/* Skips the part of a cgroup path that leads up to a user's own
 * units: any number of slices, followed by either the user manager
 * service or a session scope. Returns a pointer to what follows, or
 * NULL if neither of the two is there. */
static const char *skip_user_prefix(const char *path) {
        const char *rest, *after;

        assert(path);

        /* Slices are optional */
        rest = skip_slices(path);

        /* Prefer the user manager, fall back to the session scope */
        after = skip_user_manager(rest);

        return after ? after : skip_session(rest);
}
1434
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO
 * if the path carries no user manager or session prefix, otherwise
 * whatever cg_path_get_unit() returns for the rest of the path. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the prefix things look just like a system unit, so
         * the same parser does the job. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1450
1451 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1452         _cleanup_free_ char *cgroup = NULL;
1453         int r;
1454
1455         assert(unit);
1456
1457         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1458         if (r < 0)
1459                 return r;
1460
1461         return cg_path_get_user_unit(cgroup, unit);
1462 }
1463
1464 int cg_path_get_machine_name(const char *path, char **machine) {
1465         _cleanup_free_ char *u = NULL;
1466         const char *sl;
1467         int r;
1468
1469         r = cg_path_get_unit(path, &u);
1470         if (r < 0)
1471                 return r;
1472
1473         sl = strjoina("/run/systemd/machines/unit:", u);
1474         return readlink_malloc(sl, machine);
1475 }
1476
1477 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1478         _cleanup_free_ char *cgroup = NULL;
1479         int r;
1480
1481         assert(machine);
1482
1483         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1484         if (r < 0)
1485                 return r;
1486
1487         return cg_path_get_machine_name(cgroup, machine);
1488 }
1489
1490 int cg_path_get_session(const char *path, char **session) {
1491         _cleanup_free_ char *unit = NULL;
1492         char *start, *end;
1493         int r;
1494
1495         assert(path);
1496
1497         r = cg_path_get_unit(path, &unit);
1498         if (r < 0)
1499                 return r;
1500
1501         start = startswith(unit, "session-");
1502         if (!start)
1503                 return -ENXIO;
1504         end = endswith(start, ".scope");
1505         if (!end)
1506                 return -ENXIO;
1507
1508         *end = 0;
1509         if (!session_id_valid(start))
1510                 return -ENXIO;
1511
1512         if (session) {
1513                 char *rr;
1514
1515                 rr = strdup(start);
1516                 if (!rr)
1517                         return -ENOMEM;
1518
1519                 *session = rr;
1520         }
1521
1522         return 0;
1523 }
1524
1525 int cg_pid_get_session(pid_t pid, char **session) {
1526         _cleanup_free_ char *cgroup = NULL;
1527         int r;
1528
1529         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1530         if (r < 0)
1531                 return r;
1532
1533         return cg_path_get_session(cgroup, session);
1534 }
1535
1536 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1537         _cleanup_free_ char *slice = NULL;
1538         char *start, *end;
1539         int r;
1540
1541         assert(path);
1542
1543         r = cg_path_get_slice(path, &slice);
1544         if (r < 0)
1545                 return r;
1546
1547         start = startswith(slice, "user-");
1548         if (!start)
1549                 return -ENXIO;
1550         end = endswith(start, ".slice");
1551         if (!end)
1552                 return -ENXIO;
1553
1554         *end = 0;
1555         if (parse_uid(start, uid) < 0)
1556                 return -ENXIO;
1557
1558         return 0;
1559 }
1560
1561 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1562         _cleanup_free_ char *cgroup = NULL;
1563         int r;
1564
1565         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1566         if (r < 0)
1567                 return r;
1568
1569         return cg_path_get_owner_uid(cgroup, uid);
1570 }
1571
/* Determines the slice a cgroup path is located in: the last one in
 * the run of slice components at the start of the path. If the path
 * does not begin with a slice at all, "-.slice" is returned.
 *
 * On success returns 0 and stores a newly allocated string in
 * *slice. Returns a negative errno-style code on failure. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        char *root;

        assert(p);
        assert(slice);

        /* Walk over the leading slices, remembering where the most
         * recent one began, and stop at the first component that is
         * not a slice. */
        for (;;) {
                size_t n;

                p += strspn(p, "/");

                n = strcspn(p, "/");
                if (!valid_slice_name(p, n))
                        break;

                last = p;
                p += n;
        }

        if (last)
                return cg_path_decode_unit(last, slice);

        /* No slice in the path at all */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1607
1608 int cg_pid_get_slice(pid_t pid, char **slice) {
1609         _cleanup_free_ char *cgroup = NULL;
1610         int r;
1611
1612         assert(slice);
1613
1614         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1615         if (r < 0)
1616                 return r;
1617
1618         return cg_path_get_slice(cgroup, slice);
1619 }
1620
/* Determines the user slice a cgroup path is located in. Returns
 * -ENXIO if the path carries no user manager or session prefix,
 * otherwise whatever cg_path_get_slice() returns for the rest of the
 * path. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the prefix things look just like a system slice, so
         * the same parser does the job. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1634
1635 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1636         _cleanup_free_ char *cgroup = NULL;
1637         int r;
1638
1639         assert(slice);
1640
1641         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1642         if (r < 0)
1643                 return r;
1644
1645         return cg_path_get_user_slice(cgroup, slice);
1646 }
1647
1648 char *cg_escape(const char *p) {
1649         bool need_prefix = false;
1650
1651         /* This implements very minimal escaping for names to be used
1652          * as file names in the cgroup tree: any name which might
1653          * conflict with a kernel name or is prefixed with '_' is
1654          * prefixed with a '_'. That way, when reading cgroup names it
1655          * is sufficient to remove a single prefixing underscore if
1656          * there is one. */
1657
1658         /* The return value of this function (unlike cg_unescape())
1659          * needs free()! */
1660
1661         if (p[0] == 0 ||
1662             p[0] == '_' ||
1663             p[0] == '.' ||
1664             streq(p, "notify_on_release") ||
1665             streq(p, "release_agent") ||
1666             streq(p, "tasks") ||
1667             startswith(p, "cgroup."))
1668                 need_prefix = true;
1669         else {
1670                 const char *dot;
1671
1672                 dot = strrchr(p, '.');
1673                 if (dot) {
1674                         CGroupController c;
1675                         size_t l = dot - p;
1676
1677                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1678                                 const char *n;
1679
1680                                 n = cgroup_controller_to_string(c);
1681
1682                                 if (l != strlen(n))
1683                                         continue;
1684
1685                                 if (memcmp(p, n, l) != 0)
1686                                         continue;
1687
1688                                 need_prefix = true;
1689                                 break;
1690                         }
1691                 }
1692         }
1693
1694         if (need_prefix)
1695                 return strappend("_", p);
1696
1697         return strdup(p);
1698 }
1699
/* Reverses cg_escape(): skips a single leading underscore, if there
 * is one.
 *
 * Unlike for cg_escape() the result is not allocated and must not be
 * passed to free()! It points into the string passed in. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char *) (*p == '_' ? p + 1 : p);
}
1711
1712 #define CONTROLLER_VALID                        \
1713         DIGITS LETTERS                          \
1714         "_"
1715
1716 bool cg_controller_is_valid(const char *p) {
1717         const char *t, *s;
1718
1719         if (!p)
1720                 return false;
1721
1722         s = startswith(p, "name=");
1723         if (s)
1724                 p = s;
1725
1726         if (*p == 0 || *p == '_')
1727                 return false;
1728
1729         for (t = p; *t; t++)
1730                 if (!strchr(CONTROLLER_VALID, *t))
1731                         return false;
1732
1733         if (t - p > FILENAME_MAX)
1734                 return false;
1735
1736         return true;
1737 }
1738
1739 int cg_slice_to_path(const char *unit, char **ret) {
1740         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1741         const char *dash;
1742         int r;
1743
1744         assert(unit);
1745         assert(ret);
1746
1747         if (streq(unit, "-.slice")) {
1748                 char *x;
1749
1750                 x = strdup("");
1751                 if (!x)
1752                         return -ENOMEM;
1753                 *ret = x;
1754                 return 0;
1755         }
1756
1757         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1758                 return -EINVAL;
1759
1760         if (!endswith(unit, ".slice"))
1761                 return -EINVAL;
1762
1763         r = unit_name_to_prefix(unit, &p);
1764         if (r < 0)
1765                 return r;
1766
1767         dash = strchr(p, '-');
1768
1769         /* Don't allow initial dashes */
1770         if (dash == p)
1771                 return -EINVAL;
1772
1773         while (dash) {
1774                 _cleanup_free_ char *escaped = NULL;
1775                 char n[dash - p + sizeof(".slice")];
1776
1777                 /* Don't allow trailing or double dashes */
1778                 if (dash[1] == 0 || dash[1] == '-')
1779                         return -EINVAL;
1780
1781                 strcpy(stpncpy(n, p, dash - p), ".slice");
1782                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1783                         return -EINVAL;
1784
1785                 escaped = cg_escape(n);
1786                 if (!escaped)
1787                         return -ENOMEM;
1788
1789                 if (!strextend(&s, escaped, "/", NULL))
1790                         return -ENOMEM;
1791
1792                 dash = strchr(dash+1, '-');
1793         }
1794
1795         e = cg_escape(unit);
1796         if (!e)
1797                 return -ENOMEM;
1798
1799         if (!strextend(&s, e, NULL))
1800                 return -ENOMEM;
1801
1802         *ret = s;
1803         s = NULL;
1804
1805         return 0;
1806 }
1807
1808 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1809         _cleanup_free_ char *p = NULL;
1810         int r;
1811
1812         r = cg_get_path(controller, path, attribute, &p);
1813         if (r < 0)
1814                 return r;
1815
1816         return write_string_file(p, value, 0);
1817 }
1818
1819 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1820         _cleanup_free_ char *p = NULL;
1821         int r;
1822
1823         r = cg_get_path(controller, path, attribute, &p);
1824         if (r < 0)
1825                 return r;
1826
1827         return read_one_line_file(p, ret);
1828 }
1829
1830 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1831         CGroupController c;
1832         int r, unified;
1833
1834         /* This one will create a cgroup in our private tree, but also
1835          * duplicate it in the trees specified in mask, and remove it
1836          * in all others */
1837
1838         /* First create the cgroup in our own hierarchy. */
1839         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1840         if (r < 0)
1841                 return r;
1842
1843         /* If we are in the unified hierarchy, we are done now */
1844         unified = cg_unified();
1845         if (unified < 0)
1846                 return unified;
1847         if (unified > 0)
1848                 return 0;
1849
1850         /* Otherwise, do the same in the other hierarchies */
1851         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1852                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1853                 const char *n;
1854
1855                 n = cgroup_controller_to_string(c);
1856
1857                 if (mask & bit)
1858                         (void) cg_create(n, path);
1859                 else if (supported & bit)
1860                         (void) cg_trim(n, path, true);
1861         }
1862
1863         return 0;
1864 }
1865
1866 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1867         CGroupController c;
1868         int r, unified;
1869
1870         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1871         if (r < 0)
1872                 return r;
1873
1874         unified = cg_unified();
1875         if (unified < 0)
1876                 return unified;
1877         if (unified > 0)
1878                 return 0;
1879
1880         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1881                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1882                 const char *p = NULL;
1883
1884                 if (!(supported & bit))
1885                         continue;
1886
1887                 if (path_callback)
1888                         p = path_callback(bit, userdata);
1889
1890                 if (!p)
1891                         p = path;
1892
1893                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1894         }
1895
1896         return 0;
1897 }
1898
1899 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1900         Iterator i;
1901         void *pidp;
1902         int r = 0;
1903
1904         SET_FOREACH(pidp, pids, i) {
1905                 pid_t pid = PTR_TO_PID(pidp);
1906                 int q;
1907
1908                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1909                 if (q < 0 && r >= 0)
1910                         r = q;
1911         }
1912
1913         return r;
1914 }
1915
1916 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1917         CGroupController c;
1918         int r = 0, unified;
1919
1920         if (!path_equal(from, to))  {
1921                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1922                 if (r < 0)
1923                         return r;
1924         }
1925
1926         unified = cg_unified();
1927         if (unified < 0)
1928                 return unified;
1929         if (unified > 0)
1930                 return r;
1931
1932         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1933                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1934                 const char *p = NULL;
1935
1936                 if (!(supported & bit))
1937                         continue;
1938
1939                 if (to_callback)
1940                         p = to_callback(bit, userdata);
1941
1942                 if (!p)
1943                         p = to;
1944
1945                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1946         }
1947
1948         return 0;
1949 }
1950
1951 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1952         CGroupController c;
1953         int r, unified;
1954
1955         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1956         if (r < 0)
1957                 return r;
1958
1959         unified = cg_unified();
1960         if (unified < 0)
1961                 return unified;
1962         if (unified > 0)
1963                 return r;
1964
1965         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1966                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1967
1968                 if (!(supported & bit))
1969                         continue;
1970
1971                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1972         }
1973
1974         return 0;
1975 }
1976
1977 int cg_mask_supported(CGroupMask *ret) {
1978         CGroupMask mask = 0;
1979         int r, unified;
1980
1981         /* Determines the mask of supported cgroup controllers. Only
1982          * includes controllers we can make sense of and that are
1983          * actually accessible. */
1984
1985         unified = cg_unified();
1986         if (unified < 0)
1987                 return unified;
1988         if (unified > 0) {
1989                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
1990                 const char *c;
1991
1992                 /* In the unified hierarchy we can read the supported
1993                  * and accessible controllers from a the top-level
1994                  * cgroup attribute */
1995
1996                 r = cg_get_root_path(&root);
1997                 if (r < 0)
1998                         return r;
1999
2000                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2001                 if (r < 0)
2002                         return r;
2003
2004                 r = read_one_line_file(path, &controllers);
2005                 if (r < 0)
2006                         return r;
2007
2008                 c = controllers;
2009                 for (;;) {
2010                         _cleanup_free_ char *n = NULL;
2011                         CGroupController v;
2012
2013                         r = extract_first_word(&c, &n, NULL, 0);
2014                         if (r < 0)
2015                                 return r;
2016                         if (r == 0)
2017                                 break;
2018
2019                         v = cgroup_controller_from_string(n);
2020                         if (v < 0)
2021                                 continue;
2022
2023                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2024                 }
2025
2026                 /* Currently, we only support the memory and pids
2027                  * controller in the unified hierarchy, mask
2028                  * everything else off. */
2029                 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS;
2030
2031         } else {
2032                 CGroupController c;
2033
2034                 /* In the legacy hierarchy, we check whether which
2035                  * hierarchies are mounted. */
2036
2037                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2038                         const char *n;
2039
2040                         n = cgroup_controller_to_string(c);
2041                         if (controller_is_accessible(n) >= 0)
2042                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2043                 }
2044         }
2045
2046         *ret = mask;
2047         return 0;
2048 }
2049
2050 int cg_kernel_controllers(Set *controllers) {
2051         _cleanup_fclose_ FILE *f = NULL;
2052         char buf[LINE_MAX];
2053         int r;
2054
2055         assert(controllers);
2056
2057         /* Determines the full list of kernel-known controllers. Might
2058          * include controllers we don't actually support, arbitrary
2059          * named hierarchies and controllers that aren't currently
2060          * accessible (because not mounted). */
2061
2062         f = fopen("/proc/cgroups", "re");
2063         if (!f) {
2064                 if (errno == ENOENT)
2065                         return 0;
2066                 return -errno;
2067         }
2068
2069         /* Ignore the header line */
2070         (void) fgets(buf, sizeof(buf), f);
2071
2072         for (;;) {
2073                 char *controller;
2074                 int enabled = 0;
2075
2076                 errno = 0;
2077                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2078
2079                         if (feof(f))
2080                                 break;
2081
2082                         if (ferror(f) && errno != 0)
2083                                 return -errno;
2084
2085                         return -EBADMSG;
2086                 }
2087
2088                 if (!enabled) {
2089                         free(controller);
2090                         continue;
2091                 }
2092
2093                 if (!cg_controller_is_valid(controller)) {
2094                         free(controller);
2095                         return -EBADMSG;
2096                 }
2097
2098                 r = set_consume(controllers, controller);
2099                 if (r < 0)
2100                         return r;
2101         }
2102
2103         return 0;
2104 }
2105
2106 static thread_local int unified_cache = -1;
2107
2108 int cg_unified(void) {
2109         struct statfs fs;
2110
2111         /* Checks if we support the unified hierarchy. Returns an
2112          * error when the cgroup hierarchies aren't mounted yet or we
2113          * have any other trouble determining if the unified hierarchy
2114          * is supported. */
2115
2116         if (unified_cache >= 0)
2117                 return unified_cache;
2118
2119         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2120                 return -errno;
2121
2122         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2123                 unified_cache = true;
2124         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2125                 unified_cache = false;
2126         else
2127                 return -ENOEXEC;
2128
2129         return unified_cache;
2130 }
2131
2132 void cg_unified_flush(void) {
2133         unified_cache = -1;
2134 }
2135
2136 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2137         _cleanup_free_ char *fs = NULL;
2138         CGroupController c;
2139         int r, unified;
2140
2141         assert(p);
2142
2143         if (supported == 0)
2144                 return 0;
2145
2146         unified = cg_unified();
2147         if (unified < 0)
2148                 return unified;
2149         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2150                 return 0;
2151
2152         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2153         if (r < 0)
2154                 return r;
2155
2156         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2157                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2158                 const char *n;
2159
2160                 if (!(supported & bit))
2161                         continue;
2162
2163                 n = cgroup_controller_to_string(c);
2164                 {
2165                         char s[1 + strlen(n) + 1];
2166
2167                         s[0] = mask & bit ? '+' : '-';
2168                         strcpy(s + 1, n);
2169
2170                         r = write_string_file(fs, s, 0);
2171                         if (r < 0)
2172                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2173                 }
2174         }
2175
2176         return 0;
2177 }
2178
2179 bool cg_is_unified_wanted(void) {
2180         static thread_local int wanted = -1;
2181         int r, unified;
2182
2183         /* If the hierarchy is already mounted, then follow whatever
2184          * was chosen for it. */
2185         unified = cg_unified();
2186         if (unified >= 0)
2187                 return unified;
2188
2189         /* Otherwise, let's see what the kernel command line has to
2190          * say. Since checking that is expensive, let's cache the
2191          * result. */
2192         if (wanted >= 0)
2193                 return wanted;
2194
2195         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2196         if (r > 0)
2197                 return (wanted = true);
2198         else {
2199                 _cleanup_free_ char *value = NULL;
2200
2201                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2202                 if (r < 0)
2203                         return false;
2204                 if (r == 0)
2205                         return (wanted = false);
2206
2207                 return (wanted = parse_boolean(value) > 0);
2208         }
2209 }
2210
/* Exact opposite of cg_is_unified_wanted(): true if the unified hierarchy
 * is not wanted. */
bool cg_is_legacy_wanted(void) {
        bool unified = cg_is_unified_wanted();

        return !unified;
}
2214
2215 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2216         uint64_t u;
2217         int r;
2218
2219         if (isempty(s)) {
2220                 *ret = CGROUP_CPU_SHARES_INVALID;
2221                 return 0;
2222         }
2223
2224         r = safe_atou64(s, &u);
2225         if (r < 0)
2226                 return r;
2227
2228         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2229                 return -ERANGE;
2230
2231         *ret = u;
2232         return 0;
2233 }
2234
2235 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2236         uint64_t u;
2237         int r;
2238
2239         if (isempty(s)) {
2240                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2241                 return 0;
2242         }
2243
2244         r = safe_atou64(s, &u);
2245         if (r < 0)
2246                 return r;
2247
2248         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2249                 return -ERANGE;
2250
2251         *ret = u;
2252         return 0;
2253 }
2254
2255 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2256         [CGROUP_CONTROLLER_CPU] = "cpu",
2257         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2258         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2259         [CGROUP_CONTROLLER_MEMORY] = "memory",
2260         [CGROUP_CONTROLLER_DEVICES] = "devices",
2261         [CGROUP_CONTROLLER_PIDS] = "pids",
2262         [CGROUP_CONTROLLER_NET_CLS] = "net_cls",
2263 };
2264
2265 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);