src/basic/cgroup-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <dirent.h>
  23 #include <errno.h>
  24 #include <ftw.h>
  25 #include <signal.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <sys/stat.h>
  29 #include <sys/types.h>
  30 #include <unistd.h>
  31
  32 #include "cgroup-util.h"
  33 #include "dirent-util.h"
  34 #include "extract-word.h"
  35 #include "fd-util.h"
  36 #include "fileio.h"
  37 #include "formats-util.h"
  38 #include "fs-util.h"
  39 #include "login-util.h"
  40 #include "macro.h"
  41 #include "mkdir.h"
  42 #include "parse-util.h"
  43 #include "path-util.h"
  44 #include "process-util.h"
  45 #include "set.h"
  46 #include "special.h"
  47 #include "string-util.h"
  48 #include "unit-name.h"
  49 #include "user-util.h"
  50 #include "util.h"
  51
  52 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  53         _cleanup_free_ char *fs = NULL;
  54         FILE *f;
  55         int r;
  56
  57         assert(_f);
  58
  59         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  60         if (r < 0)
  61                 return r;
  62
  63         f = fopen(fs, "re");
  64         if (!f)
  65                 return -errno;
  66
  67         *_f = f;
  68         return 0;
  69 }
  70
  71 int cg_read_pid(FILE *f, pid_t *_pid) {
  72         unsigned long ul;
  73
  74         /* Note that the cgroup.procs might contain duplicates! See
  75          * cgroups.txt for details. */
  76
  77         assert(f);
  78         assert(_pid);
  79
  80         errno = 0;
  81         if (fscanf(f, "%lu", &ul) != 1) {
  82
  83                 if (feof(f))
  84                         return 0;
  85
  86                 return errno ? -errno : -EIO;
  87         }
  88
  89         if (ul <= 0)
  90                 return -EIO;
  91
  92         *_pid = (pid_t) ul;
  93         return 1;
  94 }
  95
  96 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
  97         _cleanup_free_ char *fs = NULL;
  98         int r;
  99         DIR *d;
 100
 101         assert(_d);
 102
 103         /* This is not recursive! */
 104
 105         r = cg_get_path(controller, path, NULL, &fs);
 106         if (r < 0)
 107                 return r;
 108
 109         d = opendir(fs);
 110         if (!d)
 111                 return -errno;
 112
 113         *_d = d;
 114         return 0;
 115 }
 116
 117 int cg_read_subgroup(DIR *d, char **fn) {
 118         struct dirent *de;
 119
 120         assert(d);
 121         assert(fn);
 122
 123         FOREACH_DIRENT_ALL(de, d, return -errno) {
 124                 char *b;
 125
 126                 if (de->d_type != DT_DIR)
 127                         continue;
 128
 129                 if (streq(de->d_name, ".") ||
 130                     streq(de->d_name, ".."))
 131                         continue;
 132
 133                 b = strdup(de->d_name);
 134                 if (!b)
 135                         return -ENOMEM;
 136
 137                 *fn = b;
 138                 return 1;
 139         }
 140
 141         return 0;
 142 }
 143
 144 int cg_rmdir(const char *controller, const char *path) {
 145         _cleanup_free_ char *p = NULL;
 146         int r;
 147
 148         r = cg_get_path(controller, path, NULL, &p);
 149         if (r < 0)
 150                 return r;
 151
 152         r = rmdir(p);
 153         if (r < 0 && errno != ENOENT)
 154                 return -errno;
 155
 156         return 0;
 157 }
 158
 159 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
 160         _cleanup_set_free_ Set *allocated_set = NULL;
 161         bool done = false;
 162         int r, ret = 0;
 163         pid_t my_pid;
 164
 165         assert(sig >= 0);
 166
 167         /* This goes through the tasks list and kills them all. This
 168          * is repeated until no further processes are added to the
 169          * tasks list, to properly handle forking processes */
 170
 171         if (!s) {
 172                 s = allocated_set = set_new(NULL);
 173                 if (!s)
 174                         return -ENOMEM;
 175         }
 176
 177         my_pid = getpid();
 178
 179         do {
 180                 _cleanup_fclose_ FILE *f = NULL;
 181                 pid_t pid = 0;
 182                 done = true;
 183
 184                 r = cg_enumerate_processes(controller, path, &f);
 185                 if (r < 0) {
 186                         if (ret >= 0 && r != -ENOENT)
 187                                 return r;
 188
 189                         return ret;
 190                 }
 191
 192                 while ((r = cg_read_pid(f, &pid)) > 0) {
 193
 194                         if (ignore_self && pid == my_pid)
 195                                 continue;
 196
 197                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 198                                 continue;
 199
 200                         /* If we haven't killed this process yet, kill
 201                          * it */
 202                         if (kill(pid, sig) < 0) {
 203                                 if (ret >= 0 && errno != ESRCH)
 204                                         ret = -errno;
 205                         } else {
 206                                 if (sigcont && sig != SIGKILL)
 207                                         (void) kill(pid, SIGCONT);
 208
 209                                 if (ret == 0)
 210                                         ret = 1;
 211                         }
 212
 213                         done = false;
 214
 215                         r = set_put(s, PID_TO_PTR(pid));
 216                         if (r < 0) {
 217                                 if (ret >= 0)
 218                                         return r;
 219
 220                                 return ret;
 221                         }
 222                 }
 223
 224                 if (r < 0) {
 225                         if (ret >= 0)
 226                                 return r;
 227
 228                         return ret;
 229                 }
 230
 231                 /* To avoid racing against processes which fork
 232                  * quicker than we can kill them we repeat this until
 233                  * no new pids need to be killed. */
 234
 235         } while (!done);
 236
 237         return ret;
 238 }
 239
 240 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 241         _cleanup_set_free_ Set *allocated_set = NULL;
 242         _cleanup_closedir_ DIR *d = NULL;
 243         int r, ret;
 244         char *fn;
 245
 246         assert(path);
 247         assert(sig >= 0);
 248
 249         if (!s) {
 250                 s = allocated_set = set_new(NULL);
 251                 if (!s)
 252                         return -ENOMEM;
 253         }
 254
 255         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 256
 257         r = cg_enumerate_subgroups(controller, path, &d);
 258         if (r < 0) {
 259                 if (ret >= 0 && r != -ENOENT)
 260                         return r;
 261
 262                 return ret;
 263         }
 264
 265         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 266                 _cleanup_free_ char *p = NULL;
 267
 268                 p = strjoin(path, "/", fn, NULL);
 269                 free(fn);
 270                 if (!p)
 271                         return -ENOMEM;
 272
 273                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 274                 if (r != 0 && ret >= 0)
 275                         ret = r;
 276         }
 277
 278         if (ret >= 0 && r < 0)
 279                 ret = r;
 280
 281         if (rem) {
 282                 r = cg_rmdir(controller, path);
 283                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 284                         return r;
 285         }
 286
 287         return ret;
 288 }
 289
 290 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
 291         bool done = false;
 292         _cleanup_set_free_ Set *s = NULL;
 293         int r, ret = 0;
 294         pid_t my_pid;
 295
 296         assert(cfrom);
 297         assert(pfrom);
 298         assert(cto);
 299         assert(pto);
 300
 301         s = set_new(NULL);
 302         if (!s)
 303                 return -ENOMEM;
 304
 305         my_pid = getpid();
 306
 307         do {
 308                 _cleanup_fclose_ FILE *f = NULL;
 309                 pid_t pid = 0;
 310                 done = true;
 311
 312                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 313                 if (r < 0) {
 314                         if (ret >= 0 && r != -ENOENT)
 315                                 return r;
 316
 317                         return ret;
 318                 }
 319
 320                 while ((r = cg_read_pid(f, &pid)) > 0) {
 321
 322                         /* This might do weird stuff if we aren't a
 323                          * single-threaded program. However, we
 324                          * luckily know we are not */
 325                         if (ignore_self && pid == my_pid)
 326                                 continue;
 327
 328                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 329                                 continue;
 330
 331                         /* Ignore kernel threads. Since they can only
 332                          * exist in the root cgroup, we only check for
 333                          * them there. */
 334                         if (cfrom &&
 335                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 336                             is_kernel_thread(pid) > 0)
 337                                 continue;
 338
 339                         r = cg_attach(cto, pto, pid);
 340                         if (r < 0) {
 341                                 if (ret >= 0 && r != -ESRCH)
 342                                         ret = r;
 343                         } else if (ret == 0)
 344                                 ret = 1;
 345
 346                         done = false;
 347
 348                         r = set_put(s, PID_TO_PTR(pid));
 349                         if (r < 0) {
 350                                 if (ret >= 0)
 351                                         return r;
 352
 353                                 return ret;
 354                         }
 355                 }
 356
 357                 if (r < 0) {
 358                         if (ret >= 0)
 359                                 return r;
 360
 361                         return ret;
 362                 }
 363         } while (!done);
 364
 365         return ret;
 366 }
 367
 368 int cg_migrate_recursive(
 369                 const char *cfrom,
 370                 const char *pfrom,
 371                 const char *cto,
 372                 const char *pto,
 373                 bool ignore_self,
 374                 bool rem) {
 375
 376         _cleanup_closedir_ DIR *d = NULL;
 377         int r, ret = 0;
 378         char *fn;
 379
 380         assert(cfrom);
 381         assert(pfrom);
 382         assert(cto);
 383         assert(pto);
 384
 385         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 386
 387         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 388         if (r < 0) {
 389                 if (ret >= 0 && r != -ENOENT)
 390                         return r;
 391
 392                 return ret;
 393         }
 394
 395         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 396                 _cleanup_free_ char *p = NULL;
 397
 398                 p = strjoin(pfrom, "/", fn, NULL);
 399                 free(fn);
 400                 if (!p)
 401                         return -ENOMEM;
 402
 403                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 404                 if (r != 0 && ret >= 0)
 405                         ret = r;
 406         }
 407
 408         if (r < 0 && ret >= 0)
 409                 ret = r;
 410
 411         if (rem) {
 412                 r = cg_rmdir(cfrom, pfrom);
 413                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 414                         return r;
 415         }
 416
 417         return ret;
 418 }
 419
 420 int cg_migrate_recursive_fallback(
 421                 const char *cfrom,
 422                 const char *pfrom,
 423                 const char *cto,
 424                 const char *pto,
 425                 bool ignore_self,
 426                 bool rem) {
 427
 428         int r;
 429
 430         assert(cfrom);
 431         assert(pfrom);
 432         assert(cto);
 433         assert(pto);
 434
 435         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 436         if (r < 0) {
 437                 char prefix[strlen(pto) + 1];
 438
 439                 /* This didn't work? Then let's try all prefixes of the destination */
 440
 441                 PATH_FOREACH_PREFIX(prefix, pto) {
 442                         int q;
 443
 444                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 445                         if (q >= 0)
 446                                 return q;
 447                 }
 448         }
 449
 450         return r;
 451 }
 452
 453 static const char *controller_to_dirname(const char *controller) {
 454         const char *e;
 455
 456         assert(controller);
 457
 458         /* Converts a controller name to the directory name below
 459          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 460          * just cuts off the name= prefixed used for named
 461          * hierarchies, if it is specified. */
 462
 463         e = startswith(controller, "name=");
 464         if (e)
 465                 return e;
 466
 467         return controller;
 468 }
 469
 470 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 471         const char *dn;
 472         char *t = NULL;
 473
 474         assert(fs);
 475         assert(controller);
 476
 477         dn = controller_to_dirname(controller);
 478
 479         if (isempty(path) && isempty(suffix))
 480                 t = strappend("/sys/fs/cgroup/", dn);
 481         else if (isempty(path))
 482                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 483         else if (isempty(suffix))
 484                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 485         else
 486                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 487         if (!t)
 488                 return -ENOMEM;
 489
 490         *fs = t;
 491         return 0;
 492 }
 493
 494 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 495         char *t;
 496
 497         assert(fs);
 498
 499         if (isempty(path) && isempty(suffix))
 500                 t = strdup("/sys/fs/cgroup");
 501         else if (isempty(path))
 502                 t = strappend("/sys/fs/cgroup/", suffix);
 503         else if (isempty(suffix))
 504                 t = strappend("/sys/fs/cgroup/", path);
 505         else
 506                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 507         if (!t)
 508                 return -ENOMEM;
 509
 510         *fs = t;
 511         return 0;
 512 }
 513
 514 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 515         int unified, r;
 516
 517         assert(fs);
 518
 519         if (!controller) {
 520                 char *t;
 521
 522                 /* If no controller is specified, we return the path
 523                  * *below* the controllers, without any prefix. */
 524
 525                 if (!path && !suffix)
 526                         return -EINVAL;
 527
 528                 if (!suffix)
 529                         t = strdup(path);
 530                 else if (!path)
 531                         t = strdup(suffix);
 532                 else
 533                         t = strjoin(path, "/", suffix, NULL);
 534                 if (!t)
 535                         return -ENOMEM;
 536
 537                 *fs = path_kill_slashes(t);
 538                 return 0;
 539         }
 540
 541         if (!cg_controller_is_valid(controller))
 542                 return -EINVAL;
 543
 544         unified = cg_unified();
 545         if (unified < 0)
 546                 return unified;
 547
 548         if (unified > 0)
 549                 r = join_path_unified(path, suffix, fs);
 550         else
 551                 r = join_path_legacy(controller, path, suffix, fs);
 552         if (r < 0)
 553                 return r;
 554
 555         path_kill_slashes(*fs);
 556         return 0;
 557 }
 558
 559 static int controller_is_accessible(const char *controller) {
 560         int unified;
 561
 562         assert(controller);
 563
 564         /* Checks whether a specific controller is accessible,
 565          * i.e. its hierarchy mounted. In the unified hierarchy all
 566          * controllers are considered accessible, except for the named
 567          * hierarchies */
 568
 569         if (!cg_controller_is_valid(controller))
 570                 return -EINVAL;
 571
 572         unified = cg_unified();
 573         if (unified < 0)
 574                 return unified;
 575         if (unified > 0) {
 576                 /* We don't support named hierarchies if we are using
 577                  * the unified hierarchy. */
 578
 579                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 580                         return 0;
 581
 582                 if (startswith(controller, "name="))
 583                         return -EOPNOTSUPP;
 584
 585         } else {
 586                 const char *cc, *dn;
 587
 588                 dn = controller_to_dirname(controller);
 589                 cc = strjoina("/sys/fs/cgroup/", dn);
 590
 591                 if (laccess(cc, F_OK) < 0)
 592                         return -errno;
 593         }
 594
 595         return 0;
 596 }
 597
 598 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 599         int r;
 600
 601         assert(controller);
 602         assert(fs);
 603
 604         /* Check if the specified controller is actually accessible */
 605         r = controller_is_accessible(controller);
 606         if (r < 0)
 607                 return r;
 608
 609         return cg_get_path(controller, path, suffix, fs);
 610 }
 611
 612 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 613         assert(path);
 614         assert(sb);
 615         assert(ftwbuf);
 616
 617         if (typeflag != FTW_DP)
 618                 return 0;
 619
 620         if (ftwbuf->level < 1)
 621                 return 0;
 622
 623         (void) rmdir(path);
 624         return 0;
 625 }
 626
 627 int cg_trim(const char *controller, const char *path, bool delete_root) {
 628         _cleanup_free_ char *fs = NULL;
 629         int r = 0;
 630
 631         assert(path);
 632
 633         r = cg_get_path(controller, path, NULL, &fs);
 634         if (r < 0)
 635                 return r;
 636
 637         errno = 0;
 638         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 639                 if (errno == ENOENT)
 640                         r = 0;
 641                 else if (errno != 0)
 642                         r = -errno;
 643                 else
 644                         r = -EIO;
 645         }
 646
 647         if (delete_root) {
 648                 if (rmdir(fs) < 0 && errno != ENOENT)
 649                         return -errno;
 650         }
 651
 652         return r;
 653 }
 654
 655 int cg_create(const char *controller, const char *path) {
 656         _cleanup_free_ char *fs = NULL;
 657         int r;
 658
 659         r = cg_get_path_and_check(controller, path, NULL, &fs);
 660         if (r < 0)
 661                 return r;
 662
 663         r = mkdir_parents(fs, 0755);
 664         if (r < 0)
 665                 return r;
 666
 667         if (mkdir(fs, 0755) < 0) {
 668
 669                 if (errno == EEXIST)
 670                         return 0;
 671
 672                 return -errno;
 673         }
 674
 675         return 1;
 676 }
 677
 678 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 679         int r, q;
 680
 681         assert(pid >= 0);
 682
 683         r = cg_create(controller, path);
 684         if (r < 0)
 685                 return r;
 686
 687         q = cg_attach(controller, path, pid);
 688         if (q < 0)
 689                 return q;
 690
 691         /* This does not remove the cgroup on failure */
 692         return r;
 693 }
 694
 695 int cg_attach(const char *controller, const char *path, pid_t pid) {
 696         _cleanup_free_ char *fs = NULL;
 697         char c[DECIMAL_STR_MAX(pid_t) + 2];
 698         int r;
 699
 700         assert(path);
 701         assert(pid >= 0);
 702
 703         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 704         if (r < 0)
 705                 return r;
 706
 707         if (pid == 0)
 708                 pid = getpid();
 709
 710         snprintf(c, sizeof(c), PID_FMT"\n", pid);
 711
 712         return write_string_file(fs, c, 0);
 713 }
 714
 715 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 716         int r;
 717
 718         assert(controller);
 719         assert(path);
 720         assert(pid >= 0);
 721
 722         r = cg_attach(controller, path, pid);
 723         if (r < 0) {
 724                 char prefix[strlen(path) + 1];
 725
 726                 /* This didn't work? Then let's try all prefixes of
 727                  * the destination */
 728
 729                 PATH_FOREACH_PREFIX(prefix, path) {
 730                         int q;
 731
 732                         q = cg_attach(controller, prefix, pid);
 733                         if (q >= 0)
 734                                 return q;
 735                 }
 736         }
 737
 738         return r;
 739 }
 740
 741 int cg_set_group_access(
 742                 const char *controller,
 743                 const char *path,
 744                 mode_t mode,
 745                 uid_t uid,
 746                 gid_t gid) {
 747
 748         _cleanup_free_ char *fs = NULL;
 749         int r;
 750
 751         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 752                 return 0;
 753
 754         if (mode != MODE_INVALID)
 755                 mode &= 0777;
 756
 757         r = cg_get_path(controller, path, NULL, &fs);
 758         if (r < 0)
 759                 return r;
 760
 761         return chmod_and_chown(fs, mode, uid, gid);
 762 }
 763
 764 int cg_set_task_access(
 765                 const char *controller,
 766                 const char *path,
 767                 mode_t mode,
 768                 uid_t uid,
 769                 gid_t gid) {
 770
 771         _cleanup_free_ char *fs = NULL, *procs = NULL;
 772         int r, unified;
 773
 774         assert(path);
 775
 776         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 777                 return 0;
 778
 779         if (mode != MODE_INVALID)
 780                 mode &= 0666;
 781
 782         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 783         if (r < 0)
 784                 return r;
 785
 786         r = chmod_and_chown(fs, mode, uid, gid);
 787         if (r < 0)
 788                 return r;
 789
 790         unified = cg_unified();
 791         if (unified < 0)
 792                 return unified;
 793         if (unified)
 794                 return 0;
 795
 796         /* Compatibility, Always keep values for "tasks" in sync with
 797          * "cgroup.procs" */
 798         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 799                 (void) chmod_and_chown(procs, mode, uid, gid);
 800
 801         return 0;
 802 }
 803
 804 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 805         _cleanup_fclose_ FILE *f = NULL;
 806         char line[LINE_MAX];
 807         const char *fs;
 808         size_t cs = 0;
 809         int unified;
 810
 811         assert(path);
 812         assert(pid >= 0);
 813
 814         unified = cg_unified();
 815         if (unified < 0)
 816                 return unified;
 817         if (unified == 0) {
 818                 if (controller) {
 819                         if (!cg_controller_is_valid(controller))
 820                                 return -EINVAL;
 821                 } else
 822                         controller = SYSTEMD_CGROUP_CONTROLLER;
 823
 824                 cs = strlen(controller);
 825         }
 826
 827         fs = procfs_file_alloca(pid, "cgroup");
 828         f = fopen(fs, "re");
 829         if (!f)
 830                 return errno == ENOENT ? -ESRCH : -errno;
 831
 832         FOREACH_LINE(line, f, return -errno) {
 833                 char *e, *p;
 834
 835                 truncate_nl(line);
 836
 837                 if (unified) {
 838                         e = startswith(line, "0:");
 839                         if (!e)
 840                                 continue;
 841
 842                         e = strchr(e, ':');
 843                         if (!e)
 844                                 continue;
 845                 } else {
 846                         char *l;
 847                         size_t k;
 848                         const char *word, *state;
 849                         bool found = false;
 850
 851                         l = strchr(line, ':');
 852                         if (!l)
 853                                 continue;
 854
 855                         l++;
 856                         e = strchr(l, ':');
 857                         if (!e)
 858                                 continue;
 859
 860                         *e = 0;
 861                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 862                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 863                                         found = true;
 864                                         break;
 865                                 }
 866                         }
 867
 868                         if (!found)
 869                                 continue;
 870                 }
 871
 872                 p = strdup(e + 1);
 873                 if (!p)
 874                         return -ENOMEM;
 875
 876                 *path = p;
 877                 return 0;
 878         }
 879
 880         return -ENODATA;
 881 }
 882
 883 int cg_install_release_agent(const char *controller, const char *agent) {
 884         _cleanup_free_ char *fs = NULL, *contents = NULL;
 885         const char *sc;
 886         int r, unified;
 887
 888         assert(agent);
 889
 890         unified = cg_unified();
 891         if (unified < 0)
 892                 return unified;
 893         if (unified) /* doesn't apply to unified hierarchy */
 894                 return -EOPNOTSUPP;
 895
 896         r = cg_get_path(controller, NULL, "release_agent", &fs);
 897         if (r < 0)
 898                 return r;
 899
 900         r = read_one_line_file(fs, &contents);
 901         if (r < 0)
 902                 return r;
 903
 904         sc = strstrip(contents);
 905         if (isempty(sc)) {
 906                 r = write_string_file(fs, agent, 0);
 907                 if (r < 0)
 908                         return r;
 909         } else if (!path_equal(sc, agent))
 910                 return -EEXIST;
 911
 912         fs = mfree(fs);
 913         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 914         if (r < 0)
 915                 return r;
 916
 917         contents = mfree(contents);
 918         r = read_one_line_file(fs, &contents);
 919         if (r < 0)
 920                 return r;
 921
 922         sc = strstrip(contents);
 923         if (streq(sc, "0")) {
 924                 r = write_string_file(fs, "1", 0);
 925                 if (r < 0)
 926                         return r;
 927
 928                 return 1;
 929         }
 930
 931         if (!streq(sc, "1"))
 932                 return -EIO;
 933
 934         return 0;
 935 }
 936
 937 int cg_uninstall_release_agent(const char *controller) {
 938         _cleanup_free_ char *fs = NULL;
 939         int r, unified;
 940
 941         unified = cg_unified();
 942         if (unified < 0)
 943                 return unified;
 944         if (unified) /* Doesn't apply to unified hierarchy */
 945                 return -EOPNOTSUPP;
 946
 947         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 948         if (r < 0)
 949                 return r;
 950
 951         r = write_string_file(fs, "0", 0);
 952         if (r < 0)
 953                 return r;
 954
 955         fs = mfree(fs);
 956
 957         r = cg_get_path(controller, NULL, "release_agent", &fs);
 958         if (r < 0)
 959                 return r;
 960
 961         r = write_string_file(fs, "", 0);
 962         if (r < 0)
 963                 return r;
 964
 965         return 0;
 966 }
 967
 968 int cg_is_empty(const char *controller, const char *path) {
 969         _cleanup_fclose_ FILE *f = NULL;
 970         pid_t pid;
 971         int r;
 972
 973         assert(path);
 974
 975         r = cg_enumerate_processes(controller, path, &f);
 976         if (r == -ENOENT)
 977                 return 1;
 978         if (r < 0)
 979                 return r;
 980
 981         r = cg_read_pid(f, &pid);
 982         if (r < 0)
 983                 return r;
 984
 985         return r == 0;
 986 }
 987
 988 int cg_is_empty_recursive(const char *controller, const char *path) {
 989         int unified, r;
 990
 991         assert(path);
 992
 993         /* The root cgroup is always populated */
 994         if (controller && (isempty(path) || path_equal(path, "/")))
 995                 return false;
 996
 997         unified = cg_unified();
 998         if (unified < 0)
 999                 return unified;
1000
1001         if (unified > 0) {
1002                 _cleanup_free_ char *populated = NULL, *t = NULL;
1003
1004                 /* On the unified hierarchy we can check empty state
1005                  * via the "cgroup.populated" attribute. */
1006
1007                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1008                 if (r < 0)
1009                         return r;
1010
1011                 r = read_one_line_file(populated, &t);
1012                 if (r == -ENOENT)
1013                         return 1;
1014                 if (r < 0)
1015                         return r;
1016
1017                 return streq(t, "0");
1018         } else {
1019                 _cleanup_closedir_ DIR *d = NULL;
1020                 char *fn;
1021
1022                 r = cg_is_empty(controller, path);
1023                 if (r <= 0)
1024                         return r;
1025
1026                 r = cg_enumerate_subgroups(controller, path, &d);
1027                 if (r == -ENOENT)
1028                         return 1;
1029                 if (r < 0)
1030                         return r;
1031
1032                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1033                         _cleanup_free_ char *p = NULL;
1034
1035                         p = strjoin(path, "/", fn, NULL);
1036                         free(fn);
1037                         if (!p)
1038                                 return -ENOMEM;
1039
1040                         r = cg_is_empty_recursive(controller, p);
1041                         if (r <= 0)
1042                                 return r;
1043                 }
1044                 if (r < 0)
1045                         return r;
1046
1047                 return true;
1048         }
1049 }
1050
/* Splits a cgroup specification into controller and path. Three forms are understood:
 *
 *   "/some/path"       -> controller NULL, path set
 *   "controller"       -> controller set, path NULL
 *   "controller:/path" -> both set (path NULL if nothing follows the colon)
 *
 * Either output pointer may be NULL if the caller is not interested in that part. Returned
 * strings are newly allocated. Returns 0 on success, -EINVAL if a part fails validation,
 * -ENOMEM on allocation failure. */
int cg_split_spec(const char *spec, char **controller, char **path) {
        char *c = NULL, *p = NULL;
        const char *colon;
        int r;

        assert(spec);

        /* Form 1: just a path */
        if (spec[0] == '/') {
                if (!path_is_safe(spec))
                        return -EINVAL;

                if (path) {
                        p = strdup(spec);
                        if (!p)
                                return -ENOMEM;

                        *path = path_kill_slashes(p);
                }

                if (controller)
                        *controller = NULL;

                return 0;
        }

        colon = strchr(spec, ':');

        /* Form 2: just a controller */
        if (!colon) {
                if (!cg_controller_is_valid(spec))
                        return -EINVAL;

                if (controller) {
                        c = strdup(spec);
                        if (!c)
                                return -ENOMEM;

                        *controller = c;
                }

                if (path)
                        *path = NULL;

                return 0;
        }

        /* Form 3: controller, colon, optional path */
        c = strndup(spec, colon - spec);
        if (!c)
                return -ENOMEM;

        r = -EINVAL;
        if (!cg_controller_is_valid(c))
                goto fail;

        if (!isempty(colon + 1)) {
                p = strdup(colon + 1);
                if (!p) {
                        r = -ENOMEM;
                        goto fail;
                }

                if (!path_is_safe(p) || !path_is_absolute(p))
                        goto fail;

                path_kill_slashes(p);
        }

        /* Hand over what the caller asked for, drop the rest. */
        if (controller)
                *controller = c;
        else
                free(c);

        if (path)
                *path = p;
        else
                free(p);

        return 0;

fail:
        free(p);
        free(c);
        return r;
}
1133
1134 int cg_mangle_path(const char *path, char **result) {
1135         _cleanup_free_ char *c = NULL, *p = NULL;
1136         char *t;
1137         int r;
1138
1139         assert(path);
1140         assert(result);
1141
1142         /* First, check if it already is a filesystem path */
1143         if (path_startswith(path, "/sys/fs/cgroup")) {
1144
1145                 t = strdup(path);
1146                 if (!t)
1147                         return -ENOMEM;
1148
1149                 *result = path_kill_slashes(t);
1150                 return 0;
1151         }
1152
1153         /* Otherwise, treat it as cg spec */
1154         r = cg_split_spec(path, &c, &p);
1155         if (r < 0)
1156                 return r;
1157
1158         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1159 }
1160
1161 int cg_get_root_path(char **path) {
1162         char *p, *e;
1163         int r;
1164
1165         assert(path);
1166
1167         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1168         if (r < 0)
1169                 return r;
1170
1171         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1172         if (!e)
1173                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1174         if (!e)
1175                 e = endswith(p, "/system"); /* even more legacy */
1176         if (e)
1177                 *e = 0;
1178
1179         *path = p;
1180         return 0;
1181 }
1182
1183 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1184         _cleanup_free_ char *rt = NULL;
1185         char *p;
1186         int r;
1187
1188         assert(cgroup);
1189         assert(shifted);
1190
1191         if (!root) {
1192                 /* If the root was specified let's use that, otherwise
1193                  * let's determine it from PID 1 */
1194
1195                 r = cg_get_root_path(&rt);
1196                 if (r < 0)
1197                         return r;
1198
1199                 root = rt;
1200         }
1201
1202         p = path_startswith(cgroup, root);
1203         if (p && p > cgroup)
1204                 *shifted = p - 1;
1205         else
1206                 *shifted = cgroup;
1207
1208         return 0;
1209 }
1210
1211 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1212         _cleanup_free_ char *raw = NULL;
1213         const char *c;
1214         int r;
1215
1216         assert(pid >= 0);
1217         assert(cgroup);
1218
1219         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1220         if (r < 0)
1221                 return r;
1222
1223         r = cg_shift_path(raw, root, &c);
1224         if (r < 0)
1225                 return r;
1226
1227         if (c == raw) {
1228                 *cgroup = raw;
1229                 raw = NULL;
1230         } else {
1231                 char *n;
1232
1233                 n = strdup(c);
1234                 if (!n)
1235                         return -ENOMEM;
1236
1237                 *cgroup = n;
1238         }
1239
1240         return 0;
1241 }
1242
1243 int cg_path_decode_unit(const char *cgroup, char **unit){
1244         char *c, *s;
1245         size_t n;
1246
1247         assert(cgroup);
1248         assert(unit);
1249
1250         n = strcspn(cgroup, "/");
1251         if (n < 3)
1252                 return -ENXIO;
1253
1254         c = strndupa(cgroup, n);
1255         c = cg_unescape(c);
1256
1257         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1258                 return -ENXIO;
1259
1260         s = strdup(c);
1261         if (!s)
1262                 return -ENOMEM;
1263
1264         *unit = s;
1265         return 0;
1266 }
1267
1268 static bool valid_slice_name(const char *p, size_t n) {
1269
1270         if (!p)
1271                 return false;
1272
1273         if (n < strlen("x.slice"))
1274                 return false;
1275
1276         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1277                 char buf[n+1], *c;
1278
1279                 memcpy(buf, p, n);
1280                 buf[n] = 0;
1281
1282                 c = cg_unescape(buf);
1283
1284                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1285         }
1286
1287         return false;
1288 }
1289
/* Skips over all leading slice components of 'p' (and the slashes around them). Returns a
 * pointer to the first path component that valid_slice_name() does not accept; this may be the
 * terminating NUL byte. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t len;

                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                p = component + len;
        }
}
1307
/* Extracts the name of the unit a cgroup path belongs to: leading slices are skipped and the
 * component after them is decoded with cg_path_decode_unit(). On success returns 0 and stores
 * a newly allocated string in *ret. Returns -ENXIO if the decoded name ends in ".slice",
 * otherwise whatever cg_path_decode_unit() fails with. */
int cg_path_get_unit(const char *path, char **ret) {
        char *name;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &name);
        if (r < 0)
                return r;

        /* Slices were skipped above, so one showing up here is not acceptable */
        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        free(name);
        return -ENXIO;
}
1331
1332 int cg_pid_get_unit(pid_t pid, char **unit) {
1333         _cleanup_free_ char *cgroup = NULL;
1334         int r;
1335
1336         assert(unit);
1337
1338         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1339         if (r < 0)
1340                 return r;
1341
1342         return cg_path_get_unit(cgroup, unit);
1343 }
1344
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Leading slashes are ignored. Returns a pointer to what follows the session
 * scope component (with the slashes after it skipped as well), or NULL if
 * isempty(p) holds, if the first component is not of the form
 * "session-<id>.scope", or if <id> is rejected by session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        /* 8 == strlen("session-"), 6 == strlen(".scope") */
        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                char buf[n - 8 - 6 + 1];

                /* Copy out the session ID between prefix and suffix */
                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure
                 * with NULL like all other error paths do. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1381
/**
 * Skip user@*.service, but require it to be there.
 *
 * Leading slashes are ignored. Returns a pointer to what follows the
 * component (trailing slashes skipped), or NULL if isempty(p) holds, the
 * first component does not look like "user@<uid>.service", or <uid> is not
 * accepted by parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        const size_t prefix_len = strlen("user@"), suffix_len = strlen(".service");
        size_t total, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        total = strcspn(p, "/");
        if (total < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", prefix_len) != 0 ||
            memcmp(p + total - suffix_len, ".service", suffix_len) != 0)
                return NULL;

        /* Isolate the part between "user@" and ".service" */
        uid_len = total - prefix_len - suffix_len;
        char uid[uid_len + 1];

        memcpy(uid, p + prefix_len, uid_len);
        uid[uid_len] = 0;

        /* No cg_unescape() needed here: user manager service names
         * cannot conflict with the kernel's own names. */

        if (parse_uid(uid, NULL) < 0)
                return NULL;

        p += total;
        return p + strspn(p, "/");
}
1419
/* Skips everything in 'path' that leads up to a user unit: any slices, followed by either a
 * user manager service or a session scope. Returns a pointer to the remainder, or NULL if
 * neither of the two is found after the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Slices first, if there are any */
        after_slices = skip_slices(path);

        /* Then the user manager, or alternatively the user session */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1436
/* Extracts the user unit name from a cgroup path. Returns -ENXIO if skip_user_prefix() finds
 * no user manager or session prefix in the path; otherwise the result of cg_path_get_unit()
 * on the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the prefix the layout is the same as for system units,
         * so the same parser is used. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1452
1453 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1454         _cleanup_free_ char *cgroup = NULL;
1455         int r;
1456
1457         assert(unit);
1458
1459         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1460         if (r < 0)
1461                 return r;
1462
1463         return cg_path_get_user_unit(cgroup, unit);
1464 }
1465
1466 int cg_path_get_machine_name(const char *path, char **machine) {
1467         _cleanup_free_ char *u = NULL;
1468         const char *sl;
1469         int r;
1470
1471         r = cg_path_get_unit(path, &u);
1472         if (r < 0)
1473                 return r;
1474
1475         sl = strjoina("/run/systemd/machines/unit:", u);
1476         return readlink_malloc(sl, machine);
1477 }
1478
1479 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1480         _cleanup_free_ char *cgroup = NULL;
1481         int r;
1482
1483         assert(machine);
1484
1485         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1486         if (r < 0)
1487                 return r;
1488
1489         return cg_path_get_machine_name(cgroup, machine);
1490 }
1491
1492 int cg_path_get_session(const char *path, char **session) {
1493         _cleanup_free_ char *unit = NULL;
1494         char *start, *end;
1495         int r;
1496
1497         assert(path);
1498
1499         r = cg_path_get_unit(path, &unit);
1500         if (r < 0)
1501                 return r;
1502
1503         start = startswith(unit, "session-");
1504         if (!start)
1505                 return -ENXIO;
1506         end = endswith(start, ".scope");
1507         if (!end)
1508                 return -ENXIO;
1509
1510         *end = 0;
1511         if (!session_id_valid(start))
1512                 return -ENXIO;
1513
1514         if (session) {
1515                 char *rr;
1516
1517                 rr = strdup(start);
1518                 if (!rr)
1519                         return -ENOMEM;
1520
1521                 *session = rr;
1522         }
1523
1524         return 0;
1525 }
1526
1527 int cg_pid_get_session(pid_t pid, char **session) {
1528         _cleanup_free_ char *cgroup = NULL;
1529         int r;
1530
1531         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1532         if (r < 0)
1533                 return r;
1534
1535         return cg_path_get_session(cgroup, session);
1536 }
1537
1538 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1539         _cleanup_free_ char *slice = NULL;
1540         char *start, *end;
1541         int r;
1542
1543         assert(path);
1544
1545         r = cg_path_get_slice(path, &slice);
1546         if (r < 0)
1547                 return r;
1548
1549         start = startswith(slice, "user-");
1550         if (!start)
1551                 return -ENXIO;
1552         end = endswith(start, ".slice");
1553         if (!end)
1554                 return -ENXIO;
1555
1556         *end = 0;
1557         if (parse_uid(start, uid) < 0)
1558                 return -ENXIO;
1559
1560         return 0;
1561 }
1562
1563 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1564         _cleanup_free_ char *cgroup = NULL;
1565         int r;
1566
1567         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1568         if (r < 0)
1569                 return r;
1570
1571         return cg_path_get_owner_uid(cgroup, uid);
1572 }
1573
/* Finds the innermost slice at the beginning of a cgroup path: slice components are consumed
 * from the left until the first component that is not a valid slice name, and the last slice
 * seen is decoded with cg_path_decode_unit(). If the path does not start with any slice,
 * "-.slice" is returned. On success returns 0 and stores a newly allocated string in *slice;
 * returns negative errno-style on failure. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        char *s;

        assert(p);
        assert(slice);

        for (;;) {
                size_t len;

                p += strspn(p, "/");
                len = strcspn(p, "/");

                /* Stop at the first component that is not a slice */
                if (!valid_slice_name(p, len))
                        break;

                last = p;
                p += len;
        }

        if (last)
                return cg_path_decode_unit(last, slice);

        /* No slice in the path at all */
        s = strdup("-.slice");
        if (!s)
                return -ENOMEM;

        *slice = s;
        return 0;
}
1609
1610 int cg_pid_get_slice(pid_t pid, char **slice) {
1611         _cleanup_free_ char *cgroup = NULL;
1612         int r;
1613
1614         assert(slice);
1615
1616         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1617         if (r < 0)
1618                 return r;
1619
1620         return cg_path_get_slice(cgroup, slice);
1621 }
1622
/* Extracts the user slice from a cgroup path. Returns -ENXIO if skip_user_prefix() finds no
 * user manager or session prefix in the path; otherwise the result of cg_path_get_slice() on
 * the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the prefix the layout is the same as for system slices,
         * so the same parser is used. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1636
1637 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1638         _cleanup_free_ char *cgroup = NULL;
1639         int r;
1640
1641         assert(slice);
1642
1643         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1644         if (r < 0)
1645                 return r;
1646
1647         return cg_path_get_user_slice(cgroup, slice);
1648 }
1649
1650 char *cg_escape(const char *p) {
1651         bool need_prefix = false;
1652
1653         /* This implements very minimal escaping for names to be used
1654          * as file names in the cgroup tree: any name which might
1655          * conflict with a kernel name or is prefixed with '_' is
1656          * prefixed with a '_'. That way, when reading cgroup names it
1657          * is sufficient to remove a single prefixing underscore if
1658          * there is one. */
1659
1660         /* The return value of this function (unlike cg_unescape())
1661          * needs free()! */
1662
1663         if (p[0] == 0 ||
1664             p[0] == '_' ||
1665             p[0] == '.' ||
1666             streq(p, "notify_on_release") ||
1667             streq(p, "release_agent") ||
1668             streq(p, "tasks") ||
1669             startswith(p, "cgroup."))
1670                 need_prefix = true;
1671         else {
1672                 const char *dot;
1673
1674                 dot = strrchr(p, '.');
1675                 if (dot) {
1676                         CGroupController c;
1677                         size_t l = dot - p;
1678
1679                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1680                                 const char *n;
1681
1682                                 n = cgroup_controller_to_string(c);
1683
1684                                 if (l != strlen(n))
1685                                         continue;
1686
1687                                 if (memcmp(p, n, l) != 0)
1688                                         continue;
1689
1690                                 need_prefix = true;
1691                                 break;
1692                         }
1693                 }
1694         }
1695
1696         if (need_prefix)
1697                 return strappend("_", p);
1698
1699         return strdup(p);
1700 }
1701
1702 char *cg_unescape(const char *p) {
1703         assert(p);
1704
1705         /* The return value of this function (unlike cg_escape())
1706          * doesn't need free()! */
1707
1708         if (p[0] == '_')
1709                 return (char*) p+1;
1710
1711         return (char*) p;
1712 }
1713
1714 #define CONTROLLER_VALID                        \
1715         DIGITS LETTERS                          \
1716         "_"
1717
1718 bool cg_controller_is_valid(const char *p) {
1719         const char *t, *s;
1720
1721         if (!p)
1722                 return false;
1723
1724         s = startswith(p, "name=");
1725         if (s)
1726                 p = s;
1727
1728         if (*p == 0 || *p == '_')
1729                 return false;
1730
1731         for (t = p; *t; t++)
1732                 if (!strchr(CONTROLLER_VALID, *t))
1733                         return false;
1734
1735         if (t - p > FILENAME_MAX)
1736                 return false;
1737
1738         return true;
1739 }
1740
1741 int cg_slice_to_path(const char *unit, char **ret) {
1742         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1743         const char *dash;
1744         int r;
1745
1746         assert(unit);
1747         assert(ret);
1748
1749         if (streq(unit, "-.slice")) {
1750                 char *x;
1751
1752                 x = strdup("");
1753                 if (!x)
1754                         return -ENOMEM;
1755                 *ret = x;
1756                 return 0;
1757         }
1758
1759         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1760                 return -EINVAL;
1761
1762         if (!endswith(unit, ".slice"))
1763                 return -EINVAL;
1764
1765         r = unit_name_to_prefix(unit, &p);
1766         if (r < 0)
1767                 return r;
1768
1769         dash = strchr(p, '-');
1770
1771         /* Don't allow initial dashes */
1772         if (dash == p)
1773                 return -EINVAL;
1774
1775         while (dash) {
1776                 _cleanup_free_ char *escaped = NULL;
1777                 char n[dash - p + sizeof(".slice")];
1778
1779                 /* Don't allow trailing or double dashes */
1780                 if (dash[1] == 0 || dash[1] == '-')
1781                         return -EINVAL;
1782
1783                 strcpy(stpncpy(n, p, dash - p), ".slice");
1784                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1785                         return -EINVAL;
1786
1787                 escaped = cg_escape(n);
1788                 if (!escaped)
1789                         return -ENOMEM;
1790
1791                 if (!strextend(&s, escaped, "/", NULL))
1792                         return -ENOMEM;
1793
1794                 dash = strchr(dash+1, '-');
1795         }
1796
1797         e = cg_escape(unit);
1798         if (!e)
1799                 return -ENOMEM;
1800
1801         if (!strextend(&s, e, NULL))
1802                 return -ENOMEM;
1803
1804         *ret = s;
1805         s = NULL;
1806
1807         return 0;
1808 }
1809
1810 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1811         _cleanup_free_ char *p = NULL;
1812         int r;
1813
1814         r = cg_get_path(controller, path, attribute, &p);
1815         if (r < 0)
1816                 return r;
1817
1818         return write_string_file(p, value, 0);
1819 }
1820
1821 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1822         _cleanup_free_ char *p = NULL;
1823         int r;
1824
1825         r = cg_get_path(controller, path, attribute, &p);
1826         if (r < 0)
1827                 return r;
1828
1829         return read_one_line_file(p, ret);
1830 }
1831
1832 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1833         CGroupController c;
1834         int r, unified;
1835
1836         /* This one will create a cgroup in our private tree, but also
1837          * duplicate it in the trees specified in mask, and remove it
1838          * in all others */
1839
1840         /* First create the cgroup in our own hierarchy. */
1841         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1842         if (r < 0)
1843                 return r;
1844
1845         /* If we are in the unified hierarchy, we are done now */
1846         unified = cg_unified();
1847         if (unified < 0)
1848                 return unified;
1849         if (unified > 0)
1850                 return 0;
1851
1852         /* Otherwise, do the same in the other hierarchies */
1853         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1854                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1855                 const char *n;
1856
1857                 n = cgroup_controller_to_string(c);
1858
1859                 if (mask & bit)
1860                         (void) cg_create(n, path);
1861                 else if (supported & bit)
1862                         (void) cg_trim(n, path, true);
1863         }
1864
1865         return 0;
1866 }
1867
1868 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1869         CGroupController c;
1870         int r, unified;
1871
1872         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1873         if (r < 0)
1874                 return r;
1875
1876         unified = cg_unified();
1877         if (unified < 0)
1878                 return unified;
1879         if (unified > 0)
1880                 return 0;
1881
1882         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1883                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1884                 const char *p = NULL;
1885
1886                 if (!(supported & bit))
1887                         continue;
1888
1889                 if (path_callback)
1890                         p = path_callback(bit, userdata);
1891
1892                 if (!p)
1893                         p = path;
1894
1895                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1896         }
1897
1898         return 0;
1899 }
1900
1901 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1902         Iterator i;
1903         void *pidp;
1904         int r = 0;
1905
1906         SET_FOREACH(pidp, pids, i) {
1907                 pid_t pid = PTR_TO_PID(pidp);
1908                 int q;
1909
1910                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1911                 if (q < 0 && r >= 0)
1912                         r = q;
1913         }
1914
1915         return r;
1916 }
1917
1918 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1919         CGroupController c;
1920         int r = 0, unified;
1921
1922         if (!path_equal(from, to))  {
1923                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1924                 if (r < 0)
1925                         return r;
1926         }
1927
1928         unified = cg_unified();
1929         if (unified < 0)
1930                 return unified;
1931         if (unified > 0)
1932                 return r;
1933
1934         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1935                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1936                 const char *p = NULL;
1937
1938                 if (!(supported & bit))
1939                         continue;
1940
1941                 if (to_callback)
1942                         p = to_callback(bit, userdata);
1943
1944                 if (!p)
1945                         p = to;
1946
1947                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1948         }
1949
1950         return 0;
1951 }
1952
1953 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1954         CGroupController c;
1955         int r, unified;
1956
1957         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1958         if (r < 0)
1959                 return r;
1960
1961         unified = cg_unified();
1962         if (unified < 0)
1963                 return unified;
1964         if (unified > 0)
1965                 return r;
1966
1967         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1968                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1969
1970                 if (!(supported & bit))
1971                         continue;
1972
1973                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1974         }
1975
1976         return 0;
1977 }
1978
1979 int cg_mask_supported(CGroupMask *ret) {
1980         CGroupMask mask = 0;
1981         int r, unified;
1982
1983         /* Determines the mask of supported cgroup controllers. Only
1984          * includes controllers we can make sense of and that are
1985          * actually accessible. */
1986
1987         unified = cg_unified();
1988         if (unified < 0)
1989                 return unified;
1990         if (unified > 0) {
1991                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
1992                 const char *c;
1993
1994                 /* In the unified hierarchy we can read the supported
1995                  * and accessible controllers from a the top-level
1996                  * cgroup attribute */
1997
1998                 r = cg_get_root_path(&root);
1999                 if (r < 0)
2000                         return r;
2001
2002                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2003                 if (r < 0)
2004                         return r;
2005
2006                 r = read_one_line_file(path, &controllers);
2007                 if (r < 0)
2008                         return r;
2009
2010                 c = controllers;
2011                 for (;;) {
2012                         _cleanup_free_ char *n = NULL;
2013                         CGroupController v;
2014
2015                         r = extract_first_word(&c, &n, NULL, 0);
2016                         if (r < 0)
2017                                 return r;
2018                         if (r == 0)
2019                                 break;
2020
2021                         v = cgroup_controller_from_string(n);
2022                         if (v < 0)
2023                                 continue;
2024
2025                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2026                 }
2027
2028                 /* Currently, we only support the memory and pids
2029                  * controller in the unified hierarchy, mask
2030                  * everything else off. */
2031                 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS;
2032
2033         } else {
2034                 CGroupController c;
2035
2036                 /* In the legacy hierarchy, we check whether which
2037                  * hierarchies are mounted. */
2038
2039                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2040                         const char *n;
2041
2042                         n = cgroup_controller_to_string(c);
2043                         if (controller_is_accessible(n) >= 0)
2044                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2045                 }
2046         }
2047
2048         *ret = mask;
2049         return 0;
2050 }
2051
2052 int cg_kernel_controllers(Set *controllers) {
2053         _cleanup_fclose_ FILE *f = NULL;
2054         char buf[LINE_MAX];
2055         int r;
2056
2057         assert(controllers);
2058
2059         /* Determines the full list of kernel-known controllers. Might
2060          * include controllers we don't actually support, arbitrary
2061          * named hierarchies and controllers that aren't currently
2062          * accessible (because not mounted). */
2063
2064         f = fopen("/proc/cgroups", "re");
2065         if (!f) {
2066                 if (errno == ENOENT)
2067                         return 0;
2068                 return -errno;
2069         }
2070
2071         /* Ignore the header line */
2072         (void) fgets(buf, sizeof(buf), f);
2073
2074         for (;;) {
2075                 char *controller;
2076                 int enabled = 0;
2077
2078                 errno = 0;
2079                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2080
2081                         if (feof(f))
2082                                 break;
2083
2084                         if (ferror(f) && errno != 0)
2085                                 return -errno;
2086
2087                         return -EBADMSG;
2088                 }
2089
2090                 if (!enabled) {
2091                         free(controller);
2092                         continue;
2093                 }
2094
2095                 if (!cg_controller_is_valid(controller)) {
2096                         free(controller);
2097                         return -EBADMSG;
2098                 }
2099
2100                 r = set_consume(controllers, controller);
2101                 if (r < 0)
2102                         return r;
2103         }
2104
2105         return 0;
2106 }
2107
2108 static thread_local int unified_cache = -1;
2109
2110 int cg_unified(void) {
2111         struct statfs fs;
2112
2113         /* Checks if we support the unified hierarchy. Returns an
2114          * error when the cgroup hierarchies aren't mounted yet or we
2115          * have any other trouble determining if the unified hierarchy
2116          * is supported. */
2117
2118         if (unified_cache >= 0)
2119                 return unified_cache;
2120
2121         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2122                 return -errno;
2123
2124         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2125                 unified_cache = true;
2126         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2127                 unified_cache = false;
2128         else
2129                 return -ENOEXEC;
2130
2131         return unified_cache;
2132 }
2133
2134 void cg_unified_flush(void) {
2135         unified_cache = -1;
2136 }
2137
2138 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2139         _cleanup_free_ char *fs = NULL;
2140         CGroupController c;
2141         int r, unified;
2142
2143         assert(p);
2144
2145         if (supported == 0)
2146                 return 0;
2147
2148         unified = cg_unified();
2149         if (unified < 0)
2150                 return unified;
2151         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2152                 return 0;
2153
2154         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2155         if (r < 0)
2156                 return r;
2157
2158         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2159                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2160                 const char *n;
2161
2162                 if (!(supported & bit))
2163                         continue;
2164
2165                 n = cgroup_controller_to_string(c);
2166                 {
2167                         char s[1 + strlen(n) + 1];
2168
2169                         s[0] = mask & bit ? '+' : '-';
2170                         strcpy(s + 1, n);
2171
2172                         r = write_string_file(fs, s, 0);
2173                         if (r < 0)
2174                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2175                 }
2176         }
2177
2178         return 0;
2179 }
2180
2181 bool cg_is_unified_wanted(void) {
2182         static thread_local int wanted = -1;
2183         int r, unified;
2184
2185         /* If the hierarchy is already mounted, then follow whatever
2186          * was chosen for it. */
2187         unified = cg_unified();
2188         if (unified >= 0)
2189                 return unified;
2190
2191         /* Otherwise, let's see what the kernel command line has to
2192          * say. Since checking that is expensive, let's cache the
2193          * result. */
2194         if (wanted >= 0)
2195                 return wanted;
2196
2197         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2198         if (r > 0)
2199                 return (wanted = true);
2200         else {
2201                 _cleanup_free_ char *value = NULL;
2202
2203                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2204                 if (r < 0)
2205                         return false;
2206                 if (r == 0)
2207                         return (wanted = false);
2208
2209                 return (wanted = parse_boolean(value) > 0);
2210         }
2211 }
2212
/* Tells whether the legacy cgroup hierarchy shall be used, which is the
 * case exactly when the unified one is not wanted. */
bool cg_is_legacy_wanted(void) {
        bool unified_wanted;

        unified_wanted = cg_is_unified_wanted();
        return !unified_wanted;
}
2216
2217 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2218         uint64_t u;
2219         int r;
2220
2221         if (isempty(s)) {
2222                 *ret = CGROUP_CPU_SHARES_INVALID;
2223                 return 0;
2224         }
2225
2226         r = safe_atou64(s, &u);
2227         if (r < 0)
2228                 return r;
2229
2230         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2231                 return -ERANGE;
2232
2233         *ret = u;
2234         return 0;
2235 }
2236
2237 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2238         uint64_t u;
2239         int r;
2240
2241         if (isempty(s)) {
2242                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2243                 return 0;
2244         }
2245
2246         r = safe_atou64(s, &u);
2247         if (r < 0)
2248                 return r;
2249
2250         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2251                 return -ERANGE;
2252
2253         *ret = u;
2254         return 0;
2255 }
2256
2257 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2258         [CGROUP_CONTROLLER_CPU] = "cpu",
2259         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2260         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2261         [CGROUP_CONTROLLER_MEMORY] = "memory",
2262         [CGROUP_CONTROLLER_DEVICES] = "devices",
2263         [CGROUP_CONTROLLER_PIDS] = "pids",
2264         [CGROUP_CONTROLLER_NET_CLS] = "net_cls",
2265 };
2266
2267 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);