src/basic/cgroup-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <dirent.h>
  23 #include <errno.h>
  24 #include <ftw.h>
  25 #include <signal.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <sys/stat.h>
  29 #include <sys/types.h>
  30 #include <unistd.h>
  31
  32 #include "cgroup-util.h"
  33 #include "extract-word.h"
  34 #include "fd-util.h"
  35 #include "fileio.h"
  36 #include "formats-util.h"
  37 #include "login-util.h"
  38 #include "macro.h"
  39 #include "mkdir.h"
  40 #include "path-util.h"
  41 #include "process-util.h"
  42 #include "set.h"
  43 #include "special.h"
  44 #include "string-util.h"
  45 #include "unit-name.h"
  46 #include "util.h"
  47
  48 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  49         _cleanup_free_ char *fs = NULL;
  50         FILE *f;
  51         int r;
  52
  53         assert(_f);
  54
  55         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  56         if (r < 0)
  57                 return r;
  58
  59         f = fopen(fs, "re");
  60         if (!f)
  61                 return -errno;
  62
  63         *_f = f;
  64         return 0;
  65 }
  66
  67 int cg_read_pid(FILE *f, pid_t *_pid) {
  68         unsigned long ul;
  69
  70         /* Note that the cgroup.procs might contain duplicates! See
  71          * cgroups.txt for details. */
  72
  73         assert(f);
  74         assert(_pid);
  75
  76         errno = 0;
  77         if (fscanf(f, "%lu", &ul) != 1) {
  78
  79                 if (feof(f))
  80                         return 0;
  81
  82                 return errno ? -errno : -EIO;
  83         }
  84
  85         if (ul <= 0)
  86                 return -EIO;
  87
  88         *_pid = (pid_t) ul;
  89         return 1;
  90 }
  91
  92 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
  93         _cleanup_free_ char *fs = NULL;
  94         int r;
  95         DIR *d;
  96
  97         assert(_d);
  98
  99         /* This is not recursive! */
 100
 101         r = cg_get_path(controller, path, NULL, &fs);
 102         if (r < 0)
 103                 return r;
 104
 105         d = opendir(fs);
 106         if (!d)
 107                 return -errno;
 108
 109         *_d = d;
 110         return 0;
 111 }
 112
 113 int cg_read_subgroup(DIR *d, char **fn) {
 114         struct dirent *de;
 115
 116         assert(d);
 117         assert(fn);
 118
 119         FOREACH_DIRENT_ALL(de, d, return -errno) {
 120                 char *b;
 121
 122                 if (de->d_type != DT_DIR)
 123                         continue;
 124
 125                 if (streq(de->d_name, ".") ||
 126                     streq(de->d_name, ".."))
 127                         continue;
 128
 129                 b = strdup(de->d_name);
 130                 if (!b)
 131                         return -ENOMEM;
 132
 133                 *fn = b;
 134                 return 1;
 135         }
 136
 137         return 0;
 138 }
 139
 140 int cg_rmdir(const char *controller, const char *path) {
 141         _cleanup_free_ char *p = NULL;
 142         int r;
 143
 144         r = cg_get_path(controller, path, NULL, &p);
 145         if (r < 0)
 146                 return r;
 147
 148         r = rmdir(p);
 149         if (r < 0 && errno != ENOENT)
 150                 return -errno;
 151
 152         return 0;
 153 }
 154
 155 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
 156         _cleanup_set_free_ Set *allocated_set = NULL;
 157         bool done = false;
 158         int r, ret = 0;
 159         pid_t my_pid;
 160
 161         assert(sig >= 0);
 162
 163         /* This goes through the tasks list and kills them all. This
 164          * is repeated until no further processes are added to the
 165          * tasks list, to properly handle forking processes */
 166
 167         if (!s) {
 168                 s = allocated_set = set_new(NULL);
 169                 if (!s)
 170                         return -ENOMEM;
 171         }
 172
 173         my_pid = getpid();
 174
 175         do {
 176                 _cleanup_fclose_ FILE *f = NULL;
 177                 pid_t pid = 0;
 178                 done = true;
 179
 180                 r = cg_enumerate_processes(controller, path, &f);
 181                 if (r < 0) {
 182                         if (ret >= 0 && r != -ENOENT)
 183                                 return r;
 184
 185                         return ret;
 186                 }
 187
 188                 while ((r = cg_read_pid(f, &pid)) > 0) {
 189
 190                         if (ignore_self && pid == my_pid)
 191                                 continue;
 192
 193                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 194                                 continue;
 195
 196                         /* If we haven't killed this process yet, kill
 197                          * it */
 198                         if (kill(pid, sig) < 0) {
 199                                 if (ret >= 0 && errno != ESRCH)
 200                                         ret = -errno;
 201                         } else {
 202                                 if (sigcont && sig != SIGKILL)
 203                                         (void) kill(pid, SIGCONT);
 204
 205                                 if (ret == 0)
 206                                         ret = 1;
 207                         }
 208
 209                         done = false;
 210
 211                         r = set_put(s, PID_TO_PTR(pid));
 212                         if (r < 0) {
 213                                 if (ret >= 0)
 214                                         return r;
 215
 216                                 return ret;
 217                         }
 218                 }
 219
 220                 if (r < 0) {
 221                         if (ret >= 0)
 222                                 return r;
 223
 224                         return ret;
 225                 }
 226
 227                 /* To avoid racing against processes which fork
 228                  * quicker than we can kill them we repeat this until
 229                  * no new pids need to be killed. */
 230
 231         } while (!done);
 232
 233         return ret;
 234 }
 235
 236 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 237         _cleanup_set_free_ Set *allocated_set = NULL;
 238         _cleanup_closedir_ DIR *d = NULL;
 239         int r, ret;
 240         char *fn;
 241
 242         assert(path);
 243         assert(sig >= 0);
 244
 245         if (!s) {
 246                 s = allocated_set = set_new(NULL);
 247                 if (!s)
 248                         return -ENOMEM;
 249         }
 250
 251         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 252
 253         r = cg_enumerate_subgroups(controller, path, &d);
 254         if (r < 0) {
 255                 if (ret >= 0 && r != -ENOENT)
 256                         return r;
 257
 258                 return ret;
 259         }
 260
 261         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 262                 _cleanup_free_ char *p = NULL;
 263
 264                 p = strjoin(path, "/", fn, NULL);
 265                 free(fn);
 266                 if (!p)
 267                         return -ENOMEM;
 268
 269                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 270                 if (r != 0 && ret >= 0)
 271                         ret = r;
 272         }
 273
 274         if (ret >= 0 && r < 0)
 275                 ret = r;
 276
 277         if (rem) {
 278                 r = cg_rmdir(controller, path);
 279                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 280                         return r;
 281         }
 282
 283         return ret;
 284 }
 285
 286 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
 287         bool done = false;
 288         _cleanup_set_free_ Set *s = NULL;
 289         int r, ret = 0;
 290         pid_t my_pid;
 291
 292         assert(cfrom);
 293         assert(pfrom);
 294         assert(cto);
 295         assert(pto);
 296
 297         s = set_new(NULL);
 298         if (!s)
 299                 return -ENOMEM;
 300
 301         my_pid = getpid();
 302
 303         do {
 304                 _cleanup_fclose_ FILE *f = NULL;
 305                 pid_t pid = 0;
 306                 done = true;
 307
 308                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 309                 if (r < 0) {
 310                         if (ret >= 0 && r != -ENOENT)
 311                                 return r;
 312
 313                         return ret;
 314                 }
 315
 316                 while ((r = cg_read_pid(f, &pid)) > 0) {
 317
 318                         /* This might do weird stuff if we aren't a
 319                          * single-threaded program. However, we
 320                          * luckily know we are not */
 321                         if (ignore_self && pid == my_pid)
 322                                 continue;
 323
 324                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 325                                 continue;
 326
 327                         /* Ignore kernel threads. Since they can only
 328                          * exist in the root cgroup, we only check for
 329                          * them there. */
 330                         if (cfrom &&
 331                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 332                             is_kernel_thread(pid) > 0)
 333                                 continue;
 334
 335                         r = cg_attach(cto, pto, pid);
 336                         if (r < 0) {
 337                                 if (ret >= 0 && r != -ESRCH)
 338                                         ret = r;
 339                         } else if (ret == 0)
 340                                 ret = 1;
 341
 342                         done = false;
 343
 344                         r = set_put(s, PID_TO_PTR(pid));
 345                         if (r < 0) {
 346                                 if (ret >= 0)
 347                                         return r;
 348
 349                                 return ret;
 350                         }
 351                 }
 352
 353                 if (r < 0) {
 354                         if (ret >= 0)
 355                                 return r;
 356
 357                         return ret;
 358                 }
 359         } while (!done);
 360
 361         return ret;
 362 }
 363
 364 int cg_migrate_recursive(
 365                 const char *cfrom,
 366                 const char *pfrom,
 367                 const char *cto,
 368                 const char *pto,
 369                 bool ignore_self,
 370                 bool rem) {
 371
 372         _cleanup_closedir_ DIR *d = NULL;
 373         int r, ret = 0;
 374         char *fn;
 375
 376         assert(cfrom);
 377         assert(pfrom);
 378         assert(cto);
 379         assert(pto);
 380
 381         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 382
 383         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 384         if (r < 0) {
 385                 if (ret >= 0 && r != -ENOENT)
 386                         return r;
 387
 388                 return ret;
 389         }
 390
 391         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 392                 _cleanup_free_ char *p = NULL;
 393
 394                 p = strjoin(pfrom, "/", fn, NULL);
 395                 free(fn);
 396                 if (!p)
 397                         return -ENOMEM;
 398
 399                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 400                 if (r != 0 && ret >= 0)
 401                         ret = r;
 402         }
 403
 404         if (r < 0 && ret >= 0)
 405                 ret = r;
 406
 407         if (rem) {
 408                 r = cg_rmdir(cfrom, pfrom);
 409                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 410                         return r;
 411         }
 412
 413         return ret;
 414 }
 415
 416 int cg_migrate_recursive_fallback(
 417                 const char *cfrom,
 418                 const char *pfrom,
 419                 const char *cto,
 420                 const char *pto,
 421                 bool ignore_self,
 422                 bool rem) {
 423
 424         int r;
 425
 426         assert(cfrom);
 427         assert(pfrom);
 428         assert(cto);
 429         assert(pto);
 430
 431         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 432         if (r < 0) {
 433                 char prefix[strlen(pto) + 1];
 434
 435                 /* This didn't work? Then let's try all prefixes of the destination */
 436
 437                 PATH_FOREACH_PREFIX(prefix, pto) {
 438                         int q;
 439
 440                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 441                         if (q >= 0)
 442                                 return q;
 443                 }
 444         }
 445
 446         return r;
 447 }
 448
 449 static const char *controller_to_dirname(const char *controller) {
 450         const char *e;
 451
 452         assert(controller);
 453
 454         /* Converts a controller name to the directory name below
 455          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 456          * just cuts off the name= prefixed used for named
 457          * hierarchies, if it is specified. */
 458
 459         e = startswith(controller, "name=");
 460         if (e)
 461                 return e;
 462
 463         return controller;
 464 }
 465
 466 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 467         const char *dn;
 468         char *t = NULL;
 469
 470         assert(fs);
 471         assert(controller);
 472
 473         dn = controller_to_dirname(controller);
 474
 475         if (isempty(path) && isempty(suffix))
 476                 t = strappend("/sys/fs/cgroup/", dn);
 477         else if (isempty(path))
 478                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 479         else if (isempty(suffix))
 480                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 481         else
 482                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 483         if (!t)
 484                 return -ENOMEM;
 485
 486         *fs = t;
 487         return 0;
 488 }
 489
 490 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 491         char *t;
 492
 493         assert(fs);
 494
 495         if (isempty(path) && isempty(suffix))
 496                 t = strdup("/sys/fs/cgroup");
 497         else if (isempty(path))
 498                 t = strappend("/sys/fs/cgroup/", suffix);
 499         else if (isempty(suffix))
 500                 t = strappend("/sys/fs/cgroup/", path);
 501         else
 502                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 503         if (!t)
 504                 return -ENOMEM;
 505
 506         *fs = t;
 507         return 0;
 508 }
 509
 510 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 511         int unified, r;
 512
 513         assert(fs);
 514
 515         if (!controller) {
 516                 char *t;
 517
 518                 /* If no controller is specified, we return the path
 519                  * *below* the controllers, without any prefix. */
 520
 521                 if (!path && !suffix)
 522                         return -EINVAL;
 523
 524                 if (!suffix)
 525                         t = strdup(path);
 526                 else if (!path)
 527                         t = strdup(suffix);
 528                 else
 529                         t = strjoin(path, "/", suffix, NULL);
 530                 if (!t)
 531                         return -ENOMEM;
 532
 533                 *fs = path_kill_slashes(t);
 534                 return 0;
 535         }
 536
 537         if (!cg_controller_is_valid(controller))
 538                 return -EINVAL;
 539
 540         unified = cg_unified();
 541         if (unified < 0)
 542                 return unified;
 543
 544         if (unified > 0)
 545                 r = join_path_unified(path, suffix, fs);
 546         else
 547                 r = join_path_legacy(controller, path, suffix, fs);
 548         if (r < 0)
 549                 return r;
 550
 551         path_kill_slashes(*fs);
 552         return 0;
 553 }
 554
 555 static int controller_is_accessible(const char *controller) {
 556         int unified;
 557
 558         assert(controller);
 559
 560         /* Checks whether a specific controller is accessible,
 561          * i.e. its hierarchy mounted. In the unified hierarchy all
 562          * controllers are considered accessible, except for the named
 563          * hierarchies */
 564
 565         if (!cg_controller_is_valid(controller))
 566                 return -EINVAL;
 567
 568         unified = cg_unified();
 569         if (unified < 0)
 570                 return unified;
 571         if (unified > 0) {
 572                 /* We don't support named hierarchies if we are using
 573                  * the unified hierarchy. */
 574
 575                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 576                         return 0;
 577
 578                 if (startswith(controller, "name="))
 579                         return -EOPNOTSUPP;
 580
 581         } else {
 582                 const char *cc, *dn;
 583
 584                 dn = controller_to_dirname(controller);
 585                 cc = strjoina("/sys/fs/cgroup/", dn);
 586
 587                 if (laccess(cc, F_OK) < 0)
 588                         return -errno;
 589         }
 590
 591         return 0;
 592 }
 593
 594 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 595         int r;
 596
 597         assert(controller);
 598         assert(fs);
 599
 600         /* Check if the specified controller is actually accessible */
 601         r = controller_is_accessible(controller);
 602         if (r < 0)
 603                 return r;
 604
 605         return cg_get_path(controller, path, suffix, fs);
 606 }
 607
 608 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 609         assert(path);
 610         assert(sb);
 611         assert(ftwbuf);
 612
 613         if (typeflag != FTW_DP)
 614                 return 0;
 615
 616         if (ftwbuf->level < 1)
 617                 return 0;
 618
 619         (void) rmdir(path);
 620         return 0;
 621 }
 622
 623 int cg_trim(const char *controller, const char *path, bool delete_root) {
 624         _cleanup_free_ char *fs = NULL;
 625         int r = 0;
 626
 627         assert(path);
 628
 629         r = cg_get_path(controller, path, NULL, &fs);
 630         if (r < 0)
 631                 return r;
 632
 633         errno = 0;
 634         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 635                 if (errno == ENOENT)
 636                         r = 0;
 637                 else if (errno != 0)
 638                         r = -errno;
 639                 else
 640                         r = -EIO;
 641         }
 642
 643         if (delete_root) {
 644                 if (rmdir(fs) < 0 && errno != ENOENT)
 645                         return -errno;
 646         }
 647
 648         return r;
 649 }
 650
 651 int cg_create(const char *controller, const char *path) {
 652         _cleanup_free_ char *fs = NULL;
 653         int r;
 654
 655         r = cg_get_path_and_check(controller, path, NULL, &fs);
 656         if (r < 0)
 657                 return r;
 658
 659         r = mkdir_parents(fs, 0755);
 660         if (r < 0)
 661                 return r;
 662
 663         if (mkdir(fs, 0755) < 0) {
 664
 665                 if (errno == EEXIST)
 666                         return 0;
 667
 668                 return -errno;
 669         }
 670
 671         return 1;
 672 }
 673
 674 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 675         int r, q;
 676
 677         assert(pid >= 0);
 678
 679         r = cg_create(controller, path);
 680         if (r < 0)
 681                 return r;
 682
 683         q = cg_attach(controller, path, pid);
 684         if (q < 0)
 685                 return q;
 686
 687         /* This does not remove the cgroup on failure */
 688         return r;
 689 }
 690
 691 int cg_attach(const char *controller, const char *path, pid_t pid) {
 692         _cleanup_free_ char *fs = NULL;
 693         char c[DECIMAL_STR_MAX(pid_t) + 2];
 694         int r;
 695
 696         assert(path);
 697         assert(pid >= 0);
 698
 699         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 700         if (r < 0)
 701                 return r;
 702
 703         if (pid == 0)
 704                 pid = getpid();
 705
 706         snprintf(c, sizeof(c), PID_FMT"\n", pid);
 707
 708         return write_string_file(fs, c, 0);
 709 }
 710
 711 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 712         int r;
 713
 714         assert(controller);
 715         assert(path);
 716         assert(pid >= 0);
 717
 718         r = cg_attach(controller, path, pid);
 719         if (r < 0) {
 720                 char prefix[strlen(path) + 1];
 721
 722                 /* This didn't work? Then let's try all prefixes of
 723                  * the destination */
 724
 725                 PATH_FOREACH_PREFIX(prefix, path) {
 726                         int q;
 727
 728                         q = cg_attach(controller, prefix, pid);
 729                         if (q >= 0)
 730                                 return q;
 731                 }
 732         }
 733
 734         return r;
 735 }
 736
 737 int cg_set_group_access(
 738                 const char *controller,
 739                 const char *path,
 740                 mode_t mode,
 741                 uid_t uid,
 742                 gid_t gid) {
 743
 744         _cleanup_free_ char *fs = NULL;
 745         int r;
 746
 747         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 748                 return 0;
 749
 750         if (mode != MODE_INVALID)
 751                 mode &= 0777;
 752
 753         r = cg_get_path(controller, path, NULL, &fs);
 754         if (r < 0)
 755                 return r;
 756
 757         return chmod_and_chown(fs, mode, uid, gid);
 758 }
 759
 760 int cg_set_task_access(
 761                 const char *controller,
 762                 const char *path,
 763                 mode_t mode,
 764                 uid_t uid,
 765                 gid_t gid) {
 766
 767         _cleanup_free_ char *fs = NULL, *procs = NULL;
 768         int r, unified;
 769
 770         assert(path);
 771
 772         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 773                 return 0;
 774
 775         if (mode != MODE_INVALID)
 776                 mode &= 0666;
 777
 778         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 779         if (r < 0)
 780                 return r;
 781
 782         r = chmod_and_chown(fs, mode, uid, gid);
 783         if (r < 0)
 784                 return r;
 785
 786         unified = cg_unified();
 787         if (unified < 0)
 788                 return unified;
 789         if (unified)
 790                 return 0;
 791
 792         /* Compatibility, Always keep values for "tasks" in sync with
 793          * "cgroup.procs" */
 794         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 795                 (void) chmod_and_chown(procs, mode, uid, gid);
 796
 797         return 0;
 798 }
 799
 800 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 801         _cleanup_fclose_ FILE *f = NULL;
 802         char line[LINE_MAX];
 803         const char *fs;
 804         size_t cs = 0;
 805         int unified;
 806
 807         assert(path);
 808         assert(pid >= 0);
 809
 810         unified = cg_unified();
 811         if (unified < 0)
 812                 return unified;
 813         if (unified == 0) {
 814                 if (controller) {
 815                         if (!cg_controller_is_valid(controller))
 816                                 return -EINVAL;
 817                 } else
 818                         controller = SYSTEMD_CGROUP_CONTROLLER;
 819
 820                 cs = strlen(controller);
 821         }
 822
 823         fs = procfs_file_alloca(pid, "cgroup");
 824         f = fopen(fs, "re");
 825         if (!f)
 826                 return errno == ENOENT ? -ESRCH : -errno;
 827
 828         FOREACH_LINE(line, f, return -errno) {
 829                 char *e, *p;
 830
 831                 truncate_nl(line);
 832
 833                 if (unified) {
 834                         e = startswith(line, "0:");
 835                         if (!e)
 836                                 continue;
 837
 838                         e = strchr(e, ':');
 839                         if (!e)
 840                                 continue;
 841                 } else {
 842                         char *l;
 843                         size_t k;
 844                         const char *word, *state;
 845                         bool found = false;
 846
 847                         l = strchr(line, ':');
 848                         if (!l)
 849                                 continue;
 850
 851                         l++;
 852                         e = strchr(l, ':');
 853                         if (!e)
 854                                 continue;
 855
 856                         *e = 0;
 857                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 858                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 859                                         found = true;
 860                                         break;
 861                                 }
 862                         }
 863
 864                         if (!found)
 865                                 continue;
 866                 }
 867
 868                 p = strdup(e + 1);
 869                 if (!p)
 870                         return -ENOMEM;
 871
 872                 *path = p;
 873                 return 0;
 874         }
 875
 876         return -ENODATA;
 877 }
 878
 879 int cg_install_release_agent(const char *controller, const char *agent) {
 880         _cleanup_free_ char *fs = NULL, *contents = NULL;
 881         const char *sc;
 882         int r, unified;
 883
 884         assert(agent);
 885
 886         unified = cg_unified();
 887         if (unified < 0)
 888                 return unified;
 889         if (unified) /* doesn't apply to unified hierarchy */
 890                 return -EOPNOTSUPP;
 891
 892         r = cg_get_path(controller, NULL, "release_agent", &fs);
 893         if (r < 0)
 894                 return r;
 895
 896         r = read_one_line_file(fs, &contents);
 897         if (r < 0)
 898                 return r;
 899
 900         sc = strstrip(contents);
 901         if (isempty(sc)) {
 902                 r = write_string_file(fs, agent, 0);
 903                 if (r < 0)
 904                         return r;
 905         } else if (!path_equal(sc, agent))
 906                 return -EEXIST;
 907
 908         fs = mfree(fs);
 909         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 910         if (r < 0)
 911                 return r;
 912
 913         contents = mfree(contents);
 914         r = read_one_line_file(fs, &contents);
 915         if (r < 0)
 916                 return r;
 917
 918         sc = strstrip(contents);
 919         if (streq(sc, "0")) {
 920                 r = write_string_file(fs, "1", 0);
 921                 if (r < 0)
 922                         return r;
 923
 924                 return 1;
 925         }
 926
 927         if (!streq(sc, "1"))
 928                 return -EIO;
 929
 930         return 0;
 931 }
 932
 933 int cg_uninstall_release_agent(const char *controller) {
 934         _cleanup_free_ char *fs = NULL;
 935         int r, unified;
 936
 937         unified = cg_unified();
 938         if (unified < 0)
 939                 return unified;
 940         if (unified) /* Doesn't apply to unified hierarchy */
 941                 return -EOPNOTSUPP;
 942
 943         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 944         if (r < 0)
 945                 return r;
 946
 947         r = write_string_file(fs, "0", 0);
 948         if (r < 0)
 949                 return r;
 950
 951         fs = mfree(fs);
 952
 953         r = cg_get_path(controller, NULL, "release_agent", &fs);
 954         if (r < 0)
 955                 return r;
 956
 957         r = write_string_file(fs, "", 0);
 958         if (r < 0)
 959                 return r;
 960
 961         return 0;
 962 }
 963
 964 int cg_is_empty(const char *controller, const char *path) {
 965         _cleanup_fclose_ FILE *f = NULL;
 966         pid_t pid;
 967         int r;
 968
 969         assert(path);
 970
 971         r = cg_enumerate_processes(controller, path, &f);
 972         if (r == -ENOENT)
 973                 return 1;
 974         if (r < 0)
 975                 return r;
 976
 977         r = cg_read_pid(f, &pid);
 978         if (r < 0)
 979                 return r;
 980
 981         return r == 0;
 982 }
 983
 984 int cg_is_empty_recursive(const char *controller, const char *path) {
 985         int unified, r;
 986
 987         assert(path);
 988
 989         /* The root cgroup is always populated */
 990         if (controller && (isempty(path) || path_equal(path, "/")))
 991                 return false;
 992
 993         unified = cg_unified();
 994         if (unified < 0)
 995                 return unified;
 996
 997         if (unified > 0) {
 998                 _cleanup_free_ char *populated = NULL, *t = NULL;
 999
1000                 /* On the unified hierarchy we can check empty state
1001                  * via the "cgroup.populated" attribute. */
1002
1003                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1004                 if (r < 0)
1005                         return r;
1006
1007                 r = read_one_line_file(populated, &t);
1008                 if (r == -ENOENT)
1009                         return 1;
1010                 if (r < 0)
1011                         return r;
1012
1013                 return streq(t, "0");
1014         } else {
1015                 _cleanup_closedir_ DIR *d = NULL;
1016                 char *fn;
1017
1018                 r = cg_is_empty(controller, path);
1019                 if (r <= 0)
1020                         return r;
1021
1022                 r = cg_enumerate_subgroups(controller, path, &d);
1023                 if (r == -ENOENT)
1024                         return 1;
1025                 if (r < 0)
1026                         return r;
1027
1028                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1029                         _cleanup_free_ char *p = NULL;
1030
1031                         p = strjoin(path, "/", fn, NULL);
1032                         free(fn);
1033                         if (!p)
1034                                 return -ENOMEM;
1035
1036                         r = cg_is_empty_recursive(controller, p);
1037                         if (r <= 0)
1038                                 return r;
1039                 }
1040                 if (r < 0)
1041                         return r;
1042
1043                 return true;
1044         }
1045 }
1046
1047 int cg_split_spec(const char *spec, char **controller, char **path) {
1048         char *t = NULL, *u = NULL;
1049         const char *e;
1050
1051         assert(spec);
1052
1053         if (*spec == '/') {
1054                 if (!path_is_safe(spec))
1055                         return -EINVAL;
1056
1057                 if (path) {
1058                         t = strdup(spec);
1059                         if (!t)
1060                                 return -ENOMEM;
1061
1062                         *path = path_kill_slashes(t);
1063                 }
1064
1065                 if (controller)
1066                         *controller = NULL;
1067
1068                 return 0;
1069         }
1070
1071         e = strchr(spec, ':');
1072         if (!e) {
1073                 if (!cg_controller_is_valid(spec))
1074                         return -EINVAL;
1075
1076                 if (controller) {
1077                         t = strdup(spec);
1078                         if (!t)
1079                                 return -ENOMEM;
1080
1081                         *controller = t;
1082                 }
1083
1084                 if (path)
1085                         *path = NULL;
1086
1087                 return 0;
1088         }
1089
1090         t = strndup(spec, e-spec);
1091         if (!t)
1092                 return -ENOMEM;
1093         if (!cg_controller_is_valid(t)) {
1094                 free(t);
1095                 return -EINVAL;
1096         }
1097
1098         if (isempty(e+1))
1099                 u = NULL;
1100         else {
1101                 u = strdup(e+1);
1102                 if (!u) {
1103                         free(t);
1104                         return -ENOMEM;
1105                 }
1106
1107                 if (!path_is_safe(u) ||
1108                     !path_is_absolute(u)) {
1109                         free(t);
1110                         free(u);
1111                         return -EINVAL;
1112                 }
1113
1114                 path_kill_slashes(u);
1115         }
1116
1117         if (controller)
1118                 *controller = t;
1119         else
1120                 free(t);
1121
1122         if (path)
1123                 *path = u;
1124         else
1125                 free(u);
1126
1127         return 0;
1128 }
1129
1130 int cg_mangle_path(const char *path, char **result) {
1131         _cleanup_free_ char *c = NULL, *p = NULL;
1132         char *t;
1133         int r;
1134
1135         assert(path);
1136         assert(result);
1137
1138         /* First, check if it already is a filesystem path */
1139         if (path_startswith(path, "/sys/fs/cgroup")) {
1140
1141                 t = strdup(path);
1142                 if (!t)
1143                         return -ENOMEM;
1144
1145                 *result = path_kill_slashes(t);
1146                 return 0;
1147         }
1148
1149         /* Otherwise, treat it as cg spec */
1150         r = cg_split_spec(path, &c, &p);
1151         if (r < 0)
1152                 return r;
1153
1154         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1155 }
1156
1157 int cg_get_root_path(char **path) {
1158         char *p, *e;
1159         int r;
1160
1161         assert(path);
1162
1163         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1164         if (r < 0)
1165                 return r;
1166
1167         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1168         if (!e)
1169                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1170         if (!e)
1171                 e = endswith(p, "/system"); /* even more legacy */
1172         if (e)
1173                 *e = 0;
1174
1175         *path = p;
1176         return 0;
1177 }
1178
1179 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1180         _cleanup_free_ char *rt = NULL;
1181         char *p;
1182         int r;
1183
1184         assert(cgroup);
1185         assert(shifted);
1186
1187         if (!root) {
1188                 /* If the root was specified let's use that, otherwise
1189                  * let's determine it from PID 1 */
1190
1191                 r = cg_get_root_path(&rt);
1192                 if (r < 0)
1193                         return r;
1194
1195                 root = rt;
1196         }
1197
1198         p = path_startswith(cgroup, root);
1199         if (p && p > cgroup)
1200                 *shifted = p - 1;
1201         else
1202                 *shifted = cgroup;
1203
1204         return 0;
1205 }
1206
1207 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1208         _cleanup_free_ char *raw = NULL;
1209         const char *c;
1210         int r;
1211
1212         assert(pid >= 0);
1213         assert(cgroup);
1214
1215         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1216         if (r < 0)
1217                 return r;
1218
1219         r = cg_shift_path(raw, root, &c);
1220         if (r < 0)
1221                 return r;
1222
1223         if (c == raw) {
1224                 *cgroup = raw;
1225                 raw = NULL;
1226         } else {
1227                 char *n;
1228
1229                 n = strdup(c);
1230                 if (!n)
1231                         return -ENOMEM;
1232
1233                 *cgroup = n;
1234         }
1235
1236         return 0;
1237 }
1238
1239 int cg_path_decode_unit(const char *cgroup, char **unit){
1240         char *c, *s;
1241         size_t n;
1242
1243         assert(cgroup);
1244         assert(unit);
1245
1246         n = strcspn(cgroup, "/");
1247         if (n < 3)
1248                 return -ENXIO;
1249
1250         c = strndupa(cgroup, n);
1251         c = cg_unescape(c);
1252
1253         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1254                 return -ENXIO;
1255
1256         s = strdup(c);
1257         if (!s)
1258                 return -ENOMEM;
1259
1260         *unit = s;
1261         return 0;
1262 }
1263
1264 static bool valid_slice_name(const char *p, size_t n) {
1265
1266         if (!p)
1267                 return false;
1268
1269         if (n < strlen("x.slice"))
1270                 return false;
1271
1272         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1273                 char buf[n+1], *c;
1274
1275                 memcpy(buf, p, n);
1276                 buf[n] = 0;
1277
1278                 c = cg_unescape(buf);
1279
1280                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1281         }
1282
1283         return false;
1284 }
1285
/* Skips over all leading path components that are valid slice names
 * (see valid_slice_name()), including the slashes in front of them.
 * Returns a pointer to the first component that is not a slice, with
 * its leading slashes already skipped. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t len;

                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                p = component + len;
        }
}
1303
1304 int cg_path_get_unit(const char *path, char **ret) {
1305         const char *e;
1306         char *unit;
1307         int r;
1308
1309         assert(path);
1310         assert(ret);
1311
1312         e = skip_slices(path);
1313
1314         r = cg_path_decode_unit(e, &unit);
1315         if (r < 0)
1316                 return r;
1317
1318         /* We skipped over the slices, don't accept any now */
1319         if (endswith(unit, ".slice")) {
1320                 free(unit);
1321                 return -ENXIO;
1322         }
1323
1324         *ret = unit;
1325         return 0;
1326 }
1327
1328 int cg_pid_get_unit(pid_t pid, char **unit) {
1329         _cleanup_free_ char *cgroup = NULL;
1330         int r;
1331
1332         assert(unit);
1333
1334         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1335         if (r < 0)
1336                 return r;
1337
1338         return cg_path_get_unit(cgroup, unit);
1339 }
1340
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Leading slashes of p are ignored. If the first component has the
 * form "session-<id>.scope" and <id> passes session_id_valid(), a
 * pointer to what follows that component (with the separating
 * slashes skipped) is returned. In every other case, including a
 * NULL or empty p, NULL is returned.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the id between prefix and suffix plus NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, so signal failure
                 * with NULL like all the other error paths do. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1377
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer to what follows the "user@<uid>.service"
 * component (separating slashes skipped) if the first component of p
 * has that form and <uid> is accepted by parse_uid(), NULL in every
 * other case (including a NULL or empty p).
 */
static const char *skip_user_manager(const char *p) {
        const char *component;
        size_t len;

        if (isempty(p))
                return NULL;

        component = p + strspn(p, "/");
        len = strcspn(component, "/");

        if (len >= strlen("user@x.service") &&
            memcmp(component, "user@", 5) == 0 &&
            memcmp(component + len - 8, ".service", 8) == 0) {
                char uid[len - 5 - 8 + 1];

                /* Cut out the part between "user@" and ".service" */
                memcpy(uid, component + 5, len - 5 - 8);
                uid[len - 5 - 8] = 0;

                /* User manager service names never need unescaping:
                 * they cannot clash with the kernel's own names, so
                 * cg_unescape() is not needed here. */

                if (parse_uid(uid, NULL) >= 0) {
                        const char *rest = component + len;

                        return rest + strspn(rest, "/");
                }
        }

        return NULL;
}
1415
/* Skips everything in front of the user-level part of a cgroup path:
 * first any slices, then either the user manager service or, if that
 * is not there, the session scope. Returns NULL if neither of the two
 * follows the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        after_slices = skip_slices(path);

        /* Prefer the user manager, fall back to the session scope */
        after_manager = skip_user_manager(after_slices);

        return after_manager ?: skip_session(after_slices);
}
1432
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO
 * if the path carries no user prefix (see skip_user_prefix()),
 * otherwise the result of cg_path_get_unit() on the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the prefix things look just like for a system unit,
         * so the same parser is reused. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1448
1449 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1450         _cleanup_free_ char *cgroup = NULL;
1451         int r;
1452
1453         assert(unit);
1454
1455         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1456         if (r < 0)
1457                 return r;
1458
1459         return cg_path_get_user_unit(cgroup, unit);
1460 }
1461
1462 int cg_path_get_machine_name(const char *path, char **machine) {
1463         _cleanup_free_ char *u = NULL;
1464         const char *sl;
1465         int r;
1466
1467         r = cg_path_get_unit(path, &u);
1468         if (r < 0)
1469                 return r;
1470
1471         sl = strjoina("/run/systemd/machines/unit:", u);
1472         return readlink_malloc(sl, machine);
1473 }
1474
1475 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1476         _cleanup_free_ char *cgroup = NULL;
1477         int r;
1478
1479         assert(machine);
1480
1481         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1482         if (r < 0)
1483                 return r;
1484
1485         return cg_path_get_machine_name(cgroup, machine);
1486 }
1487
1488 int cg_path_get_session(const char *path, char **session) {
1489         _cleanup_free_ char *unit = NULL;
1490         char *start, *end;
1491         int r;
1492
1493         assert(path);
1494
1495         r = cg_path_get_unit(path, &unit);
1496         if (r < 0)
1497                 return r;
1498
1499         start = startswith(unit, "session-");
1500         if (!start)
1501                 return -ENXIO;
1502         end = endswith(start, ".scope");
1503         if (!end)
1504                 return -ENXIO;
1505
1506         *end = 0;
1507         if (!session_id_valid(start))
1508                 return -ENXIO;
1509
1510         if (session) {
1511                 char *rr;
1512
1513                 rr = strdup(start);
1514                 if (!rr)
1515                         return -ENOMEM;
1516
1517                 *session = rr;
1518         }
1519
1520         return 0;
1521 }
1522
1523 int cg_pid_get_session(pid_t pid, char **session) {
1524         _cleanup_free_ char *cgroup = NULL;
1525         int r;
1526
1527         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1528         if (r < 0)
1529                 return r;
1530
1531         return cg_path_get_session(cgroup, session);
1532 }
1533
1534 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1535         _cleanup_free_ char *slice = NULL;
1536         char *start, *end;
1537         int r;
1538
1539         assert(path);
1540
1541         r = cg_path_get_slice(path, &slice);
1542         if (r < 0)
1543                 return r;
1544
1545         start = startswith(slice, "user-");
1546         if (!start)
1547                 return -ENXIO;
1548         end = endswith(start, ".slice");
1549         if (!end)
1550                 return -ENXIO;
1551
1552         *end = 0;
1553         if (parse_uid(start, uid) < 0)
1554                 return -ENXIO;
1555
1556         return 0;
1557 }
1558
1559 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1560         _cleanup_free_ char *cgroup = NULL;
1561         int r;
1562
1563         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1564         if (r < 0)
1565                 return r;
1566
1567         return cg_path_get_owner_uid(cgroup, uid);
1568 }
1569
/* Determines the slice a cgroup path is located in: the right-most
 * of the leading slice components, stopping at the first component
 * that is not a slice. If the path does not start with any slice,
 * "-.slice" is returned. Stores an allocated string in *slice and
 * returns 0, -ENOMEM on allocation failure, or the error of
 * cg_path_decode_unit(). */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        char *root;

        assert(p);
        assert(slice);

        /* Walk over the leading slices, remembering the latest one */
        for (;;) {
                size_t len;

                p += strspn(p, "/");
                len = strcspn(p, "/");

                if (!valid_slice_name(p, len))
                        break;

                last = p;
                p += len;
        }

        if (last)
                return cg_path_decode_unit(last, slice);

        /* No slice at all */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1605
1606 int cg_pid_get_slice(pid_t pid, char **slice) {
1607         _cleanup_free_ char *cgroup = NULL;
1608         int r;
1609
1610         assert(slice);
1611
1612         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1613         if (r < 0)
1614                 return r;
1615
1616         return cg_path_get_slice(cgroup, slice);
1617 }
1618
/* Determines the user-level slice of a cgroup path. Returns -ENXIO if
 * the path carries no user prefix (see skip_user_prefix()), otherwise
 * the result of cg_path_get_slice() on the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Behind the prefix this is parsed exactly like a system
         * slice, so the same code is used. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1632
1633 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1634         _cleanup_free_ char *cgroup = NULL;
1635         int r;
1636
1637         assert(slice);
1638
1639         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1640         if (r < 0)
1641                 return r;
1642
1643         return cg_path_get_user_slice(cgroup, slice);
1644 }
1645
1646 char *cg_escape(const char *p) {
1647         bool need_prefix = false;
1648
1649         /* This implements very minimal escaping for names to be used
1650          * as file names in the cgroup tree: any name which might
1651          * conflict with a kernel name or is prefixed with '_' is
1652          * prefixed with a '_'. That way, when reading cgroup names it
1653          * is sufficient to remove a single prefixing underscore if
1654          * there is one. */
1655
1656         /* The return value of this function (unlike cg_unescape())
1657          * needs free()! */
1658
1659         if (p[0] == 0 ||
1660             p[0] == '_' ||
1661             p[0] == '.' ||
1662             streq(p, "notify_on_release") ||
1663             streq(p, "release_agent") ||
1664             streq(p, "tasks") ||
1665             startswith(p, "cgroup."))
1666                 need_prefix = true;
1667         else {
1668                 const char *dot;
1669
1670                 dot = strrchr(p, '.');
1671                 if (dot) {
1672                         CGroupController c;
1673                         size_t l = dot - p;
1674
1675                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1676                                 const char *n;
1677
1678                                 n = cgroup_controller_to_string(c);
1679
1680                                 if (l != strlen(n))
1681                                         continue;
1682
1683                                 if (memcmp(p, n, l) != 0)
1684                                         continue;
1685
1686                                 need_prefix = true;
1687                                 break;
1688                         }
1689                 }
1690         }
1691
1692         if (need_prefix)
1693                 return strappend("_", p);
1694
1695         return strdup(p);
1696 }
1697
1698 char *cg_unescape(const char *p) {
1699         assert(p);
1700
1701         /* The return value of this function (unlike cg_escape())
1702          * doesn't need free()! */
1703
1704         if (p[0] == '_')
1705                 return (char*) p+1;
1706
1707         return (char*) p;
1708 }
1709
1710 #define CONTROLLER_VALID                        \
1711         DIGITS LETTERS                          \
1712         "_"
1713
1714 bool cg_controller_is_valid(const char *p) {
1715         const char *t, *s;
1716
1717         if (!p)
1718                 return false;
1719
1720         s = startswith(p, "name=");
1721         if (s)
1722                 p = s;
1723
1724         if (*p == 0 || *p == '_')
1725                 return false;
1726
1727         for (t = p; *t; t++)
1728                 if (!strchr(CONTROLLER_VALID, *t))
1729                         return false;
1730
1731         if (t - p > FILENAME_MAX)
1732                 return false;
1733
1734         return true;
1735 }
1736
1737 int cg_slice_to_path(const char *unit, char **ret) {
1738         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1739         const char *dash;
1740         int r;
1741
1742         assert(unit);
1743         assert(ret);
1744
1745         if (streq(unit, "-.slice")) {
1746                 char *x;
1747
1748                 x = strdup("");
1749                 if (!x)
1750                         return -ENOMEM;
1751                 *ret = x;
1752                 return 0;
1753         }
1754
1755         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1756                 return -EINVAL;
1757
1758         if (!endswith(unit, ".slice"))
1759                 return -EINVAL;
1760
1761         r = unit_name_to_prefix(unit, &p);
1762         if (r < 0)
1763                 return r;
1764
1765         dash = strchr(p, '-');
1766
1767         /* Don't allow initial dashes */
1768         if (dash == p)
1769                 return -EINVAL;
1770
1771         while (dash) {
1772                 _cleanup_free_ char *escaped = NULL;
1773                 char n[dash - p + sizeof(".slice")];
1774
1775                 /* Don't allow trailing or double dashes */
1776                 if (dash[1] == 0 || dash[1] == '-')
1777                         return -EINVAL;
1778
1779                 strcpy(stpncpy(n, p, dash - p), ".slice");
1780                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1781                         return -EINVAL;
1782
1783                 escaped = cg_escape(n);
1784                 if (!escaped)
1785                         return -ENOMEM;
1786
1787                 if (!strextend(&s, escaped, "/", NULL))
1788                         return -ENOMEM;
1789
1790                 dash = strchr(dash+1, '-');
1791         }
1792
1793         e = cg_escape(unit);
1794         if (!e)
1795                 return -ENOMEM;
1796
1797         if (!strextend(&s, e, NULL))
1798                 return -ENOMEM;
1799
1800         *ret = s;
1801         s = NULL;
1802
1803         return 0;
1804 }
1805
1806 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1807         _cleanup_free_ char *p = NULL;
1808         int r;
1809
1810         r = cg_get_path(controller, path, attribute, &p);
1811         if (r < 0)
1812                 return r;
1813
1814         return write_string_file(p, value, 0);
1815 }
1816
1817 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1818         _cleanup_free_ char *p = NULL;
1819         int r;
1820
1821         r = cg_get_path(controller, path, attribute, &p);
1822         if (r < 0)
1823                 return r;
1824
1825         return read_one_line_file(p, ret);
1826 }
1827
1828 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1829         CGroupController c;
1830         int r, unified;
1831
1832         /* This one will create a cgroup in our private tree, but also
1833          * duplicate it in the trees specified in mask, and remove it
1834          * in all others */
1835
1836         /* First create the cgroup in our own hierarchy. */
1837         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1838         if (r < 0)
1839                 return r;
1840
1841         /* If we are in the unified hierarchy, we are done now */
1842         unified = cg_unified();
1843         if (unified < 0)
1844                 return unified;
1845         if (unified > 0)
1846                 return 0;
1847
1848         /* Otherwise, do the same in the other hierarchies */
1849         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1850                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1851                 const char *n;
1852
1853                 n = cgroup_controller_to_string(c);
1854
1855                 if (mask & bit)
1856                         (void) cg_create(n, path);
1857                 else if (supported & bit)
1858                         (void) cg_trim(n, path, true);
1859         }
1860
1861         return 0;
1862 }
1863
1864 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1865         CGroupController c;
1866         int r, unified;
1867
1868         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1869         if (r < 0)
1870                 return r;
1871
1872         unified = cg_unified();
1873         if (unified < 0)
1874                 return unified;
1875         if (unified > 0)
1876                 return 0;
1877
1878         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1879                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1880                 const char *p = NULL;
1881
1882                 if (!(supported & bit))
1883                         continue;
1884
1885                 if (path_callback)
1886                         p = path_callback(bit, userdata);
1887
1888                 if (!p)
1889                         p = path;
1890
1891                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1892         }
1893
1894         return 0;
1895 }
1896
1897 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1898         Iterator i;
1899         void *pidp;
1900         int r = 0;
1901
1902         SET_FOREACH(pidp, pids, i) {
1903                 pid_t pid = PTR_TO_PID(pidp);
1904                 int q;
1905
1906                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1907                 if (q < 0 && r >= 0)
1908                         r = q;
1909         }
1910
1911         return r;
1912 }
1913
1914 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1915         CGroupController c;
1916         int r = 0, unified;
1917
1918         if (!path_equal(from, to))  {
1919                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1920                 if (r < 0)
1921                         return r;
1922         }
1923
1924         unified = cg_unified();
1925         if (unified < 0)
1926                 return unified;
1927         if (unified > 0)
1928                 return r;
1929
1930         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1931                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1932                 const char *p = NULL;
1933
1934                 if (!(supported & bit))
1935                         continue;
1936
1937                 if (to_callback)
1938                         p = to_callback(bit, userdata);
1939
1940                 if (!p)
1941                         p = to;
1942
1943                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1944         }
1945
1946         return 0;
1947 }
1948
1949 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1950         CGroupController c;
1951         int r, unified;
1952
1953         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1954         if (r < 0)
1955                 return r;
1956
1957         unified = cg_unified();
1958         if (unified < 0)
1959                 return unified;
1960         if (unified > 0)
1961                 return r;
1962
1963         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1964                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1965
1966                 if (!(supported & bit))
1967                         continue;
1968
1969                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1970         }
1971
1972         return 0;
1973 }
1974
1975 int cg_mask_supported(CGroupMask *ret) {
1976         CGroupMask mask = 0;
1977         int r, unified;
1978
1979         /* Determines the mask of supported cgroup controllers. Only
1980          * includes controllers we can make sense of and that are
1981          * actually accessible. */
1982
1983         unified = cg_unified();
1984         if (unified < 0)
1985                 return unified;
1986         if (unified > 0) {
1987                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
1988                 const char *c;
1989
1990                 /* In the unified hierarchy we can read the supported
1991                  * and accessible controllers from a the top-level
1992                  * cgroup attribute */
1993
1994                 r = cg_get_root_path(&root);
1995                 if (r < 0)
1996                         return r;
1997
1998                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
1999                 if (r < 0)
2000                         return r;
2001
2002                 r = read_one_line_file(path, &controllers);
2003                 if (r < 0)
2004                         return r;
2005
2006                 c = controllers;
2007                 for (;;) {
2008                         _cleanup_free_ char *n = NULL;
2009                         CGroupController v;
2010
2011                         r = extract_first_word(&c, &n, NULL, 0);
2012                         if (r < 0)
2013                                 return r;
2014                         if (r == 0)
2015                                 break;
2016
2017                         v = cgroup_controller_from_string(n);
2018                         if (v < 0)
2019                                 continue;
2020
2021                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2022                 }
2023
2024                 /* Currently, we only support the memory and pids
2025                  * controller in the unified hierarchy, mask
2026                  * everything else off. */
2027                 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS;
2028
2029         } else {
2030                 CGroupController c;
2031
2032                 /* In the legacy hierarchy, we check whether which
2033                  * hierarchies are mounted. */
2034
2035                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2036                         const char *n;
2037
2038                         n = cgroup_controller_to_string(c);
2039                         if (controller_is_accessible(n) >= 0)
2040                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2041                 }
2042         }
2043
2044         *ret = mask;
2045         return 0;
2046 }
2047
2048 int cg_kernel_controllers(Set *controllers) {
2049         _cleanup_fclose_ FILE *f = NULL;
2050         char buf[LINE_MAX];
2051         int r;
2052
2053         assert(controllers);
2054
2055         /* Determines the full list of kernel-known controllers. Might
2056          * include controllers we don't actually support, arbitrary
2057          * named hierarchies and controllers that aren't currently
2058          * accessible (because not mounted). */
2059
2060         f = fopen("/proc/cgroups", "re");
2061         if (!f) {
2062                 if (errno == ENOENT)
2063                         return 0;
2064                 return -errno;
2065         }
2066
2067         /* Ignore the header line */
2068         (void) fgets(buf, sizeof(buf), f);
2069
2070         for (;;) {
2071                 char *controller;
2072                 int enabled = 0;
2073
2074                 errno = 0;
2075                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2076
2077                         if (feof(f))
2078                                 break;
2079
2080                         if (ferror(f) && errno != 0)
2081                                 return -errno;
2082
2083                         return -EBADMSG;
2084                 }
2085
2086                 if (!enabled) {
2087                         free(controller);
2088                         continue;
2089                 }
2090
2091                 if (!cg_controller_is_valid(controller)) {
2092                         free(controller);
2093                         return -EBADMSG;
2094                 }
2095
2096                 r = set_consume(controllers, controller);
2097                 if (r < 0)
2098                         return r;
2099         }
2100
2101         return 0;
2102 }
2103
2104 static thread_local int unified_cache = -1;
2105
2106 int cg_unified(void) {
2107         struct statfs fs;
2108
2109         /* Checks if we support the unified hierarchy. Returns an
2110          * error when the cgroup hierarchies aren't mounted yet or we
2111          * have any other trouble determining if the unified hierarchy
2112          * is supported. */
2113
2114         if (unified_cache >= 0)
2115                 return unified_cache;
2116
2117         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2118                 return -errno;
2119
2120         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2121                 unified_cache = true;
2122         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2123                 unified_cache = false;
2124         else
2125                 return -ENOEXEC;
2126
2127         return unified_cache;
2128 }
2129
/* Drops the cached result of cg_unified() by resetting the cache to
 * its "undetermined" value, so that the next cg_unified() call checks
 * the file system again. */
void cg_unified_flush(void) {
        unified_cache = -1;
}
2133
2134 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2135         _cleanup_free_ char *fs = NULL;
2136         CGroupController c;
2137         int r, unified;
2138
2139         assert(p);
2140
2141         if (supported == 0)
2142                 return 0;
2143
2144         unified = cg_unified();
2145         if (unified < 0)
2146                 return unified;
2147         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2148                 return 0;
2149
2150         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2151         if (r < 0)
2152                 return r;
2153
2154         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2155                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2156                 const char *n;
2157
2158                 if (!(supported & bit))
2159                         continue;
2160
2161                 n = cgroup_controller_to_string(c);
2162                 {
2163                         char s[1 + strlen(n) + 1];
2164
2165                         s[0] = mask & bit ? '+' : '-';
2166                         strcpy(s + 1, n);
2167
2168                         r = write_string_file(fs, s, 0);
2169                         if (r < 0)
2170                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2171                 }
2172         }
2173
2174         return 0;
2175 }
2176
2177 bool cg_is_unified_wanted(void) {
2178         static thread_local int wanted = -1;
2179         int r, unified;
2180
2181         /* If the hierarchy is already mounted, then follow whatever
2182          * was chosen for it. */
2183         unified = cg_unified();
2184         if (unified >= 0)
2185                 return unified;
2186
2187         /* Otherwise, let's see what the kernel command line has to
2188          * say. Since checking that is expensive, let's cache the
2189          * result. */
2190         if (wanted >= 0)
2191                 return wanted;
2192
2193         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2194         if (r > 0)
2195                 return (wanted = true);
2196         else {
2197                 _cleanup_free_ char *value = NULL;
2198
2199                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2200                 if (r < 0)
2201                         return false;
2202                 if (r == 0)
2203                         return (wanted = false);
2204
2205                 return (wanted = parse_boolean(value) > 0);
2206         }
2207 }
2208
/* The exact opposite of cg_is_unified_wanted(): true whenever the
 * unified hierarchy is not wanted. */
bool cg_is_legacy_wanted(void) {
        const bool unified_wanted = cg_is_unified_wanted();

        return !unified_wanted;
}
2212
2213 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2214         uint64_t u;
2215         int r;
2216
2217         if (isempty(s)) {
2218                 *ret = CGROUP_CPU_SHARES_INVALID;
2219                 return 0;
2220         }
2221
2222         r = safe_atou64(s, &u);
2223         if (r < 0)
2224                 return r;
2225
2226         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2227                 return -ERANGE;
2228
2229         *ret = u;
2230         return 0;
2231 }
2232
2233 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2234         uint64_t u;
2235         int r;
2236
2237         if (isempty(s)) {
2238                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2239                 return 0;
2240         }
2241
2242         r = safe_atou64(s, &u);
2243         if (r < 0)
2244                 return r;
2245
2246         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2247                 return -ERANGE;
2248
2249         *ret = u;
2250         return 0;
2251 }
2252
2253 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2254         [CGROUP_CONTROLLER_CPU] = "cpu",
2255         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2256         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2257         [CGROUP_CONTROLLER_MEMORY] = "memory",
2258         [CGROUP_CONTROLLER_DEVICES] = "devices",
2259         [CGROUP_CONTROLLER_PIDS] = "pids",
2260         [CGROUP_CONTROLLER_NET_CLS] = "net_cls",
2261 };
2262
2263 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);