src/basic/cgroup-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <dirent.h>
  23 #include <errno.h>
  24 #include <ftw.h>
  25 #include <signal.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <sys/stat.h>
  29 #include <sys/types.h>
  30 #include <unistd.h>
  31
  32 #include "extract-word.h"
  33 #include "fileio.h"
  34 #include "formats-util.h"
  35 #include "login-util.h"
  36 #include "macro.h"
  37 #include "mkdir.h"
  38 #include "path-util.h"
  39 #include "process-util.h"
  40 #include "set.h"
  41 #include "special.h"
  42 #include "string-util.h"
  43 #include "unit-name.h"
  44 #include "util.h"
  45 #include "cgroup-util.h"
  46
  47 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  48         _cleanup_free_ char *fs = NULL;
  49         FILE *f;
  50         int r;
  51
  52         assert(_f);
  53
  54         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  55         if (r < 0)
  56                 return r;
  57
  58         f = fopen(fs, "re");
  59         if (!f)
  60                 return -errno;
  61
  62         *_f = f;
  63         return 0;
  64 }
  65
  66 int cg_read_pid(FILE *f, pid_t *_pid) {
  67         unsigned long ul;
  68
  69         /* Note that the cgroup.procs might contain duplicates! See
  70          * cgroups.txt for details. */
  71
  72         assert(f);
  73         assert(_pid);
  74
  75         errno = 0;
  76         if (fscanf(f, "%lu", &ul) != 1) {
  77
  78                 if (feof(f))
  79                         return 0;
  80
  81                 return errno ? -errno : -EIO;
  82         }
  83
  84         if (ul <= 0)
  85                 return -EIO;
  86
  87         *_pid = (pid_t) ul;
  88         return 1;
  89 }
  90
  91 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
  92         _cleanup_free_ char *fs = NULL;
  93         int r;
  94         DIR *d;
  95
  96         assert(_d);
  97
  98         /* This is not recursive! */
  99
 100         r = cg_get_path(controller, path, NULL, &fs);
 101         if (r < 0)
 102                 return r;
 103
 104         d = opendir(fs);
 105         if (!d)
 106                 return -errno;
 107
 108         *_d = d;
 109         return 0;
 110 }
 111
 112 int cg_read_subgroup(DIR *d, char **fn) {
 113         struct dirent *de;
 114
 115         assert(d);
 116         assert(fn);
 117
 118         FOREACH_DIRENT_ALL(de, d, return -errno) {
 119                 char *b;
 120
 121                 if (de->d_type != DT_DIR)
 122                         continue;
 123
 124                 if (streq(de->d_name, ".") ||
 125                     streq(de->d_name, ".."))
 126                         continue;
 127
 128                 b = strdup(de->d_name);
 129                 if (!b)
 130                         return -ENOMEM;
 131
 132                 *fn = b;
 133                 return 1;
 134         }
 135
 136         return 0;
 137 }
 138
 139 int cg_rmdir(const char *controller, const char *path) {
 140         _cleanup_free_ char *p = NULL;
 141         int r;
 142
 143         r = cg_get_path(controller, path, NULL, &p);
 144         if (r < 0)
 145                 return r;
 146
 147         r = rmdir(p);
 148         if (r < 0 && errno != ENOENT)
 149                 return -errno;
 150
 151         return 0;
 152 }
 153
 154 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
 155         _cleanup_set_free_ Set *allocated_set = NULL;
 156         bool done = false;
 157         int r, ret = 0;
 158         pid_t my_pid;
 159
 160         assert(sig >= 0);
 161
 162         /* This goes through the tasks list and kills them all. This
 163          * is repeated until no further processes are added to the
 164          * tasks list, to properly handle forking processes */
 165
 166         if (!s) {
 167                 s = allocated_set = set_new(NULL);
 168                 if (!s)
 169                         return -ENOMEM;
 170         }
 171
 172         my_pid = getpid();
 173
 174         do {
 175                 _cleanup_fclose_ FILE *f = NULL;
 176                 pid_t pid = 0;
 177                 done = true;
 178
 179                 r = cg_enumerate_processes(controller, path, &f);
 180                 if (r < 0) {
 181                         if (ret >= 0 && r != -ENOENT)
 182                                 return r;
 183
 184                         return ret;
 185                 }
 186
 187                 while ((r = cg_read_pid(f, &pid)) > 0) {
 188
 189                         if (ignore_self && pid == my_pid)
 190                                 continue;
 191
 192                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 193                                 continue;
 194
 195                         /* If we haven't killed this process yet, kill
 196                          * it */
 197                         if (kill(pid, sig) < 0) {
 198                                 if (ret >= 0 && errno != ESRCH)
 199                                         ret = -errno;
 200                         } else {
 201                                 if (sigcont && sig != SIGKILL)
 202                                         (void) kill(pid, SIGCONT);
 203
 204                                 if (ret == 0)
 205                                         ret = 1;
 206                         }
 207
 208                         done = false;
 209
 210                         r = set_put(s, PID_TO_PTR(pid));
 211                         if (r < 0) {
 212                                 if (ret >= 0)
 213                                         return r;
 214
 215                                 return ret;
 216                         }
 217                 }
 218
 219                 if (r < 0) {
 220                         if (ret >= 0)
 221                                 return r;
 222
 223                         return ret;
 224                 }
 225
 226                 /* To avoid racing against processes which fork
 227                  * quicker than we can kill them we repeat this until
 228                  * no new pids need to be killed. */
 229
 230         } while (!done);
 231
 232         return ret;
 233 }
 234
 235 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 236         _cleanup_set_free_ Set *allocated_set = NULL;
 237         _cleanup_closedir_ DIR *d = NULL;
 238         int r, ret;
 239         char *fn;
 240
 241         assert(path);
 242         assert(sig >= 0);
 243
 244         if (!s) {
 245                 s = allocated_set = set_new(NULL);
 246                 if (!s)
 247                         return -ENOMEM;
 248         }
 249
 250         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 251
 252         r = cg_enumerate_subgroups(controller, path, &d);
 253         if (r < 0) {
 254                 if (ret >= 0 && r != -ENOENT)
 255                         return r;
 256
 257                 return ret;
 258         }
 259
 260         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 261                 _cleanup_free_ char *p = NULL;
 262
 263                 p = strjoin(path, "/", fn, NULL);
 264                 free(fn);
 265                 if (!p)
 266                         return -ENOMEM;
 267
 268                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 269                 if (r != 0 && ret >= 0)
 270                         ret = r;
 271         }
 272
 273         if (ret >= 0 && r < 0)
 274                 ret = r;
 275
 276         if (rem) {
 277                 r = cg_rmdir(controller, path);
 278                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 279                         return r;
 280         }
 281
 282         return ret;
 283 }
 284
 285 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
 286         bool done = false;
 287         _cleanup_set_free_ Set *s = NULL;
 288         int r, ret = 0;
 289         pid_t my_pid;
 290
 291         assert(cfrom);
 292         assert(pfrom);
 293         assert(cto);
 294         assert(pto);
 295
 296         s = set_new(NULL);
 297         if (!s)
 298                 return -ENOMEM;
 299
 300         my_pid = getpid();
 301
 302         do {
 303                 _cleanup_fclose_ FILE *f = NULL;
 304                 pid_t pid = 0;
 305                 done = true;
 306
 307                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 308                 if (r < 0) {
 309                         if (ret >= 0 && r != -ENOENT)
 310                                 return r;
 311
 312                         return ret;
 313                 }
 314
 315                 while ((r = cg_read_pid(f, &pid)) > 0) {
 316
 317                         /* This might do weird stuff if we aren't a
 318                          * single-threaded program. However, we
 319                          * luckily know we are not */
 320                         if (ignore_self && pid == my_pid)
 321                                 continue;
 322
 323                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 324                                 continue;
 325
 326                         /* Ignore kernel threads. Since they can only
 327                          * exist in the root cgroup, we only check for
 328                          * them there. */
 329                         if (cfrom &&
 330                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 331                             is_kernel_thread(pid) > 0)
 332                                 continue;
 333
 334                         r = cg_attach(cto, pto, pid);
 335                         if (r < 0) {
 336                                 if (ret >= 0 && r != -ESRCH)
 337                                         ret = r;
 338                         } else if (ret == 0)
 339                                 ret = 1;
 340
 341                         done = false;
 342
 343                         r = set_put(s, PID_TO_PTR(pid));
 344                         if (r < 0) {
 345                                 if (ret >= 0)
 346                                         return r;
 347
 348                                 return ret;
 349                         }
 350                 }
 351
 352                 if (r < 0) {
 353                         if (ret >= 0)
 354                                 return r;
 355
 356                         return ret;
 357                 }
 358         } while (!done);
 359
 360         return ret;
 361 }
 362
 363 int cg_migrate_recursive(
 364                 const char *cfrom,
 365                 const char *pfrom,
 366                 const char *cto,
 367                 const char *pto,
 368                 bool ignore_self,
 369                 bool rem) {
 370
 371         _cleanup_closedir_ DIR *d = NULL;
 372         int r, ret = 0;
 373         char *fn;
 374
 375         assert(cfrom);
 376         assert(pfrom);
 377         assert(cto);
 378         assert(pto);
 379
 380         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 381
 382         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 383         if (r < 0) {
 384                 if (ret >= 0 && r != -ENOENT)
 385                         return r;
 386
 387                 return ret;
 388         }
 389
 390         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 391                 _cleanup_free_ char *p = NULL;
 392
 393                 p = strjoin(pfrom, "/", fn, NULL);
 394                 free(fn);
 395                 if (!p)
 396                         return -ENOMEM;
 397
 398                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 399                 if (r != 0 && ret >= 0)
 400                         ret = r;
 401         }
 402
 403         if (r < 0 && ret >= 0)
 404                 ret = r;
 405
 406         if (rem) {
 407                 r = cg_rmdir(cfrom, pfrom);
 408                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 409                         return r;
 410         }
 411
 412         return ret;
 413 }
 414
 415 int cg_migrate_recursive_fallback(
 416                 const char *cfrom,
 417                 const char *pfrom,
 418                 const char *cto,
 419                 const char *pto,
 420                 bool ignore_self,
 421                 bool rem) {
 422
 423         int r;
 424
 425         assert(cfrom);
 426         assert(pfrom);
 427         assert(cto);
 428         assert(pto);
 429
 430         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 431         if (r < 0) {
 432                 char prefix[strlen(pto) + 1];
 433
 434                 /* This didn't work? Then let's try all prefixes of the destination */
 435
 436                 PATH_FOREACH_PREFIX(prefix, pto) {
 437                         int q;
 438
 439                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 440                         if (q >= 0)
 441                                 return q;
 442                 }
 443         }
 444
 445         return r;
 446 }
 447
 448 static const char *controller_to_dirname(const char *controller) {
 449         const char *e;
 450
 451         assert(controller);
 452
 453         /* Converts a controller name to the directory name below
 454          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 455          * just cuts off the name= prefixed used for named
 456          * hierarchies, if it is specified. */
 457
 458         e = startswith(controller, "name=");
 459         if (e)
 460                 return e;
 461
 462         return controller;
 463 }
 464
 465 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 466         const char *dn;
 467         char *t = NULL;
 468
 469         assert(fs);
 470         assert(controller);
 471
 472         dn = controller_to_dirname(controller);
 473
 474         if (isempty(path) && isempty(suffix))
 475                 t = strappend("/sys/fs/cgroup/", dn);
 476         else if (isempty(path))
 477                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 478         else if (isempty(suffix))
 479                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 480         else
 481                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 482         if (!t)
 483                 return -ENOMEM;
 484
 485         *fs = t;
 486         return 0;
 487 }
 488
 489 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 490         char *t;
 491
 492         assert(fs);
 493
 494         if (isempty(path) && isempty(suffix))
 495                 t = strdup("/sys/fs/cgroup");
 496         else if (isempty(path))
 497                 t = strappend("/sys/fs/cgroup/", suffix);
 498         else if (isempty(suffix))
 499                 t = strappend("/sys/fs/cgroup/", path);
 500         else
 501                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 502         if (!t)
 503                 return -ENOMEM;
 504
 505         *fs = t;
 506         return 0;
 507 }
 508
 509 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 510         int unified, r;
 511
 512         assert(fs);
 513
 514         if (!controller) {
 515                 char *t;
 516
 517                 /* If no controller is specified, we return the path
 518                  * *below* the controllers, without any prefix. */
 519
 520                 if (!path && !suffix)
 521                         return -EINVAL;
 522
 523                 if (!suffix)
 524                         t = strdup(path);
 525                 else if (!path)
 526                         t = strdup(suffix);
 527                 else
 528                         t = strjoin(path, "/", suffix, NULL);
 529                 if (!t)
 530                         return -ENOMEM;
 531
 532                 *fs = path_kill_slashes(t);
 533                 return 0;
 534         }
 535
 536         if (!cg_controller_is_valid(controller))
 537                 return -EINVAL;
 538
 539         unified = cg_unified();
 540         if (unified < 0)
 541                 return unified;
 542
 543         if (unified > 0)
 544                 r = join_path_unified(path, suffix, fs);
 545         else
 546                 r = join_path_legacy(controller, path, suffix, fs);
 547         if (r < 0)
 548                 return r;
 549
 550         path_kill_slashes(*fs);
 551         return 0;
 552 }
 553
 554 static int controller_is_accessible(const char *controller) {
 555         int unified;
 556
 557         assert(controller);
 558
 559         /* Checks whether a specific controller is accessible,
 560          * i.e. its hierarchy mounted. In the unified hierarchy all
 561          * controllers are considered accessible, except for the named
 562          * hierarchies */
 563
 564         if (!cg_controller_is_valid(controller))
 565                 return -EINVAL;
 566
 567         unified = cg_unified();
 568         if (unified < 0)
 569                 return unified;
 570         if (unified > 0) {
 571                 /* We don't support named hierarchies if we are using
 572                  * the unified hierarchy. */
 573
 574                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 575                         return 0;
 576
 577                 if (startswith(controller, "name="))
 578                         return -EOPNOTSUPP;
 579
 580         } else {
 581                 const char *cc, *dn;
 582
 583                 dn = controller_to_dirname(controller);
 584                 cc = strjoina("/sys/fs/cgroup/", dn);
 585
 586                 if (laccess(cc, F_OK) < 0)
 587                         return -errno;
 588         }
 589
 590         return 0;
 591 }
 592
 593 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 594         int r;
 595
 596         assert(controller);
 597         assert(fs);
 598
 599         /* Check if the specified controller is actually accessible */
 600         r = controller_is_accessible(controller);
 601         if (r < 0)
 602                 return r;
 603
 604         return cg_get_path(controller, path, suffix, fs);
 605 }
 606
 607 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 608         assert(path);
 609         assert(sb);
 610         assert(ftwbuf);
 611
 612         if (typeflag != FTW_DP)
 613                 return 0;
 614
 615         if (ftwbuf->level < 1)
 616                 return 0;
 617
 618         (void) rmdir(path);
 619         return 0;
 620 }
 621
 622 int cg_trim(const char *controller, const char *path, bool delete_root) {
 623         _cleanup_free_ char *fs = NULL;
 624         int r = 0;
 625
 626         assert(path);
 627
 628         r = cg_get_path(controller, path, NULL, &fs);
 629         if (r < 0)
 630                 return r;
 631
 632         errno = 0;
 633         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 634                 if (errno == ENOENT)
 635                         r = 0;
 636                 else if (errno != 0)
 637                         r = -errno;
 638                 else
 639                         r = -EIO;
 640         }
 641
 642         if (delete_root) {
 643                 if (rmdir(fs) < 0 && errno != ENOENT)
 644                         return -errno;
 645         }
 646
 647         return r;
 648 }
 649
 650 int cg_create(const char *controller, const char *path) {
 651         _cleanup_free_ char *fs = NULL;
 652         int r;
 653
 654         r = cg_get_path_and_check(controller, path, NULL, &fs);
 655         if (r < 0)
 656                 return r;
 657
 658         r = mkdir_parents(fs, 0755);
 659         if (r < 0)
 660                 return r;
 661
 662         if (mkdir(fs, 0755) < 0) {
 663
 664                 if (errno == EEXIST)
 665                         return 0;
 666
 667                 return -errno;
 668         }
 669
 670         return 1;
 671 }
 672
 673 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 674         int r, q;
 675
 676         assert(pid >= 0);
 677
 678         r = cg_create(controller, path);
 679         if (r < 0)
 680                 return r;
 681
 682         q = cg_attach(controller, path, pid);
 683         if (q < 0)
 684                 return q;
 685
 686         /* This does not remove the cgroup on failure */
 687         return r;
 688 }
 689
 690 int cg_attach(const char *controller, const char *path, pid_t pid) {
 691         _cleanup_free_ char *fs = NULL;
 692         char c[DECIMAL_STR_MAX(pid_t) + 2];
 693         int r;
 694
 695         assert(path);
 696         assert(pid >= 0);
 697
 698         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 699         if (r < 0)
 700                 return r;
 701
 702         if (pid == 0)
 703                 pid = getpid();
 704
 705         snprintf(c, sizeof(c), PID_FMT"\n", pid);
 706
 707         return write_string_file(fs, c, 0);
 708 }
 709
 710 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 711         int r;
 712
 713         assert(controller);
 714         assert(path);
 715         assert(pid >= 0);
 716
 717         r = cg_attach(controller, path, pid);
 718         if (r < 0) {
 719                 char prefix[strlen(path) + 1];
 720
 721                 /* This didn't work? Then let's try all prefixes of
 722                  * the destination */
 723
 724                 PATH_FOREACH_PREFIX(prefix, path) {
 725                         int q;
 726
 727                         q = cg_attach(controller, prefix, pid);
 728                         if (q >= 0)
 729                                 return q;
 730                 }
 731         }
 732
 733         return r;
 734 }
 735
 736 int cg_set_group_access(
 737                 const char *controller,
 738                 const char *path,
 739                 mode_t mode,
 740                 uid_t uid,
 741                 gid_t gid) {
 742
 743         _cleanup_free_ char *fs = NULL;
 744         int r;
 745
 746         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 747                 return 0;
 748
 749         if (mode != MODE_INVALID)
 750                 mode &= 0777;
 751
 752         r = cg_get_path(controller, path, NULL, &fs);
 753         if (r < 0)
 754                 return r;
 755
 756         return chmod_and_chown(fs, mode, uid, gid);
 757 }
 758
 759 int cg_set_task_access(
 760                 const char *controller,
 761                 const char *path,
 762                 mode_t mode,
 763                 uid_t uid,
 764                 gid_t gid) {
 765
 766         _cleanup_free_ char *fs = NULL, *procs = NULL;
 767         int r, unified;
 768
 769         assert(path);
 770
 771         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 772                 return 0;
 773
 774         if (mode != MODE_INVALID)
 775                 mode &= 0666;
 776
 777         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 778         if (r < 0)
 779                 return r;
 780
 781         r = chmod_and_chown(fs, mode, uid, gid);
 782         if (r < 0)
 783                 return r;
 784
 785         unified = cg_unified();
 786         if (unified < 0)
 787                 return unified;
 788         if (unified)
 789                 return 0;
 790
 791         /* Compatibility, Always keep values for "tasks" in sync with
 792          * "cgroup.procs" */
 793         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 794                 (void) chmod_and_chown(procs, mode, uid, gid);
 795
 796         return 0;
 797 }
 798
 799 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 800         _cleanup_fclose_ FILE *f = NULL;
 801         char line[LINE_MAX];
 802         const char *fs;
 803         size_t cs = 0;
 804         int unified;
 805
 806         assert(path);
 807         assert(pid >= 0);
 808
 809         unified = cg_unified();
 810         if (unified < 0)
 811                 return unified;
 812         if (unified == 0) {
 813                 if (controller) {
 814                         if (!cg_controller_is_valid(controller))
 815                                 return -EINVAL;
 816                 } else
 817                         controller = SYSTEMD_CGROUP_CONTROLLER;
 818
 819                 cs = strlen(controller);
 820         }
 821
 822         fs = procfs_file_alloca(pid, "cgroup");
 823         f = fopen(fs, "re");
 824         if (!f)
 825                 return errno == ENOENT ? -ESRCH : -errno;
 826
 827         FOREACH_LINE(line, f, return -errno) {
 828                 char *e, *p;
 829
 830                 truncate_nl(line);
 831
 832                 if (unified) {
 833                         e = startswith(line, "0:");
 834                         if (!e)
 835                                 continue;
 836
 837                         e = strchr(e, ':');
 838                         if (!e)
 839                                 continue;
 840                 } else {
 841                         char *l;
 842                         size_t k;
 843                         const char *word, *state;
 844                         bool found = false;
 845
 846                         l = strchr(line, ':');
 847                         if (!l)
 848                                 continue;
 849
 850                         l++;
 851                         e = strchr(l, ':');
 852                         if (!e)
 853                                 continue;
 854
 855                         *e = 0;
 856                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 857                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 858                                         found = true;
 859                                         break;
 860                                 }
 861                         }
 862
 863                         if (!found)
 864                                 continue;
 865                 }
 866
 867                 p = strdup(e + 1);
 868                 if (!p)
 869                         return -ENOMEM;
 870
 871                 *path = p;
 872                 return 0;
 873         }
 874
 875         return -ENODATA;
 876 }
 877
 878 int cg_install_release_agent(const char *controller, const char *agent) {
 879         _cleanup_free_ char *fs = NULL, *contents = NULL;
 880         const char *sc;
 881         int r, unified;
 882
 883         assert(agent);
 884
 885         unified = cg_unified();
 886         if (unified < 0)
 887                 return unified;
 888         if (unified) /* doesn't apply to unified hierarchy */
 889                 return -EOPNOTSUPP;
 890
 891         r = cg_get_path(controller, NULL, "release_agent", &fs);
 892         if (r < 0)
 893                 return r;
 894
 895         r = read_one_line_file(fs, &contents);
 896         if (r < 0)
 897                 return r;
 898
 899         sc = strstrip(contents);
 900         if (isempty(sc)) {
 901                 r = write_string_file(fs, agent, 0);
 902                 if (r < 0)
 903                         return r;
 904         } else if (!path_equal(sc, agent))
 905                 return -EEXIST;
 906
 907         fs = mfree(fs);
 908         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 909         if (r < 0)
 910                 return r;
 911
 912         contents = mfree(contents);
 913         r = read_one_line_file(fs, &contents);
 914         if (r < 0)
 915                 return r;
 916
 917         sc = strstrip(contents);
 918         if (streq(sc, "0")) {
 919                 r = write_string_file(fs, "1", 0);
 920                 if (r < 0)
 921                         return r;
 922
 923                 return 1;
 924         }
 925
 926         if (!streq(sc, "1"))
 927                 return -EIO;
 928
 929         return 0;
 930 }
 931
 932 int cg_uninstall_release_agent(const char *controller) {
 933         _cleanup_free_ char *fs = NULL;
 934         int r, unified;
 935
 936         unified = cg_unified();
 937         if (unified < 0)
 938                 return unified;
 939         if (unified) /* Doesn't apply to unified hierarchy */
 940                 return -EOPNOTSUPP;
 941
 942         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 943         if (r < 0)
 944                 return r;
 945
 946         r = write_string_file(fs, "0", 0);
 947         if (r < 0)
 948                 return r;
 949
 950         fs = mfree(fs);
 951
 952         r = cg_get_path(controller, NULL, "release_agent", &fs);
 953         if (r < 0)
 954                 return r;
 955
 956         r = write_string_file(fs, "", 0);
 957         if (r < 0)
 958                 return r;
 959
 960         return 0;
 961 }
 962
 963 int cg_is_empty(const char *controller, const char *path) {
 964         _cleanup_fclose_ FILE *f = NULL;
 965         pid_t pid;
 966         int r;
 967
 968         assert(path);
 969
 970         r = cg_enumerate_processes(controller, path, &f);
 971         if (r == -ENOENT)
 972                 return 1;
 973         if (r < 0)
 974                 return r;
 975
 976         r = cg_read_pid(f, &pid);
 977         if (r < 0)
 978                 return r;
 979
 980         return r == 0;
 981 }
 982
 983 int cg_is_empty_recursive(const char *controller, const char *path) {
 984         int unified, r;
 985
 986         assert(path);
 987
 988         /* The root cgroup is always populated */
 989         if (controller && (isempty(path) || path_equal(path, "/")))
 990                 return false;
 991
 992         unified = cg_unified();
 993         if (unified < 0)
 994                 return unified;
 995
 996         if (unified > 0) {
 997                 _cleanup_free_ char *populated = NULL, *t = NULL;
 998
 999                 /* On the unified hierarchy we can check empty state
1000                  * via the "cgroup.populated" attribute. */
1001
1002                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1003                 if (r < 0)
1004                         return r;
1005
1006                 r = read_one_line_file(populated, &t);
1007                 if (r == -ENOENT)
1008                         return 1;
1009                 if (r < 0)
1010                         return r;
1011
1012                 return streq(t, "0");
1013         } else {
1014                 _cleanup_closedir_ DIR *d = NULL;
1015                 char *fn;
1016
1017                 r = cg_is_empty(controller, path);
1018                 if (r <= 0)
1019                         return r;
1020
1021                 r = cg_enumerate_subgroups(controller, path, &d);
1022                 if (r == -ENOENT)
1023                         return 1;
1024                 if (r < 0)
1025                         return r;
1026
1027                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1028                         _cleanup_free_ char *p = NULL;
1029
1030                         p = strjoin(path, "/", fn, NULL);
1031                         free(fn);
1032                         if (!p)
1033                                 return -ENOMEM;
1034
1035                         r = cg_is_empty_recursive(controller, p);
1036                         if (r <= 0)
1037                                 return r;
1038                 }
1039                 if (r < 0)
1040                         return r;
1041
1042                 return true;
1043         }
1044 }
1045
1046 int cg_split_spec(const char *spec, char **controller, char **path) {
1047         char *t = NULL, *u = NULL;
1048         const char *e;
1049
1050         assert(spec);
1051
1052         if (*spec == '/') {
1053                 if (!path_is_safe(spec))
1054                         return -EINVAL;
1055
1056                 if (path) {
1057                         t = strdup(spec);
1058                         if (!t)
1059                                 return -ENOMEM;
1060
1061                         *path = path_kill_slashes(t);
1062                 }
1063
1064                 if (controller)
1065                         *controller = NULL;
1066
1067                 return 0;
1068         }
1069
1070         e = strchr(spec, ':');
1071         if (!e) {
1072                 if (!cg_controller_is_valid(spec))
1073                         return -EINVAL;
1074
1075                 if (controller) {
1076                         t = strdup(spec);
1077                         if (!t)
1078                                 return -ENOMEM;
1079
1080                         *controller = t;
1081                 }
1082
1083                 if (path)
1084                         *path = NULL;
1085
1086                 return 0;
1087         }
1088
1089         t = strndup(spec, e-spec);
1090         if (!t)
1091                 return -ENOMEM;
1092         if (!cg_controller_is_valid(t)) {
1093                 free(t);
1094                 return -EINVAL;
1095         }
1096
1097         if (isempty(e+1))
1098                 u = NULL;
1099         else {
1100                 u = strdup(e+1);
1101                 if (!u) {
1102                         free(t);
1103                         return -ENOMEM;
1104                 }
1105
1106                 if (!path_is_safe(u) ||
1107                     !path_is_absolute(u)) {
1108                         free(t);
1109                         free(u);
1110                         return -EINVAL;
1111                 }
1112
1113                 path_kill_slashes(u);
1114         }
1115
1116         if (controller)
1117                 *controller = t;
1118         else
1119                 free(t);
1120
1121         if (path)
1122                 *path = u;
1123         else
1124                 free(u);
1125
1126         return 0;
1127 }
1128
1129 int cg_mangle_path(const char *path, char **result) {
1130         _cleanup_free_ char *c = NULL, *p = NULL;
1131         char *t;
1132         int r;
1133
1134         assert(path);
1135         assert(result);
1136
1137         /* First, check if it already is a filesystem path */
1138         if (path_startswith(path, "/sys/fs/cgroup")) {
1139
1140                 t = strdup(path);
1141                 if (!t)
1142                         return -ENOMEM;
1143
1144                 *result = path_kill_slashes(t);
1145                 return 0;
1146         }
1147
1148         /* Otherwise, treat it as cg spec */
1149         r = cg_split_spec(path, &c, &p);
1150         if (r < 0)
1151                 return r;
1152
1153         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1154 }
1155
1156 int cg_get_root_path(char **path) {
1157         char *p, *e;
1158         int r;
1159
1160         assert(path);
1161
1162         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1163         if (r < 0)
1164                 return r;
1165
1166         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1167         if (!e)
1168                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1169         if (!e)
1170                 e = endswith(p, "/system"); /* even more legacy */
1171         if (e)
1172                 *e = 0;
1173
1174         *path = p;
1175         return 0;
1176 }
1177
1178 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1179         _cleanup_free_ char *rt = NULL;
1180         char *p;
1181         int r;
1182
1183         assert(cgroup);
1184         assert(shifted);
1185
1186         if (!root) {
1187                 /* If the root was specified let's use that, otherwise
1188                  * let's determine it from PID 1 */
1189
1190                 r = cg_get_root_path(&rt);
1191                 if (r < 0)
1192                         return r;
1193
1194                 root = rt;
1195         }
1196
1197         p = path_startswith(cgroup, root);
1198         if (p && p > cgroup)
1199                 *shifted = p - 1;
1200         else
1201                 *shifted = cgroup;
1202
1203         return 0;
1204 }
1205
1206 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1207         _cleanup_free_ char *raw = NULL;
1208         const char *c;
1209         int r;
1210
1211         assert(pid >= 0);
1212         assert(cgroup);
1213
1214         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1215         if (r < 0)
1216                 return r;
1217
1218         r = cg_shift_path(raw, root, &c);
1219         if (r < 0)
1220                 return r;
1221
1222         if (c == raw) {
1223                 *cgroup = raw;
1224                 raw = NULL;
1225         } else {
1226                 char *n;
1227
1228                 n = strdup(c);
1229                 if (!n)
1230                         return -ENOMEM;
1231
1232                 *cgroup = n;
1233         }
1234
1235         return 0;
1236 }
1237
1238 int cg_path_decode_unit(const char *cgroup, char **unit){
1239         char *c, *s;
1240         size_t n;
1241
1242         assert(cgroup);
1243         assert(unit);
1244
1245         n = strcspn(cgroup, "/");
1246         if (n < 3)
1247                 return -ENXIO;
1248
1249         c = strndupa(cgroup, n);
1250         c = cg_unescape(c);
1251
1252         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1253                 return -ENXIO;
1254
1255         s = strdup(c);
1256         if (!s)
1257                 return -ENOMEM;
1258
1259         *unit = s;
1260         return 0;
1261 }
1262
1263 static bool valid_slice_name(const char *p, size_t n) {
1264
1265         if (!p)
1266                 return false;
1267
1268         if (n < strlen("x.slice"))
1269                 return false;
1270
1271         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1272                 char buf[n+1], *c;
1273
1274                 memcpy(buf, p, n);
1275                 buf[n] = 0;
1276
1277                 c = cg_unescape(buf);
1278
1279                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1280         }
1281
1282         return false;
1283 }
1284
/* Skips over all leading path components that are valid slice names
 * (and the slashes in front of them). Returns a pointer to the first
 * component that is not a slice; the pointer refers into p. */
static const char *skip_slices(const char *p) {
        assert(p);

        for (;;) {
                const char *component;
                size_t len;

                component = p + strspn(p, "/");
                len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                p = component + len;
        }
}
1302
/* Determines the unit name a cgroup path belongs to: all leading slice
 * components are skipped, then the next component is decoded as unit name.
 *
 * Returns 0 and a newly allocated string in *ret on success, -ENXIO if
 * the decoded name ends in ".slice", or the error of
 * cg_path_decode_unit(). */
int cg_path_get_unit(const char *path, char **ret) {
        char *name;
        int r;

        assert(path);
        assert(ret);

        r = cg_path_decode_unit(skip_slices(path), &name);
        if (r < 0)
                return r;

        /* We skipped over the slices, don't accept any now */
        if (!endswith(name, ".slice")) {
                *ret = name;
                return 0;
        }

        free(name);
        return -ENXIO;
}
1326
1327 int cg_pid_get_unit(pid_t pid, char **unit) {
1328         _cleanup_free_ char *cgroup = NULL;
1329         int r;
1330
1331         assert(unit);
1332
1333         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1334         if (r < 0)
1335                 return r;
1336
1337         return cg_path_get_unit(cgroup, unit);
1338 }
1339
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer (into p) to what follows the session scope component
 * and any slashes after it, or NULL if p is empty, the first component is
 * not of the form "session-<id>.scope", or <id> is rejected by
 * session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the session ID between prefix and suffix,
                 * plus the terminating NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal
                 * failure with NULL rather than a boolean. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1376
/**
 * Skip user@*.service, but require it to be there.
 *
 * Returns a pointer (into p) to what follows the component and any
 * slashes after it, or NULL if the first component does not have the form
 * "user@<uid>.service" with <uid> accepted by parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t len, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0)
                return NULL;

        if (memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;

        /* What remains between prefix and suffix is the UID */
        uid_len = len - 5 - 8;

        {
                char uid_str[uid_len + 1];

                memcpy(uid_str, p + 5, uid_len);
                uid_str[uid_len] = 0;

                /* Note that user manager services never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;
        }

        p += len;
        p += strspn(p, "/");

        return p;
}
1414
/* Skips the part of a cgroup path that leads up to a user unit: any
 * leading slices, followed by either a user manager service or a session
 * scope. Returns NULL if neither of the two is found. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skip slices, if there are any */
        after_slices = skip_slices(path);

        /* Prefer the user manager if it is next in the path, and fall
         * back to the user session otherwise. */
        after_manager = skip_user_manager(after_slices);
        if (!after_manager)
                return skip_session(after_slices);

        return after_manager;
}
1431
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if
 * the path has no user manager/session prefix, otherwise the result of
 * cg_path_get_unit() on the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the user prefix the layout is parsed exactly like that
         * of a system unit, so the same parser is reused. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1447
1448 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1449         _cleanup_free_ char *cgroup = NULL;
1450         int r;
1451
1452         assert(unit);
1453
1454         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1455         if (r < 0)
1456                 return r;
1457
1458         return cg_path_get_user_unit(cgroup, unit);
1459 }
1460
1461 int cg_path_get_machine_name(const char *path, char **machine) {
1462         _cleanup_free_ char *u = NULL;
1463         const char *sl;
1464         int r;
1465
1466         r = cg_path_get_unit(path, &u);
1467         if (r < 0)
1468                 return r;
1469
1470         sl = strjoina("/run/systemd/machines/unit:", u);
1471         return readlink_malloc(sl, machine);
1472 }
1473
1474 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1475         _cleanup_free_ char *cgroup = NULL;
1476         int r;
1477
1478         assert(machine);
1479
1480         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1481         if (r < 0)
1482                 return r;
1483
1484         return cg_path_get_machine_name(cgroup, machine);
1485 }
1486
1487 int cg_path_get_session(const char *path, char **session) {
1488         _cleanup_free_ char *unit = NULL;
1489         char *start, *end;
1490         int r;
1491
1492         assert(path);
1493
1494         r = cg_path_get_unit(path, &unit);
1495         if (r < 0)
1496                 return r;
1497
1498         start = startswith(unit, "session-");
1499         if (!start)
1500                 return -ENXIO;
1501         end = endswith(start, ".scope");
1502         if (!end)
1503                 return -ENXIO;
1504
1505         *end = 0;
1506         if (!session_id_valid(start))
1507                 return -ENXIO;
1508
1509         if (session) {
1510                 char *rr;
1511
1512                 rr = strdup(start);
1513                 if (!rr)
1514                         return -ENOMEM;
1515
1516                 *session = rr;
1517         }
1518
1519         return 0;
1520 }
1521
1522 int cg_pid_get_session(pid_t pid, char **session) {
1523         _cleanup_free_ char *cgroup = NULL;
1524         int r;
1525
1526         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1527         if (r < 0)
1528                 return r;
1529
1530         return cg_path_get_session(cgroup, session);
1531 }
1532
1533 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1534         _cleanup_free_ char *slice = NULL;
1535         char *start, *end;
1536         int r;
1537
1538         assert(path);
1539
1540         r = cg_path_get_slice(path, &slice);
1541         if (r < 0)
1542                 return r;
1543
1544         start = startswith(slice, "user-");
1545         if (!start)
1546                 return -ENXIO;
1547         end = endswith(start, ".slice");
1548         if (!end)
1549                 return -ENXIO;
1550
1551         *end = 0;
1552         if (parse_uid(start, uid) < 0)
1553                 return -ENXIO;
1554
1555         return 0;
1556 }
1557
1558 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1559         _cleanup_free_ char *cgroup = NULL;
1560         int r;
1561
1562         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1563         if (r < 0)
1564                 return r;
1565
1566         return cg_path_get_owner_uid(cgroup, uid);
1567 }
1568
/* Finds the right-most slice unit from the beginning of the path,
 * stopping before the first component that is not a slice.
 *
 * If the path does not start with a slice at all, "-.slice" is returned.
 * Returns 0 and a newly allocated string in *slice on success. */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last_slice = NULL;
        char *fallback;

        assert(p);
        assert(slice);

        /* Walk over the leading slice components, remembering the
         * most recent one */
        for (;;) {
                size_t len;

                p += strspn(p, "/");

                len = strcspn(p, "/");
                if (!valid_slice_name(p, len))
                        break;

                last_slice = p;
                p += len;
        }

        if (last_slice)
                return cg_path_decode_unit(last_slice, slice);

        /* No slice component at all */
        fallback = strdup("-.slice");
        if (!fallback)
                return -ENOMEM;

        *slice = fallback;
        return 0;
}
1604
1605 int cg_pid_get_slice(pid_t pid, char **slice) {
1606         _cleanup_free_ char *cgroup = NULL;
1607         int r;
1608
1609         assert(slice);
1610
1611         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1612         if (r < 0)
1613                 return r;
1614
1615         return cg_path_get_slice(cgroup, slice);
1616 }
1617
/* Determines the user slice of a cgroup path. Returns -ENXIO if the path
 * has no user manager/session prefix, otherwise the result of
 * cg_path_get_slice() on the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the user prefix the layout is parsed exactly like that
         * of a system slice, so the same parser is reused. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1631
1632 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1633         _cleanup_free_ char *cgroup = NULL;
1634         int r;
1635
1636         assert(slice);
1637
1638         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1639         if (r < 0)
1640                 return r;
1641
1642         return cg_path_get_user_slice(cgroup, slice);
1643 }
1644
1645 char *cg_escape(const char *p) {
1646         bool need_prefix = false;
1647
1648         /* This implements very minimal escaping for names to be used
1649          * as file names in the cgroup tree: any name which might
1650          * conflict with a kernel name or is prefixed with '_' is
1651          * prefixed with a '_'. That way, when reading cgroup names it
1652          * is sufficient to remove a single prefixing underscore if
1653          * there is one. */
1654
1655         /* The return value of this function (unlike cg_unescape())
1656          * needs free()! */
1657
1658         if (p[0] == 0 ||
1659             p[0] == '_' ||
1660             p[0] == '.' ||
1661             streq(p, "notify_on_release") ||
1662             streq(p, "release_agent") ||
1663             streq(p, "tasks") ||
1664             startswith(p, "cgroup."))
1665                 need_prefix = true;
1666         else {
1667                 const char *dot;
1668
1669                 dot = strrchr(p, '.');
1670                 if (dot) {
1671                         CGroupController c;
1672                         size_t l = dot - p;
1673
1674                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1675                                 const char *n;
1676
1677                                 n = cgroup_controller_to_string(c);
1678
1679                                 if (l != strlen(n))
1680                                         continue;
1681
1682                                 if (memcmp(p, n, l) != 0)
1683                                         continue;
1684
1685                                 need_prefix = true;
1686                                 break;
1687                         }
1688                 }
1689         }
1690
1691         if (need_prefix)
1692                 return strappend("_", p);
1693
1694         return strdup(p);
1695 }
1696
/* Undoes the escaping of cg_escape() by skipping one leading underscore,
 * if there is one.
 *
 * The return value of this function (unlike cg_escape()) doesn't need
 * free(): it points into the string that was passed in. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1708
1709 #define CONTROLLER_VALID                        \
1710         DIGITS LETTERS                          \
1711         "_"
1712
1713 bool cg_controller_is_valid(const char *p) {
1714         const char *t, *s;
1715
1716         if (!p)
1717                 return false;
1718
1719         s = startswith(p, "name=");
1720         if (s)
1721                 p = s;
1722
1723         if (*p == 0 || *p == '_')
1724                 return false;
1725
1726         for (t = p; *t; t++)
1727                 if (!strchr(CONTROLLER_VALID, *t))
1728                         return false;
1729
1730         if (t - p > FILENAME_MAX)
1731                 return false;
1732
1733         return true;
1734 }
1735
1736 int cg_slice_to_path(const char *unit, char **ret) {
1737         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1738         const char *dash;
1739         int r;
1740
1741         assert(unit);
1742         assert(ret);
1743
1744         if (streq(unit, "-.slice")) {
1745                 char *x;
1746
1747                 x = strdup("");
1748                 if (!x)
1749                         return -ENOMEM;
1750                 *ret = x;
1751                 return 0;
1752         }
1753
1754         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1755                 return -EINVAL;
1756
1757         if (!endswith(unit, ".slice"))
1758                 return -EINVAL;
1759
1760         r = unit_name_to_prefix(unit, &p);
1761         if (r < 0)
1762                 return r;
1763
1764         dash = strchr(p, '-');
1765
1766         /* Don't allow initial dashes */
1767         if (dash == p)
1768                 return -EINVAL;
1769
1770         while (dash) {
1771                 _cleanup_free_ char *escaped = NULL;
1772                 char n[dash - p + sizeof(".slice")];
1773
1774                 /* Don't allow trailing or double dashes */
1775                 if (dash[1] == 0 || dash[1] == '-')
1776                         return -EINVAL;
1777
1778                 strcpy(stpncpy(n, p, dash - p), ".slice");
1779                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1780                         return -EINVAL;
1781
1782                 escaped = cg_escape(n);
1783                 if (!escaped)
1784                         return -ENOMEM;
1785
1786                 if (!strextend(&s, escaped, "/", NULL))
1787                         return -ENOMEM;
1788
1789                 dash = strchr(dash+1, '-');
1790         }
1791
1792         e = cg_escape(unit);
1793         if (!e)
1794                 return -ENOMEM;
1795
1796         if (!strextend(&s, e, NULL))
1797                 return -ENOMEM;
1798
1799         *ret = s;
1800         s = NULL;
1801
1802         return 0;
1803 }
1804
1805 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1806         _cleanup_free_ char *p = NULL;
1807         int r;
1808
1809         r = cg_get_path(controller, path, attribute, &p);
1810         if (r < 0)
1811                 return r;
1812
1813         return write_string_file(p, value, 0);
1814 }
1815
1816 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1817         _cleanup_free_ char *p = NULL;
1818         int r;
1819
1820         r = cg_get_path(controller, path, attribute, &p);
1821         if (r < 0)
1822                 return r;
1823
1824         return read_one_line_file(p, ret);
1825 }
1826
1827 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1828         CGroupController c;
1829         int r, unified;
1830
1831         /* This one will create a cgroup in our private tree, but also
1832          * duplicate it in the trees specified in mask, and remove it
1833          * in all others */
1834
1835         /* First create the cgroup in our own hierarchy. */
1836         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1837         if (r < 0)
1838                 return r;
1839
1840         /* If we are in the unified hierarchy, we are done now */
1841         unified = cg_unified();
1842         if (unified < 0)
1843                 return unified;
1844         if (unified > 0)
1845                 return 0;
1846
1847         /* Otherwise, do the same in the other hierarchies */
1848         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1849                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1850                 const char *n;
1851
1852                 n = cgroup_controller_to_string(c);
1853
1854                 if (mask & bit)
1855                         (void) cg_create(n, path);
1856                 else if (supported & bit)
1857                         (void) cg_trim(n, path, true);
1858         }
1859
1860         return 0;
1861 }
1862
1863 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1864         CGroupController c;
1865         int r, unified;
1866
1867         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1868         if (r < 0)
1869                 return r;
1870
1871         unified = cg_unified();
1872         if (unified < 0)
1873                 return unified;
1874         if (unified > 0)
1875                 return 0;
1876
1877         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1878                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1879                 const char *p = NULL;
1880
1881                 if (!(supported & bit))
1882                         continue;
1883
1884                 if (path_callback)
1885                         p = path_callback(bit, userdata);
1886
1887                 if (!p)
1888                         p = path;
1889
1890                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1891         }
1892
1893         return 0;
1894 }
1895
1896 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1897         Iterator i;
1898         void *pidp;
1899         int r = 0;
1900
1901         SET_FOREACH(pidp, pids, i) {
1902                 pid_t pid = PTR_TO_PID(pidp);
1903                 int q;
1904
1905                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1906                 if (q < 0 && r >= 0)
1907                         r = q;
1908         }
1909
1910         return r;
1911 }
1912
1913 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1914         CGroupController c;
1915         int r = 0, unified;
1916
1917         if (!path_equal(from, to))  {
1918                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1919                 if (r < 0)
1920                         return r;
1921         }
1922
1923         unified = cg_unified();
1924         if (unified < 0)
1925                 return unified;
1926         if (unified > 0)
1927                 return r;
1928
1929         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1930                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1931                 const char *p = NULL;
1932
1933                 if (!(supported & bit))
1934                         continue;
1935
1936                 if (to_callback)
1937                         p = to_callback(bit, userdata);
1938
1939                 if (!p)
1940                         p = to;
1941
1942                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1943         }
1944
1945         return 0;
1946 }
1947
1948 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1949         CGroupController c;
1950         int r, unified;
1951
1952         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1953         if (r < 0)
1954                 return r;
1955
1956         unified = cg_unified();
1957         if (unified < 0)
1958                 return unified;
1959         if (unified > 0)
1960                 return r;
1961
1962         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1963                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1964
1965                 if (!(supported & bit))
1966                         continue;
1967
1968                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1969         }
1970
1971         return 0;
1972 }
1973
1974 int cg_mask_supported(CGroupMask *ret) {
1975         CGroupMask mask = 0;
1976         int r, unified;
1977
1978         /* Determines the mask of supported cgroup controllers. Only
1979          * includes controllers we can make sense of and that are
1980          * actually accessible. */
1981
1982         unified = cg_unified();
1983         if (unified < 0)
1984                 return unified;
1985         if (unified > 0) {
1986                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
1987                 const char *c;
1988
1989                 /* In the unified hierarchy we can read the supported
1990                  * and accessible controllers from a the top-level
1991                  * cgroup attribute */
1992
1993                 r = cg_get_root_path(&root);
1994                 if (r < 0)
1995                         return r;
1996
1997                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
1998                 if (r < 0)
1999                         return r;
2000
2001                 r = read_one_line_file(path, &controllers);
2002                 if (r < 0)
2003                         return r;
2004
2005                 c = controllers;
2006                 for (;;) {
2007                         _cleanup_free_ char *n = NULL;
2008                         CGroupController v;
2009
2010                         r = extract_first_word(&c, &n, NULL, 0);
2011                         if (r < 0)
2012                                 return r;
2013                         if (r == 0)
2014                                 break;
2015
2016                         v = cgroup_controller_from_string(n);
2017                         if (v < 0)
2018                                 continue;
2019
2020                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2021                 }
2022
2023                 /* Currently, we only support the memory and pids
2024                  * controller in the unified hierarchy, mask
2025                  * everything else off. */
2026                 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS;
2027
2028         } else {
2029                 CGroupController c;
2030
2031                 /* In the legacy hierarchy, we check whether which
2032                  * hierarchies are mounted. */
2033
2034                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2035                         const char *n;
2036
2037                         n = cgroup_controller_to_string(c);
2038                         if (controller_is_accessible(n) >= 0)
2039                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2040                 }
2041         }
2042
2043         *ret = mask;
2044         return 0;
2045 }
2046
2047 int cg_kernel_controllers(Set *controllers) {
2048         _cleanup_fclose_ FILE *f = NULL;
2049         char buf[LINE_MAX];
2050         int r;
2051
2052         assert(controllers);
2053
2054         /* Determines the full list of kernel-known controllers. Might
2055          * include controllers we don't actually support, arbitrary
2056          * named hierarchies and controllers that aren't currently
2057          * accessible (because not mounted). */
2058
2059         f = fopen("/proc/cgroups", "re");
2060         if (!f) {
2061                 if (errno == ENOENT)
2062                         return 0;
2063                 return -errno;
2064         }
2065
2066         /* Ignore the header line */
2067         (void) fgets(buf, sizeof(buf), f);
2068
2069         for (;;) {
2070                 char *controller;
2071                 int enabled = 0;
2072
2073                 errno = 0;
2074                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2075
2076                         if (feof(f))
2077                                 break;
2078
2079                         if (ferror(f) && errno != 0)
2080                                 return -errno;
2081
2082                         return -EBADMSG;
2083                 }
2084
2085                 if (!enabled) {
2086                         free(controller);
2087                         continue;
2088                 }
2089
2090                 if (!cg_controller_is_valid(controller)) {
2091                         free(controller);
2092                         return -EBADMSG;
2093                 }
2094
2095                 r = set_consume(controllers, controller);
2096                 if (r < 0)
2097                         return r;
2098         }
2099
2100         return 0;
2101 }
2102
/* Per-thread cache of the result of cg_unified(): negative means "not
 * determined yet", otherwise it holds the boolean answer. It is reset to
 * -1 by cg_unified_flush(). */
static thread_local int unified_cache = -1;
2104
2105 int cg_unified(void) {
2106         struct statfs fs;
2107
2108         /* Checks if we support the unified hierarchy. Returns an
2109          * error when the cgroup hierarchies aren't mounted yet or we
2110          * have any other trouble determining if the unified hierarchy
2111          * is supported. */
2112
2113         if (unified_cache >= 0)
2114                 return unified_cache;
2115
2116         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2117                 return -errno;
2118
2119         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2120                 unified_cache = true;
2121         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2122                 unified_cache = false;
2123         else
2124                 return -ENOEXEC;
2125
2126         return unified_cache;
2127 }
2128
/* Drops the cached result of cg_unified(), so that the next call probes
 * /sys/fs/cgroup/ again. */
void cg_unified_flush(void) {
        unified_cache = -1;
}
2132
2133 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2134         _cleanup_free_ char *fs = NULL;
2135         CGroupController c;
2136         int r, unified;
2137
2138         assert(p);
2139
2140         if (supported == 0)
2141                 return 0;
2142
2143         unified = cg_unified();
2144         if (unified < 0)
2145                 return unified;
2146         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2147                 return 0;
2148
2149         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2150         if (r < 0)
2151                 return r;
2152
2153         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2154                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2155                 const char *n;
2156
2157                 if (!(supported & bit))
2158                         continue;
2159
2160                 n = cgroup_controller_to_string(c);
2161                 {
2162                         char s[1 + strlen(n) + 1];
2163
2164                         s[0] = mask & bit ? '+' : '-';
2165                         strcpy(s + 1, n);
2166
2167                         r = write_string_file(fs, s, 0);
2168                         if (r < 0)
2169                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2170                 }
2171         }
2172
2173         return 0;
2174 }
2175
2176 bool cg_is_unified_wanted(void) {
2177         static thread_local int wanted = -1;
2178         int r, unified;
2179
2180         /* If the hierarchy is already mounted, then follow whatever
2181          * was chosen for it. */
2182         unified = cg_unified();
2183         if (unified >= 0)
2184                 return unified;
2185
2186         /* Otherwise, let's see what the kernel command line has to
2187          * say. Since checking that is expensive, let's cache the
2188          * result. */
2189         if (wanted >= 0)
2190                 return wanted;
2191
2192         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2193         if (r > 0)
2194                 return (wanted = true);
2195         else {
2196                 _cleanup_free_ char *value = NULL;
2197
2198                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2199                 if (r < 0)
2200                         return false;
2201                 if (r == 0)
2202                         return (wanted = false);
2203
2204                 return (wanted = parse_boolean(value) > 0);
2205         }
2206 }
2207
/* The exact opposite of cg_is_unified_wanted(). */
bool cg_is_legacy_wanted(void) {
        bool unified_wanted = cg_is_unified_wanted();

        return !unified_wanted;
}
2211
2212 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2213         uint64_t u;
2214         int r;
2215
2216         if (isempty(s)) {
2217                 *ret = CGROUP_CPU_SHARES_INVALID;
2218                 return 0;
2219         }
2220
2221         r = safe_atou64(s, &u);
2222         if (r < 0)
2223                 return r;
2224
2225         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2226                 return -ERANGE;
2227
2228         *ret = u;
2229         return 0;
2230 }
2231
2232 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2233         uint64_t u;
2234         int r;
2235
2236         if (isempty(s)) {
2237                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2238                 return 0;
2239         }
2240
2241         r = safe_atou64(s, &u);
2242         if (r < 0)
2243                 return r;
2244
2245         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2246                 return -ERANGE;
2247
2248         *ret = u;
2249         return 0;
2250 }
2251
/* Maps each CGroupController enum value to the controller's name as a
 * string, indexed by the enum value. */
static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
        [CGROUP_CONTROLLER_CPU] = "cpu",
        [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
        [CGROUP_CONTROLLER_BLKIO] = "blkio",
        [CGROUP_CONTROLLER_MEMORY] = "memory",
        [CGROUP_CONTROLLER_DEVICES] = "devices",
        [CGROUP_CONTROLLER_PIDS] = "pids",
        [CGROUP_CONTROLLER_NET_CLS] = "net_cls",
};

/* NOTE(review): presumably expands to the cgroup_controller_to_string() /
 * cgroup_controller_from_string() helpers used elsewhere in this file,
 * backed by the table above — confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);