src/basic/cgroup-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <dirent.h>
  23 #include <errno.h>
  24 #include <ftw.h>
  25 #include <signal.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <sys/stat.h>
  29 #include <sys/types.h>
  30 #include <unistd.h>
  31
  32 #include "extract-word.h"
  33 #include "fileio.h"
  34 #include "formats-util.h"
  35 #include "login-util.h"
  36 #include "macro.h"
  37 #include "mkdir.h"
  38 #include "path-util.h"
  39 #include "process-util.h"
  40 #include "set.h"
  41 #include "special.h"
  42 #include "unit-name.h"
  43 #include "util.h"
  44 #include "cgroup-util.h"
  45
  46 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  47         _cleanup_free_ char *fs = NULL;
  48         FILE *f;
  49         int r;
  50
  51         assert(_f);
  52
  53         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  54         if (r < 0)
  55                 return r;
  56
  57         f = fopen(fs, "re");
  58         if (!f)
  59                 return -errno;
  60
  61         *_f = f;
  62         return 0;
  63 }
  64
  65 int cg_read_pid(FILE *f, pid_t *_pid) {
  66         unsigned long ul;
  67
  68         /* Note that the cgroup.procs might contain duplicates! See
  69          * cgroups.txt for details. */
  70
  71         assert(f);
  72         assert(_pid);
  73
  74         errno = 0;
  75         if (fscanf(f, "%lu", &ul) != 1) {
  76
  77                 if (feof(f))
  78                         return 0;
  79
  80                 return errno ? -errno : -EIO;
  81         }
  82
  83         if (ul <= 0)
  84                 return -EIO;
  85
  86         *_pid = (pid_t) ul;
  87         return 1;
  88 }
  89
  90 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
  91         _cleanup_free_ char *fs = NULL;
  92         int r;
  93         DIR *d;
  94
  95         assert(_d);
  96
  97         /* This is not recursive! */
  98
  99         r = cg_get_path(controller, path, NULL, &fs);
 100         if (r < 0)
 101                 return r;
 102
 103         d = opendir(fs);
 104         if (!d)
 105                 return -errno;
 106
 107         *_d = d;
 108         return 0;
 109 }
 110
 111 int cg_read_subgroup(DIR *d, char **fn) {
 112         struct dirent *de;
 113
 114         assert(d);
 115         assert(fn);
 116
 117         FOREACH_DIRENT_ALL(de, d, return -errno) {
 118                 char *b;
 119
 120                 if (de->d_type != DT_DIR)
 121                         continue;
 122
 123                 if (streq(de->d_name, ".") ||
 124                     streq(de->d_name, ".."))
 125                         continue;
 126
 127                 b = strdup(de->d_name);
 128                 if (!b)
 129                         return -ENOMEM;
 130
 131                 *fn = b;
 132                 return 1;
 133         }
 134
 135         return 0;
 136 }
 137
 138 int cg_rmdir(const char *controller, const char *path) {
 139         _cleanup_free_ char *p = NULL;
 140         int r;
 141
 142         r = cg_get_path(controller, path, NULL, &p);
 143         if (r < 0)
 144                 return r;
 145
 146         r = rmdir(p);
 147         if (r < 0 && errno != ENOENT)
 148                 return -errno;
 149
 150         return 0;
 151 }
 152
 153 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
 154         _cleanup_set_free_ Set *allocated_set = NULL;
 155         bool done = false;
 156         int r, ret = 0;
 157         pid_t my_pid;
 158
 159         assert(sig >= 0);
 160
 161         /* This goes through the tasks list and kills them all. This
 162          * is repeated until no further processes are added to the
 163          * tasks list, to properly handle forking processes */
 164
 165         if (!s) {
 166                 s = allocated_set = set_new(NULL);
 167                 if (!s)
 168                         return -ENOMEM;
 169         }
 170
 171         my_pid = getpid();
 172
 173         do {
 174                 _cleanup_fclose_ FILE *f = NULL;
 175                 pid_t pid = 0;
 176                 done = true;
 177
 178                 r = cg_enumerate_processes(controller, path, &f);
 179                 if (r < 0) {
 180                         if (ret >= 0 && r != -ENOENT)
 181                                 return r;
 182
 183                         return ret;
 184                 }
 185
 186                 while ((r = cg_read_pid(f, &pid)) > 0) {
 187
 188                         if (ignore_self && pid == my_pid)
 189                                 continue;
 190
 191                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 192                                 continue;
 193
 194                         /* If we haven't killed this process yet, kill
 195                          * it */
 196                         if (kill(pid, sig) < 0) {
 197                                 if (ret >= 0 && errno != ESRCH)
 198                                         ret = -errno;
 199                         } else {
 200                                 if (sigcont && sig != SIGKILL)
 201                                         (void) kill(pid, SIGCONT);
 202
 203                                 if (ret == 0)
 204                                         ret = 1;
 205                         }
 206
 207                         done = false;
 208
 209                         r = set_put(s, PID_TO_PTR(pid));
 210                         if (r < 0) {
 211                                 if (ret >= 0)
 212                                         return r;
 213
 214                                 return ret;
 215                         }
 216                 }
 217
 218                 if (r < 0) {
 219                         if (ret >= 0)
 220                                 return r;
 221
 222                         return ret;
 223                 }
 224
 225                 /* To avoid racing against processes which fork
 226                  * quicker than we can kill them we repeat this until
 227                  * no new pids need to be killed. */
 228
 229         } while (!done);
 230
 231         return ret;
 232 }
 233
 234 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
 235         _cleanup_set_free_ Set *allocated_set = NULL;
 236         _cleanup_closedir_ DIR *d = NULL;
 237         int r, ret;
 238         char *fn;
 239
 240         assert(path);
 241         assert(sig >= 0);
 242
 243         if (!s) {
 244                 s = allocated_set = set_new(NULL);
 245                 if (!s)
 246                         return -ENOMEM;
 247         }
 248
 249         ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
 250
 251         r = cg_enumerate_subgroups(controller, path, &d);
 252         if (r < 0) {
 253                 if (ret >= 0 && r != -ENOENT)
 254                         return r;
 255
 256                 return ret;
 257         }
 258
 259         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 260                 _cleanup_free_ char *p = NULL;
 261
 262                 p = strjoin(path, "/", fn, NULL);
 263                 free(fn);
 264                 if (!p)
 265                         return -ENOMEM;
 266
 267                 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
 268                 if (r != 0 && ret >= 0)
 269                         ret = r;
 270         }
 271
 272         if (ret >= 0 && r < 0)
 273                 ret = r;
 274
 275         if (rem) {
 276                 r = cg_rmdir(controller, path);
 277                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 278                         return r;
 279         }
 280
 281         return ret;
 282 }
 283
 284 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
 285         bool done = false;
 286         _cleanup_set_free_ Set *s = NULL;
 287         int r, ret = 0;
 288         pid_t my_pid;
 289
 290         assert(cfrom);
 291         assert(pfrom);
 292         assert(cto);
 293         assert(pto);
 294
 295         s = set_new(NULL);
 296         if (!s)
 297                 return -ENOMEM;
 298
 299         my_pid = getpid();
 300
 301         do {
 302                 _cleanup_fclose_ FILE *f = NULL;
 303                 pid_t pid = 0;
 304                 done = true;
 305
 306                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 307                 if (r < 0) {
 308                         if (ret >= 0 && r != -ENOENT)
 309                                 return r;
 310
 311                         return ret;
 312                 }
 313
 314                 while ((r = cg_read_pid(f, &pid)) > 0) {
 315
 316                         /* This might do weird stuff if we aren't a
 317                          * single-threaded program. However, we
 318                          * luckily know we are not */
 319                         if (ignore_self && pid == my_pid)
 320                                 continue;
 321
 322                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 323                                 continue;
 324
 325                         /* Ignore kernel threads. Since they can only
 326                          * exist in the root cgroup, we only check for
 327                          * them there. */
 328                         if (cfrom &&
 329                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 330                             is_kernel_thread(pid) > 0)
 331                                 continue;
 332
 333                         r = cg_attach(cto, pto, pid);
 334                         if (r < 0) {
 335                                 if (ret >= 0 && r != -ESRCH)
 336                                         ret = r;
 337                         } else if (ret == 0)
 338                                 ret = 1;
 339
 340                         done = false;
 341
 342                         r = set_put(s, PID_TO_PTR(pid));
 343                         if (r < 0) {
 344                                 if (ret >= 0)
 345                                         return r;
 346
 347                                 return ret;
 348                         }
 349                 }
 350
 351                 if (r < 0) {
 352                         if (ret >= 0)
 353                                 return r;
 354
 355                         return ret;
 356                 }
 357         } while (!done);
 358
 359         return ret;
 360 }
 361
 362 int cg_migrate_recursive(
 363                 const char *cfrom,
 364                 const char *pfrom,
 365                 const char *cto,
 366                 const char *pto,
 367                 bool ignore_self,
 368                 bool rem) {
 369
 370         _cleanup_closedir_ DIR *d = NULL;
 371         int r, ret = 0;
 372         char *fn;
 373
 374         assert(cfrom);
 375         assert(pfrom);
 376         assert(cto);
 377         assert(pto);
 378
 379         ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
 380
 381         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 382         if (r < 0) {
 383                 if (ret >= 0 && r != -ENOENT)
 384                         return r;
 385
 386                 return ret;
 387         }
 388
 389         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 390                 _cleanup_free_ char *p = NULL;
 391
 392                 p = strjoin(pfrom, "/", fn, NULL);
 393                 free(fn);
 394                 if (!p)
 395                         return -ENOMEM;
 396
 397                 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
 398                 if (r != 0 && ret >= 0)
 399                         ret = r;
 400         }
 401
 402         if (r < 0 && ret >= 0)
 403                 ret = r;
 404
 405         if (rem) {
 406                 r = cg_rmdir(cfrom, pfrom);
 407                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 408                         return r;
 409         }
 410
 411         return ret;
 412 }
 413
 414 int cg_migrate_recursive_fallback(
 415                 const char *cfrom,
 416                 const char *pfrom,
 417                 const char *cto,
 418                 const char *pto,
 419                 bool ignore_self,
 420                 bool rem) {
 421
 422         int r;
 423
 424         assert(cfrom);
 425         assert(pfrom);
 426         assert(cto);
 427         assert(pto);
 428
 429         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
 430         if (r < 0) {
 431                 char prefix[strlen(pto) + 1];
 432
 433                 /* This didn't work? Then let's try all prefixes of the destination */
 434
 435                 PATH_FOREACH_PREFIX(prefix, pto) {
 436                         int q;
 437
 438                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
 439                         if (q >= 0)
 440                                 return q;
 441                 }
 442         }
 443
 444         return r;
 445 }
 446
 447 static const char *controller_to_dirname(const char *controller) {
 448         const char *e;
 449
 450         assert(controller);
 451
 452         /* Converts a controller name to the directory name below
 453          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 454          * just cuts off the name= prefixed used for named
 455          * hierarchies, if it is specified. */
 456
 457         e = startswith(controller, "name=");
 458         if (e)
 459                 return e;
 460
 461         return controller;
 462 }
 463
 464 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 465         const char *dn;
 466         char *t = NULL;
 467
 468         assert(fs);
 469         assert(controller);
 470
 471         dn = controller_to_dirname(controller);
 472
 473         if (isempty(path) && isempty(suffix))
 474                 t = strappend("/sys/fs/cgroup/", dn);
 475         else if (isempty(path))
 476                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 477         else if (isempty(suffix))
 478                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 479         else
 480                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 481         if (!t)
 482                 return -ENOMEM;
 483
 484         *fs = t;
 485         return 0;
 486 }
 487
 488 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 489         char *t;
 490
 491         assert(fs);
 492
 493         if (isempty(path) && isempty(suffix))
 494                 t = strdup("/sys/fs/cgroup");
 495         else if (isempty(path))
 496                 t = strappend("/sys/fs/cgroup/", suffix);
 497         else if (isempty(suffix))
 498                 t = strappend("/sys/fs/cgroup/", path);
 499         else
 500                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 501         if (!t)
 502                 return -ENOMEM;
 503
 504         *fs = t;
 505         return 0;
 506 }
 507
 508 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 509         int unified, r;
 510
 511         assert(fs);
 512
 513         if (!controller) {
 514                 char *t;
 515
 516                 /* If no controller is specified, we return the path
 517                  * *below* the controllers, without any prefix. */
 518
 519                 if (!path && !suffix)
 520                         return -EINVAL;
 521
 522                 if (!suffix)
 523                         t = strdup(path);
 524                 else if (!path)
 525                         t = strdup(suffix);
 526                 else
 527                         t = strjoin(path, "/", suffix, NULL);
 528                 if (!t)
 529                         return -ENOMEM;
 530
 531                 *fs = path_kill_slashes(t);
 532                 return 0;
 533         }
 534
 535         if (!cg_controller_is_valid(controller))
 536                 return -EINVAL;
 537
 538         unified = cg_unified();
 539         if (unified < 0)
 540                 return unified;
 541
 542         if (unified > 0)
 543                 r = join_path_unified(path, suffix, fs);
 544         else
 545                 r = join_path_legacy(controller, path, suffix, fs);
 546         if (r < 0)
 547                 return r;
 548
 549         path_kill_slashes(*fs);
 550         return 0;
 551 }
 552
 553 static int controller_is_accessible(const char *controller) {
 554         int unified;
 555
 556         assert(controller);
 557
 558         /* Checks whether a specific controller is accessible,
 559          * i.e. its hierarchy mounted. In the unified hierarchy all
 560          * controllers are considered accessible, except for the named
 561          * hierarchies */
 562
 563         if (!cg_controller_is_valid(controller))
 564                 return -EINVAL;
 565
 566         unified = cg_unified();
 567         if (unified < 0)
 568                 return unified;
 569         if (unified > 0) {
 570                 /* We don't support named hierarchies if we are using
 571                  * the unified hierarchy. */
 572
 573                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 574                         return 0;
 575
 576                 if (startswith(controller, "name="))
 577                         return -EOPNOTSUPP;
 578
 579         } else {
 580                 const char *cc, *dn;
 581
 582                 dn = controller_to_dirname(controller);
 583                 cc = strjoina("/sys/fs/cgroup/", dn);
 584
 585                 if (laccess(cc, F_OK) < 0)
 586                         return -errno;
 587         }
 588
 589         return 0;
 590 }
 591
 592 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 593         int r;
 594
 595         assert(controller);
 596         assert(fs);
 597
 598         /* Check if the specified controller is actually accessible */
 599         r = controller_is_accessible(controller);
 600         if (r < 0)
 601                 return r;
 602
 603         return cg_get_path(controller, path, suffix, fs);
 604 }
 605
 606 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 607         assert(path);
 608         assert(sb);
 609         assert(ftwbuf);
 610
 611         if (typeflag != FTW_DP)
 612                 return 0;
 613
 614         if (ftwbuf->level < 1)
 615                 return 0;
 616
 617         (void) rmdir(path);
 618         return 0;
 619 }
 620
 621 int cg_trim(const char *controller, const char *path, bool delete_root) {
 622         _cleanup_free_ char *fs = NULL;
 623         int r = 0;
 624
 625         assert(path);
 626
 627         r = cg_get_path(controller, path, NULL, &fs);
 628         if (r < 0)
 629                 return r;
 630
 631         errno = 0;
 632         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 633                 if (errno == ENOENT)
 634                         r = 0;
 635                 else if (errno != 0)
 636                         r = -errno;
 637                 else
 638                         r = -EIO;
 639         }
 640
 641         if (delete_root) {
 642                 if (rmdir(fs) < 0 && errno != ENOENT)
 643                         return -errno;
 644         }
 645
 646         return r;
 647 }
 648
 649 int cg_create(const char *controller, const char *path) {
 650         _cleanup_free_ char *fs = NULL;
 651         int r;
 652
 653         r = cg_get_path_and_check(controller, path, NULL, &fs);
 654         if (r < 0)
 655                 return r;
 656
 657         r = mkdir_parents(fs, 0755);
 658         if (r < 0)
 659                 return r;
 660
 661         if (mkdir(fs, 0755) < 0) {
 662
 663                 if (errno == EEXIST)
 664                         return 0;
 665
 666                 return -errno;
 667         }
 668
 669         return 1;
 670 }
 671
 672 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 673         int r, q;
 674
 675         assert(pid >= 0);
 676
 677         r = cg_create(controller, path);
 678         if (r < 0)
 679                 return r;
 680
 681         q = cg_attach(controller, path, pid);
 682         if (q < 0)
 683                 return q;
 684
 685         /* This does not remove the cgroup on failure */
 686         return r;
 687 }
 688
 689 int cg_attach(const char *controller, const char *path, pid_t pid) {
 690         _cleanup_free_ char *fs = NULL;
 691         char c[DECIMAL_STR_MAX(pid_t) + 2];
 692         int r;
 693
 694         assert(path);
 695         assert(pid >= 0);
 696
 697         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 698         if (r < 0)
 699                 return r;
 700
 701         if (pid == 0)
 702                 pid = getpid();
 703
 704         snprintf(c, sizeof(c), PID_FMT"\n", pid);
 705
 706         return write_string_file(fs, c, 0);
 707 }
 708
 709 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 710         int r;
 711
 712         assert(controller);
 713         assert(path);
 714         assert(pid >= 0);
 715
 716         r = cg_attach(controller, path, pid);
 717         if (r < 0) {
 718                 char prefix[strlen(path) + 1];
 719
 720                 /* This didn't work? Then let's try all prefixes of
 721                  * the destination */
 722
 723                 PATH_FOREACH_PREFIX(prefix, path) {
 724                         int q;
 725
 726                         q = cg_attach(controller, prefix, pid);
 727                         if (q >= 0)
 728                                 return q;
 729                 }
 730         }
 731
 732         return r;
 733 }
 734
 735 int cg_set_group_access(
 736                 const char *controller,
 737                 const char *path,
 738                 mode_t mode,
 739                 uid_t uid,
 740                 gid_t gid) {
 741
 742         _cleanup_free_ char *fs = NULL;
 743         int r;
 744
 745         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 746                 return 0;
 747
 748         if (mode != MODE_INVALID)
 749                 mode &= 0777;
 750
 751         r = cg_get_path(controller, path, NULL, &fs);
 752         if (r < 0)
 753                 return r;
 754
 755         return chmod_and_chown(fs, mode, uid, gid);
 756 }
 757
 758 int cg_set_task_access(
 759                 const char *controller,
 760                 const char *path,
 761                 mode_t mode,
 762                 uid_t uid,
 763                 gid_t gid) {
 764
 765         _cleanup_free_ char *fs = NULL, *procs = NULL;
 766         int r, unified;
 767
 768         assert(path);
 769
 770         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 771                 return 0;
 772
 773         if (mode != MODE_INVALID)
 774                 mode &= 0666;
 775
 776         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 777         if (r < 0)
 778                 return r;
 779
 780         r = chmod_and_chown(fs, mode, uid, gid);
 781         if (r < 0)
 782                 return r;
 783
 784         unified = cg_unified();
 785         if (unified < 0)
 786                 return unified;
 787         if (unified)
 788                 return 0;
 789
 790         /* Compatibility, Always keep values for "tasks" in sync with
 791          * "cgroup.procs" */
 792         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 793                 (void) chmod_and_chown(procs, mode, uid, gid);
 794
 795         return 0;
 796 }
 797
 798 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 799         _cleanup_fclose_ FILE *f = NULL;
 800         char line[LINE_MAX];
 801         const char *fs;
 802         size_t cs = 0;
 803         int unified;
 804
 805         assert(path);
 806         assert(pid >= 0);
 807
 808         unified = cg_unified();
 809         if (unified < 0)
 810                 return unified;
 811         if (unified == 0) {
 812                 if (controller) {
 813                         if (!cg_controller_is_valid(controller))
 814                                 return -EINVAL;
 815                 } else
 816                         controller = SYSTEMD_CGROUP_CONTROLLER;
 817
 818                 cs = strlen(controller);
 819         }
 820
 821         fs = procfs_file_alloca(pid, "cgroup");
 822         f = fopen(fs, "re");
 823         if (!f)
 824                 return errno == ENOENT ? -ESRCH : -errno;
 825
 826         FOREACH_LINE(line, f, return -errno) {
 827                 char *e, *p;
 828
 829                 truncate_nl(line);
 830
 831                 if (unified) {
 832                         e = startswith(line, "0:");
 833                         if (!e)
 834                                 continue;
 835
 836                         e = strchr(e, ':');
 837                         if (!e)
 838                                 continue;
 839                 } else {
 840                         char *l;
 841                         size_t k;
 842                         const char *word, *state;
 843                         bool found = false;
 844
 845                         l = strchr(line, ':');
 846                         if (!l)
 847                                 continue;
 848
 849                         l++;
 850                         e = strchr(l, ':');
 851                         if (!e)
 852                                 continue;
 853
 854                         *e = 0;
 855                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 856                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 857                                         found = true;
 858                                         break;
 859                                 }
 860                         }
 861
 862                         if (!found)
 863                                 continue;
 864                 }
 865
 866                 p = strdup(e + 1);
 867                 if (!p)
 868                         return -ENOMEM;
 869
 870                 *path = p;
 871                 return 0;
 872         }
 873
 874         return -ENODATA;
 875 }
 876
 877 int cg_install_release_agent(const char *controller, const char *agent) {
 878         _cleanup_free_ char *fs = NULL, *contents = NULL;
 879         const char *sc;
 880         int r, unified;
 881
 882         assert(agent);
 883
 884         unified = cg_unified();
 885         if (unified < 0)
 886                 return unified;
 887         if (unified) /* doesn't apply to unified hierarchy */
 888                 return -EOPNOTSUPP;
 889
 890         r = cg_get_path(controller, NULL, "release_agent", &fs);
 891         if (r < 0)
 892                 return r;
 893
 894         r = read_one_line_file(fs, &contents);
 895         if (r < 0)
 896                 return r;
 897
 898         sc = strstrip(contents);
 899         if (isempty(sc)) {
 900                 r = write_string_file(fs, agent, 0);
 901                 if (r < 0)
 902                         return r;
 903         } else if (!path_equal(sc, agent))
 904                 return -EEXIST;
 905
 906         fs = mfree(fs);
 907         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 908         if (r < 0)
 909                 return r;
 910
 911         contents = mfree(contents);
 912         r = read_one_line_file(fs, &contents);
 913         if (r < 0)
 914                 return r;
 915
 916         sc = strstrip(contents);
 917         if (streq(sc, "0")) {
 918                 r = write_string_file(fs, "1", 0);
 919                 if (r < 0)
 920                         return r;
 921
 922                 return 1;
 923         }
 924
 925         if (!streq(sc, "1"))
 926                 return -EIO;
 927
 928         return 0;
 929 }
 930
 931 int cg_uninstall_release_agent(const char *controller) {
 932         _cleanup_free_ char *fs = NULL;
 933         int r, unified;
 934
 935         unified = cg_unified();
 936         if (unified < 0)
 937                 return unified;
 938         if (unified) /* Doesn't apply to unified hierarchy */
 939                 return -EOPNOTSUPP;
 940
 941         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 942         if (r < 0)
 943                 return r;
 944
 945         r = write_string_file(fs, "0", 0);
 946         if (r < 0)
 947                 return r;
 948
 949         fs = mfree(fs);
 950
 951         r = cg_get_path(controller, NULL, "release_agent", &fs);
 952         if (r < 0)
 953                 return r;
 954
 955         r = write_string_file(fs, "", 0);
 956         if (r < 0)
 957                 return r;
 958
 959         return 0;
 960 }
 961
 962 int cg_is_empty(const char *controller, const char *path) {
 963         _cleanup_fclose_ FILE *f = NULL;
 964         pid_t pid;
 965         int r;
 966
 967         assert(path);
 968
 969         r = cg_enumerate_processes(controller, path, &f);
 970         if (r == -ENOENT)
 971                 return 1;
 972         if (r < 0)
 973                 return r;
 974
 975         r = cg_read_pid(f, &pid);
 976         if (r < 0)
 977                 return r;
 978
 979         return r == 0;
 980 }
 981
 982 int cg_is_empty_recursive(const char *controller, const char *path) {
 983         int unified, r;
 984
 985         assert(path);
 986
 987         /* The root cgroup is always populated */
 988         if (controller && (isempty(path) || path_equal(path, "/")))
 989                 return false;
 990
 991         unified = cg_unified();
 992         if (unified < 0)
 993                 return unified;
 994
 995         if (unified > 0) {
 996                 _cleanup_free_ char *populated = NULL, *t = NULL;
 997
 998                 /* On the unified hierarchy we can check empty state
 999                  * via the "cgroup.populated" attribute. */
1000
1001                 r = cg_get_path(controller, path, "cgroup.populated", &populated);
1002                 if (r < 0)
1003                         return r;
1004
1005                 r = read_one_line_file(populated, &t);
1006                 if (r == -ENOENT)
1007                         return 1;
1008                 if (r < 0)
1009                         return r;
1010
1011                 return streq(t, "0");
1012         } else {
1013                 _cleanup_closedir_ DIR *d = NULL;
1014                 char *fn;
1015
1016                 r = cg_is_empty(controller, path);
1017                 if (r <= 0)
1018                         return r;
1019
1020                 r = cg_enumerate_subgroups(controller, path, &d);
1021                 if (r == -ENOENT)
1022                         return 1;
1023                 if (r < 0)
1024                         return r;
1025
1026                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1027                         _cleanup_free_ char *p = NULL;
1028
1029                         p = strjoin(path, "/", fn, NULL);
1030                         free(fn);
1031                         if (!p)
1032                                 return -ENOMEM;
1033
1034                         r = cg_is_empty_recursive(controller, p);
1035                         if (r <= 0)
1036                                 return r;
1037                 }
1038                 if (r < 0)
1039                         return r;
1040
1041                 return true;
1042         }
1043 }
1044
1045 int cg_split_spec(const char *spec, char **controller, char **path) {
1046         char *t = NULL, *u = NULL;
1047         const char *e;
1048
1049         assert(spec);
1050
1051         if (*spec == '/') {
1052                 if (!path_is_safe(spec))
1053                         return -EINVAL;
1054
1055                 if (path) {
1056                         t = strdup(spec);
1057                         if (!t)
1058                                 return -ENOMEM;
1059
1060                         *path = path_kill_slashes(t);
1061                 }
1062
1063                 if (controller)
1064                         *controller = NULL;
1065
1066                 return 0;
1067         }
1068
1069         e = strchr(spec, ':');
1070         if (!e) {
1071                 if (!cg_controller_is_valid(spec))
1072                         return -EINVAL;
1073
1074                 if (controller) {
1075                         t = strdup(spec);
1076                         if (!t)
1077                                 return -ENOMEM;
1078
1079                         *controller = t;
1080                 }
1081
1082                 if (path)
1083                         *path = NULL;
1084
1085                 return 0;
1086         }
1087
1088         t = strndup(spec, e-spec);
1089         if (!t)
1090                 return -ENOMEM;
1091         if (!cg_controller_is_valid(t)) {
1092                 free(t);
1093                 return -EINVAL;
1094         }
1095
1096         if (isempty(e+1))
1097                 u = NULL;
1098         else {
1099                 u = strdup(e+1);
1100                 if (!u) {
1101                         free(t);
1102                         return -ENOMEM;
1103                 }
1104
1105                 if (!path_is_safe(u) ||
1106                     !path_is_absolute(u)) {
1107                         free(t);
1108                         free(u);
1109                         return -EINVAL;
1110                 }
1111
1112                 path_kill_slashes(u);
1113         }
1114
1115         if (controller)
1116                 *controller = t;
1117         else
1118                 free(t);
1119
1120         if (path)
1121                 *path = u;
1122         else
1123                 free(u);
1124
1125         return 0;
1126 }
1127
1128 int cg_mangle_path(const char *path, char **result) {
1129         _cleanup_free_ char *c = NULL, *p = NULL;
1130         char *t;
1131         int r;
1132
1133         assert(path);
1134         assert(result);
1135
1136         /* First, check if it already is a filesystem path */
1137         if (path_startswith(path, "/sys/fs/cgroup")) {
1138
1139                 t = strdup(path);
1140                 if (!t)
1141                         return -ENOMEM;
1142
1143                 *result = path_kill_slashes(t);
1144                 return 0;
1145         }
1146
1147         /* Otherwise, treat it as cg spec */
1148         r = cg_split_spec(path, &c, &p);
1149         if (r < 0)
1150                 return r;
1151
1152         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1153 }
1154
1155 int cg_get_root_path(char **path) {
1156         char *p, *e;
1157         int r;
1158
1159         assert(path);
1160
1161         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1162         if (r < 0)
1163                 return r;
1164
1165         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1166         if (!e)
1167                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1168         if (!e)
1169                 e = endswith(p, "/system"); /* even more legacy */
1170         if (e)
1171                 *e = 0;
1172
1173         *path = p;
1174         return 0;
1175 }
1176
1177 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1178         _cleanup_free_ char *rt = NULL;
1179         char *p;
1180         int r;
1181
1182         assert(cgroup);
1183         assert(shifted);
1184
1185         if (!root) {
1186                 /* If the root was specified let's use that, otherwise
1187                  * let's determine it from PID 1 */
1188
1189                 r = cg_get_root_path(&rt);
1190                 if (r < 0)
1191                         return r;
1192
1193                 root = rt;
1194         }
1195
1196         p = path_startswith(cgroup, root);
1197         if (p && p > cgroup)
1198                 *shifted = p - 1;
1199         else
1200                 *shifted = cgroup;
1201
1202         return 0;
1203 }
1204
1205 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1206         _cleanup_free_ char *raw = NULL;
1207         const char *c;
1208         int r;
1209
1210         assert(pid >= 0);
1211         assert(cgroup);
1212
1213         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1214         if (r < 0)
1215                 return r;
1216
1217         r = cg_shift_path(raw, root, &c);
1218         if (r < 0)
1219                 return r;
1220
1221         if (c == raw) {
1222                 *cgroup = raw;
1223                 raw = NULL;
1224         } else {
1225                 char *n;
1226
1227                 n = strdup(c);
1228                 if (!n)
1229                         return -ENOMEM;
1230
1231                 *cgroup = n;
1232         }
1233
1234         return 0;
1235 }
1236
1237 int cg_path_decode_unit(const char *cgroup, char **unit){
1238         char *c, *s;
1239         size_t n;
1240
1241         assert(cgroup);
1242         assert(unit);
1243
1244         n = strcspn(cgroup, "/");
1245         if (n < 3)
1246                 return -ENXIO;
1247
1248         c = strndupa(cgroup, n);
1249         c = cg_unescape(c);
1250
1251         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1252                 return -ENXIO;
1253
1254         s = strdup(c);
1255         if (!s)
1256                 return -ENOMEM;
1257
1258         *unit = s;
1259         return 0;
1260 }
1261
1262 static bool valid_slice_name(const char *p, size_t n) {
1263
1264         if (!p)
1265                 return false;
1266
1267         if (n < strlen("x.slice"))
1268                 return false;
1269
1270         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1271                 char buf[n+1], *c;
1272
1273                 memcpy(buf, p, n);
1274                 buf[n] = 0;
1275
1276                 c = cg_unescape(buf);
1277
1278                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1279         }
1280
1281         return false;
1282 }
1283
/* Skips over all leading slice components (and the slashes around them)
 * of 'p'. Returns a pointer to the first component that is not a valid
 * slice name; this may be the terminating NUL. */
static const char *skip_slices(const char *p) {
        size_t len;

        assert(p);

        p += strspn(p, "/");

        while (valid_slice_name(p, (len = strcspn(p, "/")))) {
                p += len;
                p += strspn(p, "/");
        }

        return p;
}
1301
1302 int cg_path_get_unit(const char *path, char **ret) {
1303         const char *e;
1304         char *unit;
1305         int r;
1306
1307         assert(path);
1308         assert(ret);
1309
1310         e = skip_slices(path);
1311
1312         r = cg_path_decode_unit(e, &unit);
1313         if (r < 0)
1314                 return r;
1315
1316         /* We skipped over the slices, don't accept any now */
1317         if (endswith(unit, ".slice")) {
1318                 free(unit);
1319                 return -ENXIO;
1320         }
1321
1322         *ret = unit;
1323         return 0;
1324 }
1325
1326 int cg_pid_get_unit(pid_t pid, char **unit) {
1327         _cleanup_free_ char *cgroup = NULL;
1328         int r;
1329
1330         assert(unit);
1331
1332         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1333         if (r < 0)
1334                 return r;
1335
1336         return cg_path_get_unit(cgroup, unit);
1337 }
1338
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Leading slashes of 'p' are ignored. Returns a pointer to whatever
 * follows the session scope component and the slashes after it, or NULL
 * if 'p' is empty or does not start with a session scope carrying a
 * valid session ID.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the session ID between the 8 character
                 * prefix and the 6 character suffix, plus NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence report failure
                 * as NULL rather than as boolean false. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1375
/**
 * Skip user@*.service, but require it to be there.
 *
 * Leading slashes of 'p' are ignored. Returns a pointer to whatever
 * follows the user manager component and the slashes after it, or NULL
 * if 'p' is empty or does not start with a "user@<uid>.service"
 * component whose middle part is accepted by parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        len = strcspn(p, "/");
        if (len < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + len - 8, ".service", 8) != 0)
                return NULL;
        else {
                /* The part between the 5 character prefix and the 8
                 * character suffix, plus NUL */
                char uid_str[len - 5 - 8 + 1];

                memcpy(uid_str, p + 5, len - 5 - 8);
                uid_str[len - 5 - 8] = 0;

                /* User manager services never need unescaping: their
                 * names cannot clash with the kernel's own names, so
                 * cg_unescape() is not called here. */

                if (parse_uid(uid_str, NULL) < 0)
                        return NULL;

                p += len;
                p += strspn(p, "/");

                return p;
        }
}
1413
/* Skips everything in 'path' that precedes the units of a user: first any
 * slices, then either a user manager service or, failing that, a session
 * scope. Returns NULL if neither of the latter two is found. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        after_slices = skip_slices(path);

        /* Prefer the user manager, fall back to the user session */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1430
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if
 * the path carries no user prefix (see skip_user_prefix()), otherwise the
 * result of cg_path_get_unit() on the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Past the prefix things look just like for a system unit, so the
         * same parser is used. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1446
1447 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1448         _cleanup_free_ char *cgroup = NULL;
1449         int r;
1450
1451         assert(unit);
1452
1453         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1454         if (r < 0)
1455                 return r;
1456
1457         return cg_path_get_user_unit(cgroup, unit);
1458 }
1459
1460 int cg_path_get_machine_name(const char *path, char **machine) {
1461         _cleanup_free_ char *u = NULL;
1462         const char *sl;
1463         int r;
1464
1465         r = cg_path_get_unit(path, &u);
1466         if (r < 0)
1467                 return r;
1468
1469         sl = strjoina("/run/systemd/machines/unit:", u);
1470         return readlink_malloc(sl, machine);
1471 }
1472
1473 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1474         _cleanup_free_ char *cgroup = NULL;
1475         int r;
1476
1477         assert(machine);
1478
1479         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1480         if (r < 0)
1481                 return r;
1482
1483         return cg_path_get_machine_name(cgroup, machine);
1484 }
1485
1486 int cg_path_get_session(const char *path, char **session) {
1487         _cleanup_free_ char *unit = NULL;
1488         char *start, *end;
1489         int r;
1490
1491         assert(path);
1492
1493         r = cg_path_get_unit(path, &unit);
1494         if (r < 0)
1495                 return r;
1496
1497         start = startswith(unit, "session-");
1498         if (!start)
1499                 return -ENXIO;
1500         end = endswith(start, ".scope");
1501         if (!end)
1502                 return -ENXIO;
1503
1504         *end = 0;
1505         if (!session_id_valid(start))
1506                 return -ENXIO;
1507
1508         if (session) {
1509                 char *rr;
1510
1511                 rr = strdup(start);
1512                 if (!rr)
1513                         return -ENOMEM;
1514
1515                 *session = rr;
1516         }
1517
1518         return 0;
1519 }
1520
1521 int cg_pid_get_session(pid_t pid, char **session) {
1522         _cleanup_free_ char *cgroup = NULL;
1523         int r;
1524
1525         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1526         if (r < 0)
1527                 return r;
1528
1529         return cg_path_get_session(cgroup, session);
1530 }
1531
1532 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1533         _cleanup_free_ char *slice = NULL;
1534         char *start, *end;
1535         int r;
1536
1537         assert(path);
1538
1539         r = cg_path_get_slice(path, &slice);
1540         if (r < 0)
1541                 return r;
1542
1543         start = startswith(slice, "user-");
1544         if (!start)
1545                 return -ENXIO;
1546         end = endswith(start, ".slice");
1547         if (!end)
1548                 return -ENXIO;
1549
1550         *end = 0;
1551         if (parse_uid(start, uid) < 0)
1552                 return -ENXIO;
1553
1554         return 0;
1555 }
1556
1557 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1558         _cleanup_free_ char *cgroup = NULL;
1559         int r;
1560
1561         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1562         if (r < 0)
1563                 return r;
1564
1565         return cg_path_get_owner_uid(cgroup, uid);
1566 }
1567
/* Finds the right-most slice among the leading slice components of 'p',
 * i.e. the search stops at the first component that is not a slice.
 *
 * On success *slice holds a newly allocated string; if the path does not
 * start with any slice this is "-.slice". Returns 0, -ENOMEM, or a
 * negative value from cg_path_decode_unit(). */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        char *root;
        size_t len;

        assert(p);
        assert(slice);

        /* Walk over the leading slices, remembering the latest one */
        p += strspn(p, "/");
        while (valid_slice_name(p, (len = strcspn(p, "/")))) {
                last = p;
                p += len;
                p += strspn(p, "/");
        }

        if (last)
                return cg_path_decode_unit(last, slice);

        /* Not a single slice was found */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1603
1604 int cg_pid_get_slice(pid_t pid, char **slice) {
1605         _cleanup_free_ char *cgroup = NULL;
1606         int r;
1607
1608         assert(slice);
1609
1610         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1611         if (r < 0)
1612                 return r;
1613
1614         return cg_path_get_slice(cgroup, slice);
1615 }
1616
/* Determines the user slice a cgroup path belongs to. Returns -ENXIO if
 * the path carries no user prefix (see skip_user_prefix()), otherwise the
 * result of cg_path_get_slice() on the remainder. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Past the prefix things look just like for a system slice, so
         * the same parser is used. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1630
1631 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1632         _cleanup_free_ char *cgroup = NULL;
1633         int r;
1634
1635         assert(slice);
1636
1637         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1638         if (r < 0)
1639                 return r;
1640
1641         return cg_path_get_user_slice(cgroup, slice);
1642 }
1643
1644 char *cg_escape(const char *p) {
1645         bool need_prefix = false;
1646
1647         /* This implements very minimal escaping for names to be used
1648          * as file names in the cgroup tree: any name which might
1649          * conflict with a kernel name or is prefixed with '_' is
1650          * prefixed with a '_'. That way, when reading cgroup names it
1651          * is sufficient to remove a single prefixing underscore if
1652          * there is one. */
1653
1654         /* The return value of this function (unlike cg_unescape())
1655          * needs free()! */
1656
1657         if (p[0] == 0 ||
1658             p[0] == '_' ||
1659             p[0] == '.' ||
1660             streq(p, "notify_on_release") ||
1661             streq(p, "release_agent") ||
1662             streq(p, "tasks") ||
1663             startswith(p, "cgroup."))
1664                 need_prefix = true;
1665         else {
1666                 const char *dot;
1667
1668                 dot = strrchr(p, '.');
1669                 if (dot) {
1670                         CGroupController c;
1671                         size_t l = dot - p;
1672
1673                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1674                                 const char *n;
1675
1676                                 n = cgroup_controller_to_string(c);
1677
1678                                 if (l != strlen(n))
1679                                         continue;
1680
1681                                 if (memcmp(p, n, l) != 0)
1682                                         continue;
1683
1684                                 need_prefix = true;
1685                                 break;
1686                         }
1687                 }
1688         }
1689
1690         if (need_prefix)
1691                 return strappend("_", p);
1692
1693         return strdup(p);
1694 }
1695
/* Reverses cg_escape(): skips a single leading '_', if there is one.
 *
 * The returned pointer points into 'p' (unlike with cg_escape() nothing
 * is allocated), hence it must not be freed. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1707
1708 #define CONTROLLER_VALID                        \
1709         DIGITS LETTERS                          \
1710         "_"
1711
1712 bool cg_controller_is_valid(const char *p) {
1713         const char *t, *s;
1714
1715         if (!p)
1716                 return false;
1717
1718         s = startswith(p, "name=");
1719         if (s)
1720                 p = s;
1721
1722         if (*p == 0 || *p == '_')
1723                 return false;
1724
1725         for (t = p; *t; t++)
1726                 if (!strchr(CONTROLLER_VALID, *t))
1727                         return false;
1728
1729         if (t - p > FILENAME_MAX)
1730                 return false;
1731
1732         return true;
1733 }
1734
1735 int cg_slice_to_path(const char *unit, char **ret) {
1736         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1737         const char *dash;
1738         int r;
1739
1740         assert(unit);
1741         assert(ret);
1742
1743         if (streq(unit, "-.slice")) {
1744                 char *x;
1745
1746                 x = strdup("");
1747                 if (!x)
1748                         return -ENOMEM;
1749                 *ret = x;
1750                 return 0;
1751         }
1752
1753         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1754                 return -EINVAL;
1755
1756         if (!endswith(unit, ".slice"))
1757                 return -EINVAL;
1758
1759         r = unit_name_to_prefix(unit, &p);
1760         if (r < 0)
1761                 return r;
1762
1763         dash = strchr(p, '-');
1764
1765         /* Don't allow initial dashes */
1766         if (dash == p)
1767                 return -EINVAL;
1768
1769         while (dash) {
1770                 _cleanup_free_ char *escaped = NULL;
1771                 char n[dash - p + sizeof(".slice")];
1772
1773                 /* Don't allow trailing or double dashes */
1774                 if (dash[1] == 0 || dash[1] == '-')
1775                         return -EINVAL;
1776
1777                 strcpy(stpncpy(n, p, dash - p), ".slice");
1778                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1779                         return -EINVAL;
1780
1781                 escaped = cg_escape(n);
1782                 if (!escaped)
1783                         return -ENOMEM;
1784
1785                 if (!strextend(&s, escaped, "/", NULL))
1786                         return -ENOMEM;
1787
1788                 dash = strchr(dash+1, '-');
1789         }
1790
1791         e = cg_escape(unit);
1792         if (!e)
1793                 return -ENOMEM;
1794
1795         if (!strextend(&s, e, NULL))
1796                 return -ENOMEM;
1797
1798         *ret = s;
1799         s = NULL;
1800
1801         return 0;
1802 }
1803
1804 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1805         _cleanup_free_ char *p = NULL;
1806         int r;
1807
1808         r = cg_get_path(controller, path, attribute, &p);
1809         if (r < 0)
1810                 return r;
1811
1812         return write_string_file(p, value, 0);
1813 }
1814
1815 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1816         _cleanup_free_ char *p = NULL;
1817         int r;
1818
1819         r = cg_get_path(controller, path, attribute, &p);
1820         if (r < 0)
1821                 return r;
1822
1823         return read_one_line_file(p, ret);
1824 }
1825
1826 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1827         CGroupController c;
1828         int r, unified;
1829
1830         /* This one will create a cgroup in our private tree, but also
1831          * duplicate it in the trees specified in mask, and remove it
1832          * in all others */
1833
1834         /* First create the cgroup in our own hierarchy. */
1835         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1836         if (r < 0)
1837                 return r;
1838
1839         /* If we are in the unified hierarchy, we are done now */
1840         unified = cg_unified();
1841         if (unified < 0)
1842                 return unified;
1843         if (unified > 0)
1844                 return 0;
1845
1846         /* Otherwise, do the same in the other hierarchies */
1847         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1848                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1849                 const char *n;
1850
1851                 n = cgroup_controller_to_string(c);
1852
1853                 if (mask & bit)
1854                         (void) cg_create(n, path);
1855                 else if (supported & bit)
1856                         (void) cg_trim(n, path, true);
1857         }
1858
1859         return 0;
1860 }
1861
1862 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1863         CGroupController c;
1864         int r, unified;
1865
1866         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1867         if (r < 0)
1868                 return r;
1869
1870         unified = cg_unified();
1871         if (unified < 0)
1872                 return unified;
1873         if (unified > 0)
1874                 return 0;
1875
1876         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1877                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1878                 const char *p = NULL;
1879
1880                 if (!(supported & bit))
1881                         continue;
1882
1883                 if (path_callback)
1884                         p = path_callback(bit, userdata);
1885
1886                 if (!p)
1887                         p = path;
1888
1889                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1890         }
1891
1892         return 0;
1893 }
1894
1895 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1896         Iterator i;
1897         void *pidp;
1898         int r = 0;
1899
1900         SET_FOREACH(pidp, pids, i) {
1901                 pid_t pid = PTR_TO_PID(pidp);
1902                 int q;
1903
1904                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1905                 if (q < 0 && r >= 0)
1906                         r = q;
1907         }
1908
1909         return r;
1910 }
1911
1912 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1913         CGroupController c;
1914         int r = 0, unified;
1915
1916         if (!path_equal(from, to))  {
1917                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1918                 if (r < 0)
1919                         return r;
1920         }
1921
1922         unified = cg_unified();
1923         if (unified < 0)
1924                 return unified;
1925         if (unified > 0)
1926                 return r;
1927
1928         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1929                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1930                 const char *p = NULL;
1931
1932                 if (!(supported & bit))
1933                         continue;
1934
1935                 if (to_callback)
1936                         p = to_callback(bit, userdata);
1937
1938                 if (!p)
1939                         p = to;
1940
1941                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false);
1942         }
1943
1944         return 0;
1945 }
1946
1947 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
1948         CGroupController c;
1949         int r, unified;
1950
1951         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1952         if (r < 0)
1953                 return r;
1954
1955         unified = cg_unified();
1956         if (unified < 0)
1957                 return unified;
1958         if (unified > 0)
1959                 return r;
1960
1961         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1962                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1963
1964                 if (!(supported & bit))
1965                         continue;
1966
1967                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
1968         }
1969
1970         return 0;
1971 }
1972
1973 int cg_mask_supported(CGroupMask *ret) {
1974         CGroupMask mask = 0;
1975         int r, unified;
1976
1977         /* Determines the mask of supported cgroup controllers. Only
1978          * includes controllers we can make sense of and that are
1979          * actually accessible. */
1980
1981         unified = cg_unified();
1982         if (unified < 0)
1983                 return unified;
1984         if (unified > 0) {
1985                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
1986                 const char *c;
1987
1988                 /* In the unified hierarchy we can read the supported
1989                  * and accessible controllers from a the top-level
1990                  * cgroup attribute */
1991
1992                 r = cg_get_root_path(&root);
1993                 if (r < 0)
1994                         return r;
1995
1996                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
1997                 if (r < 0)
1998                         return r;
1999
2000                 r = read_one_line_file(path, &controllers);
2001                 if (r < 0)
2002                         return r;
2003
2004                 c = controllers;
2005                 for (;;) {
2006                         _cleanup_free_ char *n = NULL;
2007                         CGroupController v;
2008
2009                         r = extract_first_word(&c, &n, NULL, 0);
2010                         if (r < 0)
2011                                 return r;
2012                         if (r == 0)
2013                                 break;
2014
2015                         v = cgroup_controller_from_string(n);
2016                         if (v < 0)
2017                                 continue;
2018
2019                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2020                 }
2021
2022                 /* Currently, we only support the memory and pids
2023                  * controller in the unified hierarchy, mask
2024                  * everything else off. */
2025                 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_PIDS;
2026
2027         } else {
2028                 CGroupController c;
2029
2030                 /* In the legacy hierarchy, we check whether which
2031                  * hierarchies are mounted. */
2032
2033                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2034                         const char *n;
2035
2036                         n = cgroup_controller_to_string(c);
2037                         if (controller_is_accessible(n) >= 0)
2038                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2039                 }
2040         }
2041
2042         *ret = mask;
2043         return 0;
2044 }
2045
2046 int cg_kernel_controllers(Set *controllers) {
2047         _cleanup_fclose_ FILE *f = NULL;
2048         char buf[LINE_MAX];
2049         int r;
2050
2051         assert(controllers);
2052
2053         /* Determines the full list of kernel-known controllers. Might
2054          * include controllers we don't actually support, arbitrary
2055          * named hierarchies and controllers that aren't currently
2056          * accessible (because not mounted). */
2057
2058         f = fopen("/proc/cgroups", "re");
2059         if (!f) {
2060                 if (errno == ENOENT)
2061                         return 0;
2062                 return -errno;
2063         }
2064
2065         /* Ignore the header line */
2066         (void) fgets(buf, sizeof(buf), f);
2067
2068         for (;;) {
2069                 char *controller;
2070                 int enabled = 0;
2071
2072                 errno = 0;
2073                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2074
2075                         if (feof(f))
2076                                 break;
2077
2078                         if (ferror(f) && errno != 0)
2079                                 return -errno;
2080
2081                         return -EBADMSG;
2082                 }
2083
2084                 if (!enabled) {
2085                         free(controller);
2086                         continue;
2087                 }
2088
2089                 if (!cg_controller_is_valid(controller)) {
2090                         free(controller);
2091                         return -EBADMSG;
2092                 }
2093
2094                 r = set_consume(controllers, controller);
2095                 if (r < 0)
2096                         return r;
2097         }
2098
2099         return 0;
2100 }
2101
/* Per-thread cache of the result of cg_unified(): -1 means "not
 * determined yet", otherwise it holds the false/true answer. Reset to
 * -1 by cg_unified_flush(). */
static thread_local int unified_cache = -1;
2103
2104 int cg_unified(void) {
2105         struct statfs fs;
2106
2107         /* Checks if we support the unified hierarchy. Returns an
2108          * error when the cgroup hierarchies aren't mounted yet or we
2109          * have any other trouble determining if the unified hierarchy
2110          * is supported. */
2111
2112         if (unified_cache >= 0)
2113                 return unified_cache;
2114
2115         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2116                 return -errno;
2117
2118         if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC))
2119                 unified_cache = true;
2120         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2121                 unified_cache = false;
2122         else
2123                 return -ENOEXEC;
2124
2125         return unified_cache;
2126 }
2127
/* Drops the cached answer of cg_unified() for the calling thread, so
 * that the next cg_unified() call queries the file system again. */
void cg_unified_flush(void) {
        unified_cache = -1;
}
2131
2132 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2133         _cleanup_free_ char *fs = NULL;
2134         CGroupController c;
2135         int r, unified;
2136
2137         assert(p);
2138
2139         if (supported == 0)
2140                 return 0;
2141
2142         unified = cg_unified();
2143         if (unified < 0)
2144                 return unified;
2145         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2146                 return 0;
2147
2148         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2149         if (r < 0)
2150                 return r;
2151
2152         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2153                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2154                 const char *n;
2155
2156                 if (!(supported & bit))
2157                         continue;
2158
2159                 n = cgroup_controller_to_string(c);
2160                 {
2161                         char s[1 + strlen(n) + 1];
2162
2163                         s[0] = mask & bit ? '+' : '-';
2164                         strcpy(s + 1, n);
2165
2166                         r = write_string_file(fs, s, 0);
2167                         if (r < 0)
2168                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2169                 }
2170         }
2171
2172         return 0;
2173 }
2174
2175 bool cg_is_unified_wanted(void) {
2176         static thread_local int wanted = -1;
2177         int r, unified;
2178
2179         /* If the hierarchy is already mounted, then follow whatever
2180          * was chosen for it. */
2181         unified = cg_unified();
2182         if (unified >= 0)
2183                 return unified;
2184
2185         /* Otherwise, let's see what the kernel command line has to
2186          * say. Since checking that is expensive, let's cache the
2187          * result. */
2188         if (wanted >= 0)
2189                 return wanted;
2190
2191         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2192         if (r > 0)
2193                 return (wanted = true);
2194         else {
2195                 _cleanup_free_ char *value = NULL;
2196
2197                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2198                 if (r < 0)
2199                         return false;
2200                 if (r == 0)
2201                         return (wanted = false);
2202
2203                 return (wanted = parse_boolean(value) > 0);
2204         }
2205 }
2206
bool cg_is_legacy_wanted(void) {
        /* The legacy hierarchy is wanted exactly when the unified one
         * is not. */
        if (cg_is_unified_wanted())
                return false;

        return true;
}
2210
2211 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2212         uint64_t u;
2213         int r;
2214
2215         if (isempty(s)) {
2216                 *ret = CGROUP_CPU_SHARES_INVALID;
2217                 return 0;
2218         }
2219
2220         r = safe_atou64(s, &u);
2221         if (r < 0)
2222                 return r;
2223
2224         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2225                 return -ERANGE;
2226
2227         *ret = u;
2228         return 0;
2229 }
2230
2231 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2232         uint64_t u;
2233         int r;
2234
2235         if (isempty(s)) {
2236                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2237                 return 0;
2238         }
2239
2240         r = safe_atou64(s, &u);
2241         if (r < 0)
2242                 return r;
2243
2244         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2245                 return -ERANGE;
2246
2247         *ret = u;
2248         return 0;
2249 }
2250
/* Maps each CGroupController value to the name string of that
 * controller. */
static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
        [CGROUP_CONTROLLER_CPU] = "cpu",
        [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
        [CGROUP_CONTROLLER_BLKIO] = "blkio",
        [CGROUP_CONTROLLER_MEMORY] = "memory",
        [CGROUP_CONTROLLER_DEVICES] = "devices",
        [CGROUP_CONTROLLER_PIDS] = "pids",
        [CGROUP_CONTROLLER_NET_CLS] = "net_cls",
};

/* NOTE(review): presumably this expands to the
 * cgroup_controller_to_string()/cgroup_controller_from_string() helpers
 * used elsewhere in this file, backed by the table above -- confirm
 * against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);