1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
6 #include "cgroup-setup.h"
7 #include "cgroup-util.h"
8 #include "errno-util.h"
9 #include "parse-util.h"
10 #include "path-util.h"
11 #include "proc-cmdline.h"
12 #include "stdio-util.h"
13 #include "string-util.h"
16 #include "process-util.h"
18 #include "user-util.h"
21 bool cg_is_unified_wanted(void) {
22 static thread_local
int wanted
= -1;
24 const bool is_default
= DEFAULT_HIERARCHY
== CGROUP_UNIFIED_ALL
;
25 _cleanup_free_
char *c
= NULL
;
28 /* If we have a cached value, return that. */
32 /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
33 r
= cg_unified_cached(true);
35 return (wanted
= r
>= CGROUP_UNIFIED_ALL
);
37 /* If we were explicitly passed systemd.unified_cgroup_hierarchy, respect that. */
38 r
= proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b
);
42 /* If we passed cgroup_no_v1=all with no other instructions, it seems highly unlikely that we want to
43 * use hybrid or legacy hierarchy. */
44 r
= proc_cmdline_get_key("cgroup_no_v1", 0, &c
);
45 if (r
> 0 && streq_ptr(c
, "all"))
46 return (wanted
= true);
48 return (wanted
= is_default
);
51 bool cg_is_legacy_wanted(void) {
52 static thread_local
int wanted
= -1;
54 /* If we have a cached value, return that. */
58 /* Check if we have cgroup v2 already mounted. */
59 if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL
)
60 return (wanted
= false);
62 /* Otherwise, assume that at least partial legacy is wanted,
63 * since cgroup v2 should already be mounted at this point. */
64 return (wanted
= true);
67 bool cg_is_hybrid_wanted(void) {
68 static thread_local
int wanted
= -1;
71 const bool is_default
= DEFAULT_HIERARCHY
>= CGROUP_UNIFIED_SYSTEMD
;
72 /* We default to true if the default is "hybrid", obviously, but also when the default is "unified",
73 * because if we get called, it means that unified hierarchy was not mounted. */
75 /* If we have a cached value, return that. */
79 /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
80 if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL
)
81 return (wanted
= false);
83 /* Otherwise, let's see what the kernel command line has to say. Since checking is expensive, cache
84 * a non-error result. */
85 r
= proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b
);
87 /* The meaning of the kernel option is reversed wrt. to the return value of this function, hence the
89 return (wanted
= r
> 0 ? !b
: is_default
);
92 int cg_weight_parse(const char *s
, uint64_t *ret
) {
97 *ret
= CGROUP_WEIGHT_INVALID
;
101 r
= safe_atou64(s
, &u
);
105 if (u
< CGROUP_WEIGHT_MIN
|| u
> CGROUP_WEIGHT_MAX
)
112 int cg_cpu_shares_parse(const char *s
, uint64_t *ret
) {
117 *ret
= CGROUP_CPU_SHARES_INVALID
;
121 r
= safe_atou64(s
, &u
);
125 if (u
< CGROUP_CPU_SHARES_MIN
|| u
> CGROUP_CPU_SHARES_MAX
)
132 int cg_blkio_weight_parse(const char *s
, uint64_t *ret
) {
137 *ret
= CGROUP_BLKIO_WEIGHT_INVALID
;
141 r
= safe_atou64(s
, &u
);
145 if (u
< CGROUP_BLKIO_WEIGHT_MIN
|| u
> CGROUP_BLKIO_WEIGHT_MAX
)
152 static int trim_cb(const char *path
, const struct stat
*sb
, int typeflag
, struct FTW
*ftwbuf
) {
157 if (typeflag
!= FTW_DP
)
160 if (ftwbuf
->level
< 1)
167 int cg_trim(const char *controller
, const char *path
, bool delete_root
) {
168 _cleanup_free_
char *fs
= NULL
;
173 r
= cg_get_path(controller
, path
, NULL
, &fs
);
178 if (nftw(fs
, trim_cb
, 64, FTW_DEPTH
|FTW_MOUNT
|FTW_PHYS
) != 0) {
182 r
= errno_or_else(EIO
);
186 if (rmdir(fs
) < 0 && errno
!= ENOENT
)
190 q
= cg_hybrid_unified();
193 if (q
> 0 && streq(controller
, SYSTEMD_CGROUP_CONTROLLER
)) {
194 q
= cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY
, path
, delete_root
);
196 log_warning_errno(q
, "Failed to trim compat systemd cgroup %s: %m", path
);
202 /* Create a cgroup in the hierarchy of controller.
203 * Returns 0 if the group already existed, 1 on success, negative otherwise.
205 int cg_create(const char *controller
, const char *path
) {
206 _cleanup_free_
char *fs
= NULL
;
209 r
= cg_get_path_and_check(controller
, path
, NULL
, &fs
);
213 r
= mkdir_parents(fs
, 0755);
217 r
= mkdir_errno_wrapper(fs
, 0755);
223 r
= cg_hybrid_unified();
227 if (r
> 0 && streq(controller
, SYSTEMD_CGROUP_CONTROLLER
)) {
228 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY
, path
);
230 log_warning_errno(r
, "Failed to create compat systemd cgroup %s: %m", path
);
/* Create the cgroup 'path' in 'controller' and attach 'pid' to it.
 * Returns cg_create()'s result (0 if it existed, 1 if newly created)
 * on success, negative errno-style error otherwise. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int r, q;

        assert(pid >= 0);

        r = cg_create(controller, path);
        if (r < 0)
                return r;

        q = cg_attach(controller, path, pid);
        if (q < 0)
                return q;

        /* This does not remove the cgroup on failure */
        return r;
}
253 int cg_attach(const char *controller
, const char *path
, pid_t pid
) {
254 _cleanup_free_
char *fs
= NULL
;
255 char c
[DECIMAL_STR_MAX(pid_t
) + 2];
261 r
= cg_get_path_and_check(controller
, path
, "cgroup.procs", &fs
);
266 pid
= getpid_cached();
268 xsprintf(c
, PID_FMT
"\n", pid
);
270 r
= write_string_file(fs
, c
, WRITE_STRING_FILE_DISABLE_BUFFER
);
274 r
= cg_hybrid_unified();
278 if (r
> 0 && streq(controller
, SYSTEMD_CGROUP_CONTROLLER
)) {
279 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY
, path
, pid
);
281 log_warning_errno(r
, "Failed to attach "PID_FMT
" to compat systemd cgroup %s: %m", pid
, path
);
287 int cg_attach_fallback(const char *controller
, const char *path
, pid_t pid
) {
294 r
= cg_attach(controller
, path
, pid
);
296 char prefix
[strlen(path
) + 1];
298 /* This didn't work? Then let's try all prefixes of
301 PATH_FOREACH_PREFIX(prefix
, path
) {
304 q
= cg_attach(controller
, prefix
, pid
);
/* NOTE(review): fragment of cg_set_access() — the function header (before
 * original line 314) is not visible in this chunk, so the code below is left
 * byte-identical. The function appears to chown/chmod a cgroup directory and
 * a per-hierarchy table of attribute files — confirm against full source. */
314 const char *controller
,
324 /* cgroup v1, aka legacy/non-unified */
325 static const struct Attribute legacy_attributes
[] = {
326 { "cgroup.procs", true },
328 { "cgroup.clone_children", false },
332 /* cgroup v2, aka unified */
333 static const struct Attribute unified_attributes
[] = {
334 { "cgroup.procs", true },
335 { "cgroup.subtree_control", true },
336 { "cgroup.threads", false },
/* Table indexed by "is unified?" to pick the right attribute list. */
340 static const struct Attribute
* const attributes
[] = {
341 [false] = legacy_attributes
,
342 [true] = unified_attributes
,
345 _cleanup_free_
char *fs
= NULL
;
346 const struct Attribute
*i
;
/* Nothing to do when neither a uid nor a gid was given. */
351 if (uid
== UID_INVALID
&& gid
== GID_INVALID
)
354 unified
= cg_unified_controller(controller
);
358 /* Configure access to the cgroup itself */
359 r
= cg_get_path(controller
, path
, NULL
, &fs
);
363 r
= chmod_and_chown(fs
, 0755, uid
, gid
);
367 /* Configure access to the cgroup's attributes */
368 for (i
= attributes
[unified
]; i
->name
; i
++) {
371 r
= cg_get_path(controller
, path
, i
->name
, &fs
);
375 r
= chmod_and_chown(fs
, 0644, uid
, gid
);
380 log_debug_errno(r
, "Failed to set access on cgroup %s, ignoring: %m", fs
);
/* In hybrid mode, mirror the access change onto the legacy hierarchy. */
384 if (streq(controller
, SYSTEMD_CGROUP_CONTROLLER
)) {
385 r
= cg_hybrid_unified();
389 /* Always propagate access mode from unified to legacy controller */
390 r
= cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY
, path
, uid
, gid
);
392 log_debug_errno(r
, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path
);
/* NOTE(review): fragment of cg_migrate() — the function header/parameter list
 * (before original line 407) is not visible in this chunk, so the code below
 * is left byte-identical. It enumerates PIDs in the source cgroup and
 * cg_attach()es them to the destination, tracking already-moved PIDs in a
 * Set — confirm details against full source. */
407 _cleanup_set_free_ Set
*s
= NULL
;
420 my_pid
= getpid_cached();
423 _cleanup_fclose_
FILE *f
= NULL
;
427 r
= cg_enumerate_processes(cfrom
, pfrom
, &f
);
429 if (ret
>= 0 && r
!= -ENOENT
)
435 while ((r
= cg_read_pid(f
, &pid
)) > 0) {
437 /* This might do weird stuff if we aren't a
438 * single-threaded program. However, we
439 * luckily know we are not */
440 if ((flags
& CGROUP_IGNORE_SELF
) && pid
== my_pid
)
/* Skip PIDs we already migrated in an earlier pass. */
443 if (set_get(s
, PID_TO_PTR(pid
)) == PID_TO_PTR(pid
))
446 /* Ignore kernel threads. Since they can only
447 * exist in the root cgroup, we only check for
450 empty_or_root(pfrom
) &&
451 is_kernel_thread(pid
) > 0)
454 r
= cg_attach(cto
, pto
, pid
);
/* -ESRCH: the process exited meanwhile — not an error. */
456 if (ret
>= 0 && r
!= -ESRCH
)
463 r
= set_put(s
, PID_TO_PTR(pid
));
483 int cg_migrate_recursive(
490 _cleanup_closedir_
DIR *d
= NULL
;
499 ret
= cg_migrate(cfrom
, pfrom
, cto
, pto
, flags
);
501 r
= cg_enumerate_subgroups(cfrom
, pfrom
, &d
);
503 if (ret
>= 0 && r
!= -ENOENT
)
509 while ((r
= cg_read_subgroup(d
, &fn
)) > 0) {
510 _cleanup_free_
char *p
= NULL
;
512 p
= path_join(empty_to_root(pfrom
), fn
);
517 r
= cg_migrate_recursive(cfrom
, p
, cto
, pto
, flags
);
518 if (r
!= 0 && ret
>= 0)
522 if (r
< 0 && ret
>= 0)
525 if (flags
& CGROUP_REMOVE
) {
526 r
= cg_rmdir(cfrom
, pfrom
);
527 if (r
< 0 && ret
>= 0 && !IN_SET(r
, -ENOENT
, -EBUSY
))
534 int cg_migrate_recursive_fallback(
548 r
= cg_migrate_recursive(cfrom
, pfrom
, cto
, pto
, flags
);
550 char prefix
[strlen(pto
) + 1];
552 /* This didn't work? Then let's try all prefixes of the destination */
554 PATH_FOREACH_PREFIX(prefix
, pto
) {
557 q
= cg_migrate_recursive(cfrom
, pfrom
, cto
, prefix
, flags
);
566 int cg_create_everywhere(CGroupMask supported
, CGroupMask mask
, const char *path
) {
572 /* This one will create a cgroup in our private tree, but also
573 * duplicate it in the trees specified in mask, and remove it
576 * Returns 0 if the group already existed in the systemd hierarchy,
577 * 1 on success, negative otherwise.
580 /* First create the cgroup in our own hierarchy. */
581 r
= cg_create(SYSTEMD_CGROUP_CONTROLLER
, path
);
586 /* If we are in the unified hierarchy, we are done now */
587 r
= cg_all_unified();
593 supported
&= CGROUP_MASK_V1
;
594 mask
= CGROUP_MASK_EXTEND_JOINED(mask
);
597 /* Otherwise, do the same in the other hierarchies */
598 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
599 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
602 if (!FLAGS_SET(supported
, bit
))
605 if (FLAGS_SET(done
, bit
))
608 n
= cgroup_controller_to_string(c
);
609 if (FLAGS_SET(mask
, bit
))
610 (void) cg_create(n
, path
);
612 done
|= CGROUP_MASK_EXTEND_JOINED(bit
);
618 int cg_attach_everywhere(CGroupMask supported
, const char *path
, pid_t pid
, cg_migrate_callback_t path_callback
, void *userdata
) {
621 r
= cg_attach(SYSTEMD_CGROUP_CONTROLLER
, path
, pid
);
625 r
= cg_all_unified();
631 supported
&= CGROUP_MASK_V1
;
634 for (CGroupController c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
635 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
636 const char *p
= NULL
;
638 if (!FLAGS_SET(supported
, bit
))
641 if (FLAGS_SET(done
, bit
))
645 p
= path_callback(bit
, userdata
);
649 (void) cg_attach_fallback(cgroup_controller_to_string(c
), p
, pid
);
650 done
|= CGROUP_MASK_EXTEND_JOINED(bit
);
656 int cg_migrate_v1_controllers(CGroupMask supported
, CGroupMask mask
, const char *from
, cg_migrate_callback_t to_callback
, void *userdata
) {
663 supported
&= CGROUP_MASK_V1
;
664 mask
= CGROUP_MASK_EXTEND_JOINED(mask
);
667 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
668 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
669 const char *to
= NULL
;
671 if (!FLAGS_SET(supported
, bit
))
674 if (FLAGS_SET(done
, bit
))
677 if (!FLAGS_SET(mask
, bit
))
680 to
= to_callback(bit
, userdata
);
682 /* Remember first error and try continuing */
683 q
= cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER
, from
, cgroup_controller_to_string(c
), to
, 0);
690 int cg_trim_everywhere(CGroupMask supported
, const char *path
, bool delete_root
) {
693 r
= cg_trim(SYSTEMD_CGROUP_CONTROLLER
, path
, delete_root
);
697 q
= cg_all_unified();
703 return cg_trim_v1_controllers(supported
, _CGROUP_MASK_ALL
, path
, delete_root
);
706 int cg_trim_v1_controllers(CGroupMask supported
, CGroupMask mask
, const char *path
, bool delete_root
) {
711 supported
&= CGROUP_MASK_V1
;
712 mask
= CGROUP_MASK_EXTEND_JOINED(mask
);
715 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
716 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
718 if (!FLAGS_SET(supported
, bit
))
721 if (FLAGS_SET(done
, bit
))
724 if (FLAGS_SET(mask
, bit
)) {
725 /* Remember first error and try continuing */
726 q
= cg_trim(cgroup_controller_to_string(c
), path
, delete_root
);
729 done
|= CGROUP_MASK_EXTEND_JOINED(bit
);
/* NOTE(review): cg_enable_everywhere() continues past the last visible line
 * of this chunk (the tail after original line 838 is not shown), so the code
 * below is left byte-identical. It writes "+ctrl"/"-ctrl" tokens into
 * cgroup.subtree_control of 'p' and reports the resulting enabled mask. */
735 int cg_enable_everywhere(
736 CGroupMask supported
,
739 CGroupMask
*ret_result_mask
) {
741 _cleanup_fclose_
FILE *f
= NULL
;
742 _cleanup_free_
char *fs
= NULL
;
/* Nothing supported at all — trivially done. */
749 if (supported
== 0) {
751 *ret_result_mask
= 0;
755 r
= cg_all_unified();
759 /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
760 * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
761 * caller tends to use the returned mask later on to compare if all controllers where properly joined,
762 * and if not requeues realization. This use is the primary purpose of the return value, hence let's
763 * minimize surprises here and reduce triggers for re-realization by always saying we fully
766 *ret_result_mask
= mask
& supported
& CGROUP_MASK_V2
; /* If you wonder why we mask this with
767 * CGROUP_MASK_V2: The 'supported' mask
768 * might contain pure-V1 or BPF
769 * controllers, and we never want to
770 * claim that we could enable those with
771 * cgroup.subtree_control */
775 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, p
, "cgroup.subtree_control", &fs
);
/* Toggle each supported v2 controller on or off per 'mask'. */
779 for (c
= 0; c
< _CGROUP_CONTROLLER_MAX
; c
++) {
780 CGroupMask bit
= CGROUP_CONTROLLER_TO_MASK(c
);
783 if (!FLAGS_SET(CGROUP_MASK_V2
, bit
))
786 if (!FLAGS_SET(supported
, bit
))
789 n
= cgroup_controller_to_string(c
);
/* Build the "+name" or "-name" token to write. */
791 char s
[1 + strlen(n
) + 1];
793 s
[0] = FLAGS_SET(mask
, bit
) ? '+' : '-';
799 return log_debug_errno(errno
, "Failed to open cgroup.subtree_control file of %s: %m", p
);
802 r
= write_string_stream(f
, s
, WRITE_STRING_FILE_DISABLE_BUFFER
);
804 log_debug_errno(r
, "Failed to %s controller %s for %s (%s): %m",
805 FLAGS_SET(mask
, bit
) ? "enable" : "disable", n
, p
, fs
);
808 /* If we can't turn off a controller, leave it on in the reported resulting mask. This
809 * happens for example when we attempt to turn off a controller up in the tree that is
810 * used down in the tree. */
811 if (!FLAGS_SET(mask
, bit
) && r
== -EBUSY
) /* You might wonder why we check for EBUSY
812 * only here, and not follow the same logic
813 * for other errors such as EINVAL or
814 * EOPNOTSUPP or anything else. That's
815 * because EBUSY indicates that the
816 * controllers is currently enabled and
817 * cannot be disabled because something down
818 * the hierarchy is still using it. Any other
819 * error most likely means something like "I
820 * never heard of this controller" or
821 * similar. In the former case it's hence
822 * safe to assume the controller is still on
823 * after the failed operation, while in the
824 * latter case it's safer to assume the
825 * controller is unknown and hence certainly
829 /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
830 if (FLAGS_SET(mask
, bit
))
836 /* Let's return the precise set of controllers now enabled for the cgroup. */
838 *ret_result_mask
= ret
;