From: Tejun Heo <tj@kernel.org>
Date: Sat, 14 Jun 2025 01:23:30 +0000 (-1000)
Subject: sched/core: Reorganize cgroup bandwidth control interface file writes
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5bc34be478d09c4d16009e665e020ad0fcd0deea;p=thirdparty%2Flinux.git

sched/core: Reorganize cgroup bandwidth control interface file writes

- Move input parameter validation from tg_set_cfs_bandwidth() to the new
  outer function tg_set_bandwidth(). The outer function handles parameters
  in usecs, validates them and calls tg_set_cfs_bandwidth() which converts
  them into nsecs. This matches tg_bandwidth() on the read side.

- max/min_cfs_* consts are now used by tg_set_bandwidth(). Relocate, convert
  into usecs and drop "cfs" from the names.

- Reimplement cpu_cfs_{period|quota|burst}_write_*() using tg_bandwidth()
  and tg_set_bandwidth() and replace "cfs" in the names with "bw".

- Update cpu_max_write() to use tg_set_bandwidth(). cpu_period_quota_parse()
  is updated to drop nsec conversion accordingly. This aligns the behavior
  with cpu_period_quota_print().

- Drop now unused tg_set_cfs_{period|quota|burst}().

- While at it, for consistency, rename default_cfs_period() to
  default_bw_period_us() and make it return usecs.

This is to prepare for adding bandwidth control support to sched_ext.
tg_set_bandwidth() will be used as the muxing point. No functional changes
intended.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250614012346.2358261-5-tj@kernel.org
---

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8de93a3bfa276..2f8caa9db78d5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9309,47 +9309,23 @@ static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css,
 #ifdef CONFIG_CFS_BANDWIDTH
 static DEFINE_MUTEX(cfs_constraints_mutex);
 
-const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
-static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
-/* More than 203 days if BW_SHIFT equals 20. */
-static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
-
 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
 
-static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
-				u64 burst)
+static int tg_set_cfs_bandwidth(struct task_group *tg,
+				u64 period_us, u64 quota_us, u64 burst_us)
 {
 	int i, ret = 0, runtime_enabled, runtime_was_enabled;
 	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+	u64 period, quota, burst;
 
-	if (tg == &root_task_group)
-		return -EINVAL;
-
-	/*
-	 * Ensure we have at some amount of bandwidth every period. This is
-	 * to prevent reaching a state of large arrears when throttled via
-	 * entity_tick() resulting in prolonged exit starvation.
-	 */
-	if (quota < min_cfs_quota_period || period < min_cfs_quota_period)
-		return -EINVAL;
+	period = (u64)period_us * NSEC_PER_USEC;
 
-	/*
-	 * Likewise, bound things on the other side by preventing insane quota
-	 * periods. This also allows us to normalize in computing quota
-	 * feasibility.
-	 */
-	if (period > max_cfs_quota_period)
-		return -EINVAL;
-
-	/*
-	 * Bound quota to defend quota against overflow during bandwidth shift.
-	 */
-	if (quota != RUNTIME_INF && quota > max_cfs_runtime)
-		return -EINVAL;
+	if (quota_us == RUNTIME_INF)
+		quota = RUNTIME_INF;
+	else
+		quota = (u64)quota_us * NSEC_PER_USEC;
 
-	if (quota != RUNTIME_INF && (burst > quota ||
-				     burst + quota > max_cfs_runtime))
-		return -EINVAL;
+	burst = (u64)burst_us * NSEC_PER_USEC;
 
 	/*
 	 * Prevent race between setting of cfs_rq->runtime_enabled and
@@ -9437,50 +9413,6 @@ static u64 tg_get_cfs_burst(struct task_group *tg)
 	return burst_us;
 }
 
-static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
-{
-	u64 quota, period, burst;
-
-	if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC)
-		return -EINVAL;
-
-	period = (u64)cfs_period_us * NSEC_PER_USEC;
-	quota = tg->cfs_bandwidth.quota;
-	burst = tg->cfs_bandwidth.burst;
-
-	return tg_set_cfs_bandwidth(tg, period, quota, burst);
-}
-
-static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
-{
-	u64 quota, period, burst;
-
-	period = ktime_to_ns(tg->cfs_bandwidth.period);
-	burst = tg->cfs_bandwidth.burst;
-	if (cfs_quota_us < 0)
-		quota = RUNTIME_INF;
-	else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC)
-		quota = (u64)cfs_quota_us * NSEC_PER_USEC;
-	else
-		return -EINVAL;
-
-	return tg_set_cfs_bandwidth(tg, period, quota, burst);
-}
-
-static int tg_set_cfs_burst(struct task_group *tg, long cfs_burst_us)
-{
-	u64 quota, period, burst;
-
-	if ((u64)cfs_burst_us > U64_MAX / NSEC_PER_USEC)
-		return -EINVAL;
-
-	burst = (u64)cfs_burst_us * NSEC_PER_USEC;
-	period = ktime_to_ns(tg->cfs_bandwidth.period);
-	quota = tg->cfs_bandwidth.quota;
-
-	return tg_set_cfs_bandwidth(tg, period, quota, burst);
-}
-
 struct cfs_schedulable_data {
 	struct task_group *tg;
 	u64 period, quota;
@@ -9614,6 +9546,11 @@ static int cpu_cfs_local_stat_show(struct seq_file *sf, void *v)
 	return 0;
 }
 
+const u64 max_bw_quota_period_us = 1 * USEC_PER_SEC; /* 1s */
+static const u64 min_bw_quota_period_us = 1 * USEC_PER_MSEC; /* 1ms */
+/* More than 203 days if BW_SHIFT equals 20. */
+static const u64 max_bw_runtime_us = MAX_BW;
+
 static void tg_bandwidth(struct task_group *tg,
 			 u64 *period_us_p, u64 *quota_us_p, u64 *burst_us_p)
 {
@@ -9634,6 +9571,50 @@ static u64 cpu_period_read_u64(struct cgroup_subsys_state *css,
 	return period_us;
 }
 
+static int tg_set_bandwidth(struct task_group *tg,
+			    u64 period_us, u64 quota_us, u64 burst_us)
+{
+	const u64 max_usec = U64_MAX / NSEC_PER_USEC;
+
+	if (tg == &root_task_group)
+		return -EINVAL;
+
+	/* Values should survive translation to nsec */
+	if (period_us > max_usec ||
+	    (quota_us != RUNTIME_INF && quota_us > max_usec) ||
+	    burst_us > max_usec)
+		return -EINVAL;
+
+	/*
+	 * Ensure we have some amount of bandwidth every period. This is to
+	 * prevent reaching a state of large arrears when throttled via
+	 * entity_tick() resulting in prolonged exit starvation.
+	 */
+	if (quota_us < min_bw_quota_period_us ||
+	    period_us < min_bw_quota_period_us)
+		return -EINVAL;
+
+	/*
+	 * Likewise, bound things on the other side by preventing insane quota
+	 * periods. This also allows us to normalize in computing quota
+	 * feasibility.
+	 */
+	if (period_us > max_bw_quota_period_us)
+		return -EINVAL;
+
+	/*
+	 * Bound quota to defend quota against overflow during bandwidth shift.
+	 */
+	if (quota_us != RUNTIME_INF && quota_us > max_bw_runtime_us)
+		return -EINVAL;
+
+	if (quota_us != RUNTIME_INF && (burst_us > quota_us ||
+					burst_us + quota_us > max_bw_runtime_us))
+		return -EINVAL;
+
+	return tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us);
+}
+
 static s64 cpu_quota_read_s64(struct cgroup_subsys_state *css,
 			      struct cftype *cft)
 {
@@ -9652,22 +9633,37 @@ static u64 cpu_burst_read_u64(struct cgroup_subsys_state *css,
 	return burst_us;
 }
 
-static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css,
-				    struct cftype *cftype, u64 cfs_period_us)
+static int cpu_period_write_u64(struct cgroup_subsys_state *css,
+				struct cftype *cftype, u64 period_us)
 {
-	return tg_set_cfs_period(css_tg(css), cfs_period_us);
+	struct task_group *tg = css_tg(css);
+	u64 quota_us, burst_us;
+
+	tg_bandwidth(tg, NULL, &quota_us, &burst_us);
+	return tg_set_bandwidth(tg, period_us, quota_us, burst_us);
 }
 
-static int cpu_cfs_quota_write_s64(struct cgroup_subsys_state *css,
-				   struct cftype *cftype, s64 cfs_quota_us)
+static int cpu_quota_write_s64(struct cgroup_subsys_state *css,
+			       struct cftype *cftype, s64 quota_us)
 {
-	return tg_set_cfs_quota(css_tg(css), cfs_quota_us);
+	struct task_group *tg = css_tg(css);
+	u64 period_us, burst_us;
+
+	if (quota_us < 0)
+		quota_us = RUNTIME_INF;
+
+	tg_bandwidth(tg, &period_us, NULL, &burst_us);
+	return tg_set_bandwidth(tg, period_us, quota_us, burst_us);
 }
 
-static int cpu_cfs_burst_write_u64(struct cgroup_subsys_state *css,
-				   struct cftype *cftype, u64 cfs_burst_us)
+static int cpu_burst_write_u64(struct cgroup_subsys_state *css,
+			       struct cftype *cftype, u64 burst_us)
 {
-	return tg_set_cfs_burst(css_tg(css), cfs_burst_us);
+	struct task_group *tg = css_tg(css);
+	u64 period_us, quota_us;
+
+	tg_bandwidth(tg, &period_us, &quota_us, NULL);
+	return tg_set_bandwidth(tg, period_us, quota_us, burst_us);
 }
 
 #endif /* CONFIG_CFS_BANDWIDTH */
@@ -9733,17 +9729,17 @@ static struct cftype cpu_legacy_files[] = {
 	{
 		.name = "cfs_period_us",
 		.read_u64 = cpu_period_read_u64,
-		.write_u64 = cpu_cfs_period_write_u64,
+		.write_u64 = cpu_period_write_u64,
 	},
 	{
 		.name = "cfs_quota_us",
 		.read_s64 = cpu_quota_read_s64,
-		.write_s64 = cpu_cfs_quota_write_s64,
+		.write_s64 = cpu_quota_write_s64,
 	},
 	{
 		.name = "cfs_burst_us",
 		.read_u64 = cpu_burst_read_u64,
-		.write_u64 = cpu_cfs_burst_write_u64,
+		.write_u64 = cpu_burst_write_u64,
 	},
 	{
 		.name = "stat",
@@ -9940,22 +9936,20 @@ static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
 }
 
 /* caller should put the current value in *@periodp before calling */
-static int __maybe_unused cpu_period_quota_parse(char *buf,
-						 u64 *periodp, u64 *quotap)
+static int __maybe_unused cpu_period_quota_parse(char *buf, u64 *period_us_p,
+						 u64 *quota_us_p)
 {
 	char tok[21];	/* U64_MAX */
 
-	if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
+	if (sscanf(buf, "%20s %llu", tok, period_us_p) < 1)
 		return -EINVAL;
 
-	*periodp *= NSEC_PER_USEC;
-
-	if (sscanf(tok, "%llu", quotap))
-		*quotap *= NSEC_PER_USEC;
-	else if (!strcmp(tok, "max"))
-		*quotap = RUNTIME_INF;
-	else
-		return -EINVAL;
+	if (sscanf(tok, "%llu", quota_us_p) < 1) {
+		if (!strcmp(tok, "max"))
+			*quota_us_p = RUNTIME_INF;
+		else
+			return -EINVAL;
+	}
 
 	return 0;
 }
@@ -9975,14 +9969,13 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
 			     char *buf, size_t nbytes, loff_t off)
 {
 	struct task_group *tg = css_tg(of_css(of));
-	u64 period = tg_get_cfs_period(tg);
-	u64 burst = tg->cfs_bandwidth.burst;
-	u64 quota;
+	u64 period_us, quota_us, burst_us;
 	int ret;
 
-	ret = cpu_period_quota_parse(buf, &period, &quota);
+	tg_bandwidth(tg, &period_us, NULL, &burst_us);
+	ret = cpu_period_quota_parse(buf, &period_us, &quota_us);
 	if (!ret)
-		ret = tg_set_cfs_bandwidth(tg, period, quota, burst);
+		ret = tg_set_bandwidth(tg, period_us, quota_us, burst_us);
 	return ret ?: nbytes;
 }
 #endif /* CONFIG_CFS_BANDWIDTH */
@@ -10019,7 +10012,7 @@ static struct cftype cpu_files[] = {
 		.name = "max.burst",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.read_u64 = cpu_burst_read_u64,
-		.write_u64 = cpu_cfs_burst_write_u64,
+		.write_u64 = cpu_burst_write_u64,
 	},
 #endif /* CONFIG_CFS_BANDWIDTH */
 #ifdef CONFIG_UCLAMP_TASK_GROUP
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 707be4570430f..7e2963efe800d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6422,7 +6422,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 		 * to fail.
 		 */
 		new = old * 2;
-		if (new < max_cfs_quota_period) {
+		if (new < max_bw_quota_period_us * NSEC_PER_USEC) {
 			cfs_b->period = ns_to_ktime(new);
 			cfs_b->quota *= 2;
 			cfs_b->burst *= 2;
@@ -6456,7 +6456,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *paren
 	raw_spin_lock_init(&cfs_b->lock);
 	cfs_b->runtime = 0;
 	cfs_b->quota = RUNTIME_INF;
-	cfs_b->period = ns_to_ktime(default_cfs_period());
+	cfs_b->period = us_to_ktime(default_bw_period_us());
 	cfs_b->burst = 0;
 	cfs_b->hierarchical_quota = parent ? parent->hierarchical_quota : RUNTIME_INF;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e00b80cd998e9..105190b180203 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -403,15 +403,15 @@ static inline bool dl_server_active(struct sched_dl_entity *dl_se)
 extern struct list_head task_groups;
 
 #ifdef CONFIG_CFS_BANDWIDTH
-extern const u64 max_cfs_quota_period;
+extern const u64 max_bw_quota_period_us;
 
 /*
- * default period for cfs group bandwidth.
- * default: 0.1s, units: nanoseconds
+ * default period for group bandwidth.
+ * default: 0.1s, units: microseconds
  */
-static inline u64 default_cfs_period(void)
+static inline u64 default_bw_period_us(void)
 {
-	return 100000000ULL;
+	return 100000ULL;
 }
 #endif /* CONFIG_CFS_BANDWIDTH */
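
Illustration (not part of the patch): the checks that tg_set_bandwidth() now centralizes operate purely on microsecond values, with the usec -> nsec conversion deferred to tg_set_cfs_bandwidth(). The standalone userspace sketch below mirrors that validation order; it assumes MAX_BW follows the kernel's BW_SHIFT == 20 derivation and it omits the root-task-group rejection and the actual application of the limits.

/*
 * Standalone sketch (userspace, NOT kernel code) of the validation order
 * used by tg_set_bandwidth() for microsecond inputs.  Constants are
 * re-derived locally for illustration only.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC	1000ULL
#define USEC_PER_SEC	1000000ULL
#define USEC_PER_MSEC	1000ULL
#define RUNTIME_INF	((uint64_t)~0ULL)

/* Assumed to match the kernel's MAX_BW with BW_SHIFT == 20. */
#define BW_SHIFT	20
#define MAX_BW		((1ULL << (64 - BW_SHIFT)) - 1)

static const uint64_t max_bw_quota_period_us = 1 * USEC_PER_SEC;  /* 1s */
static const uint64_t min_bw_quota_period_us = 1 * USEC_PER_MSEC; /* 1ms */
static const uint64_t max_bw_runtime_us = MAX_BW;

/* Mirrors the checks in tg_set_bandwidth(); returns 0 or -EINVAL. */
static int validate_bandwidth_us(uint64_t period_us, uint64_t quota_us,
				 uint64_t burst_us)
{
	const uint64_t max_usec = UINT64_MAX / NSEC_PER_USEC;

	/* Values should survive translation to nsec. */
	if (period_us > max_usec ||
	    (quota_us != RUNTIME_INF && quota_us > max_usec) ||
	    burst_us > max_usec)
		return -EINVAL;

	/* Some bandwidth every period, and periods that aren't too short... */
	if (quota_us < min_bw_quota_period_us ||
	    period_us < min_bw_quota_period_us)
		return -EINVAL;

	/* ...nor too long. */
	if (period_us > max_bw_quota_period_us)
		return -EINVAL;

	/* Quota bounded to defend against overflow during bandwidth shift. */
	if (quota_us != RUNTIME_INF && quota_us > max_bw_runtime_us)
		return -EINVAL;

	/* Burst may not exceed quota, and quota + burst must stay bounded. */
	if (quota_us != RUNTIME_INF && (burst_us > quota_us ||
					burst_us + quota_us > max_bw_runtime_us))
		return -EINVAL;

	return 0;
}

int main(void)
{
	/* 100ms period, 50ms quota, no burst: accepted. */
	printf("%d\n", validate_bandwidth_us(100000, 50000, 0));
	/* 100ms period, "max" quota: accepted. */
	printf("%d\n", validate_bandwidth_us(100000, RUNTIME_INF, 0));
	/* 2s period exceeds max_bw_quota_period_us: rejected. */
	printf("%d\n", validate_bandwidth_us(2 * USEC_PER_SEC, 50000, 0));
	return 0;
}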