#ifdef CONFIG_EXT_GROUP_SCHED
u32 flags; /* SCX_TG_* */
u32 weight;
+ u64 bw_period_us;
+ u64 bw_quota_us;
+ u64 bw_burst_us;
#endif
};
config GROUP_SCHED_WEIGHT
def_bool n
+config GROUP_SCHED_BANDWIDTH
+ def_bool n
+
config FAIR_GROUP_SCHED
bool "Group scheduling for SCHED_OTHER"
depends on CGROUP_SCHED
config CFS_BANDWIDTH
bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
depends on FAIR_GROUP_SCHED
+ select GROUP_SCHED_BANDWIDTH
default n
help
This option allows users to define CPU bandwidth rates (limits) for
bool
depends on SCHED_CLASS_EXT && CGROUP_SCHED
select GROUP_SCHED_WEIGHT
+ select GROUP_SCHED_BANDWIDTH
default y
endif #CGROUP_SCHED
return 0;
}
+#endif /* CONFIG_CFS_BANDWIDTH */
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
const u64 max_bw_quota_period_us = 1 * USEC_PER_SEC; /* 1s */
static const u64 min_bw_quota_period_us = 1 * USEC_PER_MSEC; /* 1ms */
/* More than 203 days if BW_SHIFT equals 20. */
static void tg_bandwidth(struct task_group *tg,
u64 *period_us_p, u64 *quota_us_p, u64 *burst_us_p)
{
+#ifdef CONFIG_CFS_BANDWIDTH
if (period_us_p)
*period_us_p = tg_get_cfs_period(tg);
if (quota_us_p)
*quota_us_p = tg_get_cfs_quota(tg);
if (burst_us_p)
*burst_us_p = tg_get_cfs_burst(tg);
+#else /* !CONFIG_CFS_BANDWIDTH */
+ if (period_us_p)
+ *period_us_p = tg->scx.bw_period_us;
+ if (quota_us_p)
+ *quota_us_p = tg->scx.bw_quota_us;
+ if (burst_us_p)
+ *burst_us_p = tg->scx.bw_burst_us;
+#endif /* CONFIG_CFS_BANDWIDTH */
}
static u64 cpu_period_read_u64(struct cgroup_subsys_state *css,
u64 period_us, u64 quota_us, u64 burst_us)
{
const u64 max_usec = U64_MAX / NSEC_PER_USEC;
+ int ret = 0;
if (tg == &root_task_group)
return -EINVAL;
burst_us + quota_us > max_bw_runtime_us))
return -EINVAL;
- return tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us);
+#ifdef CONFIG_CFS_BANDWIDTH
+ ret = tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us);
+#endif /* CONFIG_CFS_BANDWIDTH */
+ if (!ret)
+ scx_group_set_bandwidth(tg, period_us, quota_us, burst_us);
+ return ret;
}
static s64 cpu_quota_read_s64(struct cgroup_subsys_state *css,
tg_bandwidth(tg, &period_us, "a_us, NULL);
return tg_set_bandwidth(tg, period_us, quota_us, burst_us);
}
-#endif /* CONFIG_CFS_BANDWIDTH */
+#endif /* CONFIG_GROUP_SCHED_BANDWIDTH */
#ifdef CONFIG_RT_GROUP_SCHED
static int cpu_rt_runtime_write(struct cgroup_subsys_state *css,
.write_s64 = cpu_idle_write_s64,
},
#endif
-#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
{
.name = "cfs_period_us",
.read_u64 = cpu_period_read_u64,
.read_u64 = cpu_burst_read_u64,
.write_u64 = cpu_burst_write_u64,
},
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
{
.name = "stat",
.seq_show = cpu_cfs_stat_show,
return 0;
}
-#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
static int cpu_max_show(struct seq_file *sf, void *v)
{
struct task_group *tg = css_tg(seq_css(sf));
.write_s64 = cpu_idle_write_s64,
},
#endif
-#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
{
.name = "max",
.flags = CFTYPE_NOT_ON_ROOT,
struct scx_cgroup_init_args {
/* the weight of the cgroup [1..10000] */
u32 weight;
+
+ /* bandwidth control parameters from cpu.max and cpu.max.burst */
+ u64 bw_period_us;
+ u64 bw_quota_us;
+ u64 bw_burst_us;
};
enum scx_cpu_preempt_reason {
* @cgrp: cgroup whose weight is being updated
* @weight: new weight [1..10000]
*
- * Update @tg's weight to @weight.
+ * Update @cgrp's weight to @weight.
*/
void (*cgroup_set_weight)(struct cgroup *cgrp, u32 weight);
+
+ /**
+ * @cgroup_set_bandwidth: A cgroup's bandwidth is being changed
+ * @cgrp: cgroup whose bandwidth is being updated
+ * @period_us: bandwidth control period
+ * @quota_us: bandwidth control quota
+ * @burst_us: bandwidth control burst
+ *
+ * Update @cgrp's bandwidth control parameters. This is from the cpu.max
+ * cgroup interface.
+ *
+ * @quota_us / @period_us determines the CPU bandwidth @cgrp is entitled
+ * to. For example, if @period_us is 1_000_000 and @quota_us is
+ * 2_500_000. @cgrp is entitled to 2.5 CPUs. @burst_us can be
+ * interpreted in the same fashion and specifies how much @cgrp can
+ * burst temporarily. The specific control mechanism and thus the
+ * interpretation of @period_us and burstiness is upto to the BPF
+ * scheduler.
+ */
+ void (*cgroup_set_bandwidth)(struct cgroup *cgrp,
+ u64 period_us, u64 quota_us, u64 burst_us);
+
#endif /* CONFIG_EXT_GROUP_SCHED */
/*
void scx_tg_init(struct task_group *tg)
{
tg->scx.weight = CGROUP_WEIGHT_DFL;
+ tg->scx.bw_period_us = default_bw_period_us();
+ tg->scx.bw_quota_us = RUNTIME_INF;
}
int scx_tg_online(struct task_group *tg)
if (scx_cgroup_enabled) {
if (SCX_HAS_OP(sch, cgroup_init)) {
struct scx_cgroup_init_args args =
- { .weight = tg->scx.weight };
+ { .weight = tg->scx.weight,
+ .bw_period_us = tg->scx.bw_period_us,
+ .bw_quota_us = tg->scx.bw_quota_us,
+ .bw_burst_us = tg->scx.bw_burst_us };
ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init,
NULL, tg->css.cgroup, &args);
/* TODO: Implement ops->cgroup_set_idle() */
}
+void scx_group_set_bandwidth(struct task_group *tg,
+ u64 period_us, u64 quota_us, u64 burst_us)
+{
+ struct scx_sched *sch = scx_root;
+
+ percpu_down_read(&scx_cgroup_rwsem);
+
+ if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_bandwidth) &&
+ (tg->scx.bw_period_us != period_us ||
+ tg->scx.bw_quota_us != quota_us ||
+ tg->scx.bw_burst_us != burst_us))
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_bandwidth, NULL,
+ tg_cgrp(tg), period_us, quota_us, burst_us);
+
+ tg->scx.bw_period_us = period_us;
+ tg->scx.bw_quota_us = quota_us;
+ tg->scx.bw_burst_us = burst_us;
+
+ percpu_up_read(&scx_cgroup_rwsem);
+}
+
static void scx_cgroup_lock(void)
{
percpu_down_write(&scx_cgroup_rwsem);
rcu_read_lock();
css_for_each_descendant_pre(css, &root_task_group.css) {
struct task_group *tg = css_tg(css);
- struct scx_cgroup_init_args args = { .weight = tg->scx.weight };
+ struct scx_cgroup_init_args args = {
+ .weight = tg->scx.weight,
+ .bw_period_us = tg->scx.bw_period_us,
+ .bw_quota_us = tg->scx.bw_quota_us,
+ .bw_burst_us = tg->scx.bw_burst_us,
+ };
if ((tg->scx.flags &
(SCX_TG_ONLINE | SCX_TG_INITED)) != SCX_TG_ONLINE)
static void sched_ext_ops__cgroup_move(struct task_struct *p, struct cgroup *from, struct cgroup *to) {}
static void sched_ext_ops__cgroup_cancel_move(struct task_struct *p, struct cgroup *from, struct cgroup *to) {}
static void sched_ext_ops__cgroup_set_weight(struct cgroup *cgrp, u32 weight) {}
+static void sched_ext_ops__cgroup_set_bandwidth(struct cgroup *cgrp, u64 period_us, u64 quota_us, u64 burst_us) {}
#endif
static void sched_ext_ops__cpu_online(s32 cpu) {}
static void sched_ext_ops__cpu_offline(s32 cpu) {}
.cgroup_move = sched_ext_ops__cgroup_move,
.cgroup_cancel_move = sched_ext_ops__cgroup_cancel_move,
.cgroup_set_weight = sched_ext_ops__cgroup_set_weight,
+ .cgroup_set_bandwidth = sched_ext_ops__cgroup_set_bandwidth,
#endif
.cpu_online = sched_ext_ops__cpu_online,
.cpu_offline = sched_ext_ops__cpu_offline,
void scx_cgroup_cancel_attach(struct cgroup_taskset *tset);
void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight);
void scx_group_set_idle(struct task_group *tg, bool idle);
+void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us);
#else /* CONFIG_EXT_GROUP_SCHED */
static inline void scx_tg_init(struct task_group *tg) {}
static inline int scx_tg_online(struct task_group *tg) { return 0; }
static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) {}
static inline void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight) {}
static inline void scx_group_set_idle(struct task_group *tg, bool idle) {}
+static inline void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us) {}
#endif /* CONFIG_EXT_GROUP_SCHED */
#endif /* CONFIG_CGROUP_SCHED */
extern struct list_head task_groups;
-#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
extern const u64 max_bw_quota_period_us;
/*
{
return 100000ULL;
}
-#endif /* CONFIG_CFS_BANDWIDTH */
+#endif /* CONFIG_GROUP_SCHED_BANDWIDTH */
struct cfs_bandwidth {
#ifdef CONFIG_CFS_BANDWIDTH
taskc->force_local, taskc->core_sched_seq);
}
+s32 BPF_STRUCT_OPS(qmap_cgroup_init, struct cgroup *cgrp, struct scx_cgroup_init_args *args)
+{
+ bpf_printk("CGRP INIT %llu weight=%u period=%lu quota=%ld burst=%lu",
+ cgrp->kn->id, args->weight, args->bw_period_us,
+ args->bw_quota_us, args->bw_burst_us);
+ return 0;
+}
+
+void BPF_STRUCT_OPS(qmap_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
+{
+ bpf_printk("CGRP SET %llu weight=%u", cgrp->kn->id, weight);
+}
+
+void BPF_STRUCT_OPS(qmap_cgroup_set_bandwidth, struct cgroup *cgrp,
+ u64 period_us, u64 quota_us, u64 burst_us)
+{
+ bpf_printk("CGRP SET %llu period=%lu quota=%ld burst=%lu", cgrp->kn->id,
+ period_us, quota_us, burst_us);
+}
+
/*
* Print out the online and possible CPU map using bpf_printk() as a
* demonstration of using the cpumask kfuncs and ops.cpu_on/offline().
.dump = (void *)qmap_dump,
.dump_cpu = (void *)qmap_dump_cpu,
.dump_task = (void *)qmap_dump_task,
+ .cgroup_init = (void *)qmap_cgroup_init,
+ .cgroup_set_weight = (void *)qmap_cgroup_set_weight,
+ .cgroup_set_bandwidth = (void *)qmap_cgroup_set_bandwidth,
.cpu_online = (void *)qmap_cpu_online,
.cpu_offline = (void *)qmap_cpu_offline,
.init = (void *)qmap_init,
void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
{}
+void BPF_STRUCT_OPS(maximal_cgroup_set_bandwidth, struct cgroup *cgrp,
+ u64 period_us, u64 quota_us, u64 burst_us)
+{}
+
s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init)
{
return scx_bpf_create_dsq(DSQ_ID, -1);
.cgroup_move = (void *) maximal_cgroup_move,
.cgroup_cancel_move = (void *) maximal_cgroup_cancel_move,
.cgroup_set_weight = (void *) maximal_cgroup_set_weight,
+ .cgroup_set_bandwidth = (void *) maximal_cgroup_set_bandwidth,
.init = (void *) maximal_init,
.exit = (void *) maximal_exit,
.name = "maximal",