From f13f15a36ed6ca2fa48a6f32ec93bcc53382fdfa Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 24 Mar 2022 15:19:16 +0100 Subject: [PATCH] 5.10-stable patches added patches: cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch cgroup-v1-correct-privileges-check-in-release_agent-writes.patch --- ...p_file_ctx-for-kernfs_open_file-priv.patch | 267 ++++++++++++++++++ ...ce-for-process-migration-perm-checks.patch | 178 ++++++++++++ ...ileges-check-in-release_agent-writes.patch | 55 ++++ queue-5.10/series | 3 + 4 files changed, 503 insertions(+) create mode 100644 queue-5.10/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch create mode 100644 queue-5.10/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch create mode 100644 queue-5.10/cgroup-v1-correct-privileges-check-in-release_agent-writes.patch diff --git a/queue-5.10/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch b/queue-5.10/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch new file mode 100644 index 00000000000..b57e0d6cb82 --- /dev/null +++ b/queue-5.10/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch @@ -0,0 +1,267 @@ +From 0d2b5955b36250a9428c832664f2079cbf723bec Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 6 Jan 2022 11:02:29 -1000 +Subject: cgroup: Allocate cgroup_file_ctx for kernfs_open_file->priv +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Tejun Heo + +commit 0d2b5955b36250a9428c832664f2079cbf723bec upstream. + +of->priv is currently used by each interface file implementation to store +private information. This patch collects the current two private data usages +into struct cgroup_file_ctx which is allocated and freed by the common path. +This allows generic private data which applies to multiple files, which will +be used in the following patch. 
+ +Note that cgroup_procs iterator is now embedded as procs.iter in the new +cgroup_file_ctx so that it doesn't need to be allocated and freed +separately. + +v2: union dropped from cgroup_file_ctx and the procs iterator is embedded in + cgroup_file_ctx as suggested by Linus. + +v3: Michal pointed out that cgroup1's procs pidlist uses of->priv too. + Converted. Didn't change to embedded allocation as cgroup1 pidlists get + stored for caching. + +Signed-off-by: Tejun Heo +Cc: Linus Torvalds +Reviewed-by: Michal Koutný +[mkoutny: v5.10: modify cgroup.pressure handlers, adjust context] +Signed-off-by: Michal Koutný +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cgroup/cgroup-internal.h | 17 ++++++++++++ + kernel/cgroup/cgroup-v1.c | 26 ++++++++++--------- + kernel/cgroup/cgroup.c | 54 +++++++++++++++++++++++++--------------- + 3 files changed, 65 insertions(+), 32 deletions(-) + +--- a/kernel/cgroup/cgroup-internal.h ++++ b/kernel/cgroup/cgroup-internal.h +@@ -65,6 +65,23 @@ static inline struct cgroup_fs_context * + return container_of(kfc, struct cgroup_fs_context, kfc); + } + ++struct cgroup_pidlist; ++ ++struct cgroup_file_ctx { ++ struct { ++ void *trigger; ++ } psi; ++ ++ struct { ++ bool started; ++ struct css_task_iter iter; ++ } procs; ++ ++ struct { ++ struct cgroup_pidlist *pidlist; ++ } procs1; ++}; ++ + /* + * A cgroup can be associated with multiple css_sets as different tasks may + * belong to different cgroups on different hierarchies. 
In the other +--- a/kernel/cgroup/cgroup-v1.c ++++ b/kernel/cgroup/cgroup-v1.c +@@ -393,6 +393,7 @@ static void *cgroup_pidlist_start(struct + * next pid to display, if any + */ + struct kernfs_open_file *of = s->private; ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *cgrp = seq_css(s)->cgroup; + struct cgroup_pidlist *l; + enum cgroup_filetype type = seq_cft(s)->private; +@@ -402,25 +403,24 @@ static void *cgroup_pidlist_start(struct + mutex_lock(&cgrp->pidlist_mutex); + + /* +- * !NULL @of->priv indicates that this isn't the first start() +- * after open. If the matching pidlist is around, we can use that. +- * Look for it. Note that @of->priv can't be used directly. It +- * could already have been destroyed. ++ * !NULL @ctx->procs1.pidlist indicates that this isn't the first ++ * start() after open. If the matching pidlist is around, we can use ++ * that. Look for it. Note that @ctx->procs1.pidlist can't be used ++ * directly. It could already have been destroyed. + */ +- if (of->priv) +- of->priv = cgroup_pidlist_find(cgrp, type); ++ if (ctx->procs1.pidlist) ++ ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type); + + /* + * Either this is the first start() after open or the matching + * pidlist has been destroyed inbetween. Create a new one. 
+ */ +- if (!of->priv) { +- ret = pidlist_array_load(cgrp, type, +- (struct cgroup_pidlist **)&of->priv); ++ if (!ctx->procs1.pidlist) { ++ ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist); + if (ret) + return ERR_PTR(ret); + } +- l = of->priv; ++ l = ctx->procs1.pidlist; + + if (pid) { + int end = l->length; +@@ -448,7 +448,8 @@ static void *cgroup_pidlist_start(struct + static void cgroup_pidlist_stop(struct seq_file *s, void *v) + { + struct kernfs_open_file *of = s->private; +- struct cgroup_pidlist *l = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; ++ struct cgroup_pidlist *l = ctx->procs1.pidlist; + + if (l) + mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, +@@ -459,7 +460,8 @@ static void cgroup_pidlist_stop(struct s + static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) + { + struct kernfs_open_file *of = s->private; +- struct cgroup_pidlist *l = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; ++ struct cgroup_pidlist *l = ctx->procs1.pidlist; + pid_t *p = v; + pid_t *end = l->list + l->length; + /* +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -3590,6 +3590,7 @@ static int cgroup_cpu_pressure_show(stru + static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, enum psi_res res) + { ++ struct cgroup_file_ctx *ctx = of->priv; + struct psi_trigger *new; + struct cgroup *cgrp; + struct psi_group *psi; +@@ -3602,7 +3603,7 @@ static ssize_t cgroup_pressure_write(str + cgroup_kn_unlock(of->kn); + + /* Allow only one trigger per file descriptor */ +- if (of->priv) { ++ if (ctx->psi.trigger) { + cgroup_put(cgrp); + return -EBUSY; + } +@@ -3614,7 +3615,7 @@ static ssize_t cgroup_pressure_write(str + return PTR_ERR(new); + } + +- smp_store_release(&of->priv, new); ++ smp_store_release(&ctx->psi.trigger, new); + cgroup_put(cgrp); + + return nbytes; +@@ -3644,12 +3645,15 @@ static ssize_t cgroup_cpu_pressure_write + static __poll_t 
cgroup_pressure_poll(struct kernfs_open_file *of, + poll_table *pt) + { +- return psi_trigger_poll(&of->priv, of->file, pt); ++ struct cgroup_file_ctx *ctx = of->priv; ++ return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); + } + + static void cgroup_pressure_release(struct kernfs_open_file *of) + { +- psi_trigger_destroy(of->priv); ++ struct cgroup_file_ctx *ctx = of->priv; ++ ++ psi_trigger_destroy(ctx->psi.trigger); + } + #endif /* CONFIG_PSI */ + +@@ -3690,18 +3694,31 @@ static ssize_t cgroup_freeze_write(struc + static int cgroup_file_open(struct kernfs_open_file *of) + { + struct cftype *cft = of->kn->priv; ++ struct cgroup_file_ctx *ctx; ++ int ret; + +- if (cft->open) +- return cft->open(of); +- return 0; ++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); ++ if (!ctx) ++ return -ENOMEM; ++ of->priv = ctx; ++ ++ if (!cft->open) ++ return 0; ++ ++ ret = cft->open(of); ++ if (ret) ++ kfree(ctx); ++ return ret; + } + + static void cgroup_file_release(struct kernfs_open_file *of) + { + struct cftype *cft = of->kn->priv; ++ struct cgroup_file_ctx *ctx = of->priv; + + if (cft->release) + cft->release(of); ++ kfree(ctx); + } + + static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, +@@ -4625,21 +4642,21 @@ void css_task_iter_end(struct css_task_i + + static void cgroup_procs_release(struct kernfs_open_file *of) + { +- if (of->priv) { +- css_task_iter_end(of->priv); +- kfree(of->priv); +- } ++ struct cgroup_file_ctx *ctx = of->priv; ++ ++ if (ctx->procs.started) ++ css_task_iter_end(&ctx->procs.iter); + } + + static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) + { + struct kernfs_open_file *of = s->private; +- struct css_task_iter *it = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; + + if (pos) + (*pos)++; + +- return css_task_iter_next(it); ++ return css_task_iter_next(&ctx->procs.iter); + } + + static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, +@@ -4647,21 +4664,18 @@ static void 
*__cgroup_procs_start(struct + { + struct kernfs_open_file *of = s->private; + struct cgroup *cgrp = seq_css(s)->cgroup; +- struct css_task_iter *it = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; ++ struct css_task_iter *it = &ctx->procs.iter; + + /* + * When a seq_file is seeked, it's always traversed sequentially + * from position 0, so we can simply keep iterating on !0 *pos. + */ +- if (!it) { ++ if (!ctx->procs.started) { + if (WARN_ON_ONCE((*pos))) + return ERR_PTR(-EINVAL); +- +- it = kzalloc(sizeof(*it), GFP_KERNEL); +- if (!it) +- return ERR_PTR(-ENOMEM); +- of->priv = it; + css_task_iter_start(&cgrp->self, iter_flags, it); ++ ctx->procs.started = true; + } else if (!(*pos)) { + css_task_iter_end(it); + css_task_iter_start(&cgrp->self, iter_flags, it); diff --git a/queue-5.10/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch b/queue-5.10/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch new file mode 100644 index 00000000000..2fde516e52f --- /dev/null +++ b/queue-5.10/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch @@ -0,0 +1,178 @@ +From e57457641613fef0d147ede8bd6a3047df588b95 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 6 Jan 2022 11:02:29 -1000 +Subject: cgroup: Use open-time cgroup namespace for process migration perm checks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Tejun Heo + +commit e57457641613fef0d147ede8bd6a3047df588b95 upstream. + +cgroup process migration permission checks are performed at write time as +whether a given operation is allowed or not is dependent on the content of +the write - the PID. This currently uses current's cgroup namespace which is +a potential security weakness as it may allow scenarios where a less +privileged process tricks a more privileged one into writing into a fd that +it created. 
+ +This patch makes cgroup remember the cgroup namespace at the time of open +and uses it for migration permission checks instead of current's. Note that +this only applies to cgroup2 as cgroup1 doesn't have namespace support. + +This also fixes a use-after-free bug on cgroupns reported in + + https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com + +Note that backporting this fix also requires the preceding patch. + +Reported-by: "Eric W. Biederman" +Suggested-by: Linus Torvalds +Cc: Michal Koutný +Cc: Oleg Nesterov +Reviewed-by: Michal Koutný +Reported-by: syzbot+50f5cf33a284ce738b62@syzkaller.appspotmail.com +Link: https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com +Fixes: 5136f6365ce3 ("cgroup: implement "nsdelegate" mount option") +Signed-off-by: Tejun Heo +[mkoutny: v5.10: duplicate ns check in procs/threads write handler, adjust context] +Signed-off-by: Michal Koutný +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cgroup/cgroup-internal.h | 2 ++ + kernel/cgroup/cgroup.c | 32 ++++++++++++++++++++++---------- + 2 files changed, 24 insertions(+), 10 deletions(-) + +--- a/kernel/cgroup/cgroup-internal.h ++++ b/kernel/cgroup/cgroup-internal.h +@@ -68,6 +68,8 @@ static inline struct cgroup_fs_context * + struct cgroup_pidlist; + + struct cgroup_file_ctx { ++ struct cgroup_namespace *ns; ++ + struct { + void *trigger; + } psi; +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -3700,14 +3700,19 @@ static int cgroup_file_open(struct kernf + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; ++ ++ ctx->ns = current->nsproxy->cgroup_ns; ++ get_cgroup_ns(ctx->ns); + of->priv = ctx; + + if (!cft->open) + return 0; + + ret = cft->open(of); +- if (ret) ++ if (ret) { ++ put_cgroup_ns(ctx->ns); + kfree(ctx); ++ } + return ret; + } + +@@ -3718,13 +3723,14 @@ static void cgroup_file_release(struct k + + if (cft->release) + cft->release(of); ++ put_cgroup_ns(ctx->ns); + kfree(ctx); + } + + static ssize_t 
cgroup_file_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) + { +- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *cgrp = of->kn->parent->priv; + struct cftype *cft = of->kn->priv; + struct cgroup_subsys_state *css; +@@ -3741,7 +3747,7 @@ static ssize_t cgroup_file_write(struct + */ + if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && + !(cft->flags & CFTYPE_NS_DELEGATABLE) && +- ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) ++ ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp) + return -EPERM; + + if (cft->write) +@@ -4726,9 +4732,9 @@ static int cgroup_may_write(const struct + + static int cgroup_procs_write_permission(struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, +- struct super_block *sb) ++ struct super_block *sb, ++ struct cgroup_namespace *ns) + { +- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup *com_cgrp = src_cgrp; + int ret; + +@@ -4757,11 +4763,12 @@ static int cgroup_procs_write_permission + + static int cgroup_attach_permissions(struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, +- struct super_block *sb, bool threadgroup) ++ struct super_block *sb, bool threadgroup, ++ struct cgroup_namespace *ns) + { + int ret = 0; + +- ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb); ++ ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns); + if (ret) + return ret; + +@@ -4778,6 +4785,7 @@ static int cgroup_attach_permissions(str + static ssize_t cgroup_procs_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) + { ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *src_cgrp, *dst_cgrp; + struct task_struct *task; + ssize_t ret; +@@ -4798,7 +4806,8 @@ static ssize_t cgroup_procs_write(struct + spin_unlock_irq(&css_set_lock); + + ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, +- of->file->f_path.dentry->d_sb, true); ++ of->file->f_path.dentry->d_sb, 
true, ++ ctx->ns); + if (ret) + goto out_finish; + +@@ -4820,6 +4829,7 @@ static void *cgroup_threads_start(struct + static ssize_t cgroup_threads_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) + { ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *src_cgrp, *dst_cgrp; + struct task_struct *task; + ssize_t ret; +@@ -4843,7 +4853,8 @@ static ssize_t cgroup_threads_write(stru + + /* thread migrations follow the cgroup.procs delegation rule */ + ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, +- of->file->f_path.dentry->d_sb, false); ++ of->file->f_path.dentry->d_sb, false, ++ ctx->ns); + if (ret) + goto out_finish; + +@@ -6023,7 +6034,8 @@ static int cgroup_css_set_fork(struct ke + goto err; + + ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, +- !(kargs->flags & CLONE_THREAD)); ++ !(kargs->flags & CLONE_THREAD), ++ current->nsproxy->cgroup_ns); + if (ret) + goto err; + diff --git a/queue-5.10/cgroup-v1-correct-privileges-check-in-release_agent-writes.patch b/queue-5.10/cgroup-v1-correct-privileges-check-in-release_agent-writes.patch new file mode 100644 index 00000000000..9496e48e165 --- /dev/null +++ b/queue-5.10/cgroup-v1-correct-privileges-check-in-release_agent-writes.patch @@ -0,0 +1,55 @@ +From 467a726b754f474936980da793b4ff2ec3e382a7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Michal=20Koutn=C3=BD?= +Date: Thu, 17 Feb 2022 17:11:28 +0100 +Subject: cgroup-v1: Correct privileges check in release_agent writes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Michal Koutný + +commit 467a726b754f474936980da793b4ff2ec3e382a7 upstream. + +The idea is to check: a) the owning user_ns of cgroup_ns, b) +capabilities in init_user_ns. + +The commit 24f600856418 ("cgroup-v1: Require capabilities to set +release_agent") got this wrong in the write handler of release_agent +since it checked user_ns of the opener (may be different from the owning +user_ns of cgroup_ns). 
+Secondly, to avoid possibly confused deputy, the capability of the +opener must be checked. + +Fixes: 24f600856418 ("cgroup-v1: Require capabilities to set release_agent") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/stable/20220216121142.GB30035@blackbody.suse.cz/ +Signed-off-by: Michal Koutný +Reviewed-by: Masami Ichikawa(CIP) +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cgroup/cgroup-v1.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/kernel/cgroup/cgroup-v1.c ++++ b/kernel/cgroup/cgroup-v1.c +@@ -544,6 +544,7 @@ static ssize_t cgroup_release_agent_writ + char *buf, size_t nbytes, loff_t off) + { + struct cgroup *cgrp; ++ struct cgroup_file_ctx *ctx; + + BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + +@@ -551,8 +552,9 @@ static ssize_t cgroup_release_agent_writ + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ +- if ((of->file->f_cred->user_ns != &init_user_ns) || +- !capable(CAP_SYS_ADMIN)) ++ ctx = of->priv; ++ if ((ctx->ns->user_ns != &init_user_ns) || ++ !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + cgrp = cgroup_kn_lock_live(of->kn, false); diff --git a/queue-5.10/series b/queue-5.10/series index 4ddbabe1541..19f5efc9955 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -1,3 +1,6 @@ nfc-st21nfca-fix-potential-buffer-overflows-in-evt_transaction.patch net-ipv6-fix-skb_over_panic-in-__ip6_append_data.patch exfat-avoid-incorrectly-releasing-for-root-inode.patch +cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch +cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch +cgroup-v1-correct-privileges-check-in-release_agent-writes.patch -- 2.47.3