From: Greg Kroah-Hartman Date: Sat, 8 Jan 2022 14:43:31 +0000 (+0100) Subject: 5.15-stable patches X-Git-Tag: v4.4.299~19 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a761d61ccea577c22200bb852ebf9a802a919d2a;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch --- diff --git a/queue-5.15/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch b/queue-5.15/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch new file mode 100644 index 00000000000..533b143299e --- /dev/null +++ b/queue-5.15/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch @@ -0,0 +1,257 @@ +From 0d2b5955b36250a9428c832664f2079cbf723bec Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 6 Jan 2022 11:02:29 -1000 +Subject: cgroup: Allocate cgroup_file_ctx for kernfs_open_file->priv +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Tejun Heo + +commit 0d2b5955b36250a9428c832664f2079cbf723bec upstream. + +of->priv is currently used by each interface file implementation to store +private information. This patch collects the current two private data usages +into struct cgroup_file_ctx which is allocated and freed by the common path. +This allows generic private data which applies to multiple files, which will +be used in the following patch. + +Note that cgroup_procs iterator is now embedded as procs.iter in the new +cgroup_file_ctx so that it doesn't need to be allocated and freed +separately. + +v2: union dropped from cgroup_file_ctx and the procs iterator is embedded in + cgroup_file_ctx as suggested by Linus. + +v3: Michal pointed out that cgroup1's procs pidlist uses of->priv too. + Converted. 
Didn't change to embedded allocation as cgroup1 pidlists get + stored for caching. + +Signed-off-by: Tejun Heo +Cc: Linus Torvalds +Reviewed-by: Michal Koutný +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cgroup/cgroup-internal.h | 17 ++++++++++++ + kernel/cgroup/cgroup-v1.c | 26 ++++++++++--------- + kernel/cgroup/cgroup.c | 53 +++++++++++++++++++++++++--------------- + 3 files changed, 65 insertions(+), 31 deletions(-) + +--- a/kernel/cgroup/cgroup-internal.h ++++ b/kernel/cgroup/cgroup-internal.h +@@ -65,6 +65,23 @@ static inline struct cgroup_fs_context * + return container_of(kfc, struct cgroup_fs_context, kfc); + } + ++struct cgroup_pidlist; ++ ++struct cgroup_file_ctx { ++ struct { ++ void *trigger; ++ } psi; ++ ++ struct { ++ bool started; ++ struct css_task_iter iter; ++ } procs; ++ ++ struct { ++ struct cgroup_pidlist *pidlist; ++ } procs1; ++}; ++ + /* + * A cgroup can be associated with multiple css_sets as different tasks may + * belong to different cgroups on different hierarchies. In the other +--- a/kernel/cgroup/cgroup-v1.c ++++ b/kernel/cgroup/cgroup-v1.c +@@ -397,6 +397,7 @@ static void *cgroup_pidlist_start(struct + * next pid to display, if any + */ + struct kernfs_open_file *of = s->private; ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *cgrp = seq_css(s)->cgroup; + struct cgroup_pidlist *l; + enum cgroup_filetype type = seq_cft(s)->private; +@@ -406,25 +407,24 @@ static void *cgroup_pidlist_start(struct + mutex_lock(&cgrp->pidlist_mutex); + + /* +- * !NULL @of->priv indicates that this isn't the first start() +- * after open. If the matching pidlist is around, we can use that. +- * Look for it. Note that @of->priv can't be used directly. It +- * could already have been destroyed. ++ * !NULL @ctx->procs1.pidlist indicates that this isn't the first ++ * start() after open. If the matching pidlist is around, we can use ++ * that. Look for it. Note that @ctx->procs1.pidlist can't be used ++ * directly. 
It could already have been destroyed. + */ +- if (of->priv) +- of->priv = cgroup_pidlist_find(cgrp, type); ++ if (ctx->procs1.pidlist) ++ ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type); + + /* + * Either this is the first start() after open or the matching + * pidlist has been destroyed inbetween. Create a new one. + */ +- if (!of->priv) { +- ret = pidlist_array_load(cgrp, type, +- (struct cgroup_pidlist **)&of->priv); ++ if (!ctx->procs1.pidlist) { ++ ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist); + if (ret) + return ERR_PTR(ret); + } +- l = of->priv; ++ l = ctx->procs1.pidlist; + + if (pid) { + int end = l->length; +@@ -452,7 +452,8 @@ static void *cgroup_pidlist_start(struct + static void cgroup_pidlist_stop(struct seq_file *s, void *v) + { + struct kernfs_open_file *of = s->private; +- struct cgroup_pidlist *l = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; ++ struct cgroup_pidlist *l = ctx->procs1.pidlist; + + if (l) + mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, +@@ -463,7 +464,8 @@ static void cgroup_pidlist_stop(struct s + static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) + { + struct kernfs_open_file *of = s->private; +- struct cgroup_pidlist *l = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; ++ struct cgroup_pidlist *l = ctx->procs1.pidlist; + pid_t *p = v; + pid_t *end = l->list + l->length; + /* +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -3630,6 +3630,7 @@ static int cgroup_cpu_pressure_show(stru + static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, enum psi_res res) + { ++ struct cgroup_file_ctx *ctx = of->priv; + struct psi_trigger *new; + struct cgroup *cgrp; + struct psi_group *psi; +@@ -3648,7 +3649,7 @@ static ssize_t cgroup_pressure_write(str + return PTR_ERR(new); + } + +- psi_trigger_replace(&of->priv, new); ++ psi_trigger_replace(&ctx->psi.trigger, new); + + cgroup_put(cgrp); + +@@ -3679,12 +3680,16 @@ 
static ssize_t cgroup_cpu_pressure_write + static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, + poll_table *pt) + { +- return psi_trigger_poll(&of->priv, of->file, pt); ++ struct cgroup_file_ctx *ctx = of->priv; ++ ++ return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); + } + + static void cgroup_pressure_release(struct kernfs_open_file *of) + { +- psi_trigger_replace(&of->priv, NULL); ++ struct cgroup_file_ctx *ctx = of->priv; ++ ++ psi_trigger_replace(&ctx->psi.trigger, NULL); + } + + bool cgroup_psi_enabled(void) +@@ -3811,18 +3816,31 @@ static ssize_t cgroup_kill_write(struct + static int cgroup_file_open(struct kernfs_open_file *of) + { + struct cftype *cft = of_cft(of); ++ struct cgroup_file_ctx *ctx; ++ int ret; + +- if (cft->open) +- return cft->open(of); +- return 0; ++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); ++ if (!ctx) ++ return -ENOMEM; ++ of->priv = ctx; ++ ++ if (!cft->open) ++ return 0; ++ ++ ret = cft->open(of); ++ if (ret) ++ kfree(ctx); ++ return ret; + } + + static void cgroup_file_release(struct kernfs_open_file *of) + { + struct cftype *cft = of_cft(of); ++ struct cgroup_file_ctx *ctx = of->priv; + + if (cft->release) + cft->release(of); ++ kfree(ctx); + } + + static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, +@@ -4751,21 +4769,21 @@ void css_task_iter_end(struct css_task_i + + static void cgroup_procs_release(struct kernfs_open_file *of) + { +- if (of->priv) { +- css_task_iter_end(of->priv); +- kfree(of->priv); +- } ++ struct cgroup_file_ctx *ctx = of->priv; ++ ++ if (ctx->procs.started) ++ css_task_iter_end(&ctx->procs.iter); + } + + static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) + { + struct kernfs_open_file *of = s->private; +- struct css_task_iter *it = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; + + if (pos) + (*pos)++; + +- return css_task_iter_next(it); ++ return css_task_iter_next(&ctx->procs.iter); + } + + static void *__cgroup_procs_start(struct seq_file 
*s, loff_t *pos, +@@ -4773,21 +4791,18 @@ static void *__cgroup_procs_start(struct + { + struct kernfs_open_file *of = s->private; + struct cgroup *cgrp = seq_css(s)->cgroup; +- struct css_task_iter *it = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; ++ struct css_task_iter *it = &ctx->procs.iter; + + /* + * When a seq_file is seeked, it's always traversed sequentially + * from position 0, so we can simply keep iterating on !0 *pos. + */ +- if (!it) { ++ if (!ctx->procs.started) { + if (WARN_ON_ONCE((*pos))) + return ERR_PTR(-EINVAL); +- +- it = kzalloc(sizeof(*it), GFP_KERNEL); +- if (!it) +- return ERR_PTR(-ENOMEM); +- of->priv = it; + css_task_iter_start(&cgrp->self, iter_flags, it); ++ ctx->procs.started = true; + } else if (!(*pos)) { + css_task_iter_end(it); + css_task_iter_start(&cgrp->self, iter_flags, it); diff --git a/queue-5.15/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch b/queue-5.15/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch new file mode 100644 index 00000000000..a4fde394ec2 --- /dev/null +++ b/queue-5.15/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch @@ -0,0 +1,158 @@ +From e57457641613fef0d147ede8bd6a3047df588b95 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 6 Jan 2022 11:02:29 -1000 +Subject: cgroup: Use open-time cgroup namespace for process migration perm checks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Tejun Heo + +commit e57457641613fef0d147ede8bd6a3047df588b95 upstream. + +cgroup process migration permission checks are performed at write time as +whether a given operation is allowed or not is dependent on the content of +the write - the PID. This currently uses current's cgroup namespace which is +a potential security weakness as it may allow scenarios where a less +privileged process tricks a more privileged one into writing into a fd that +it created. 
+ +This patch makes cgroup remember the cgroup namespace at the time of open +and uses it for migration permission checks instead of current's. Note that +this only applies to cgroup2 as cgroup1 doesn't have namespace support. + +This also fixes a use-after-free bug on cgroupns reported in + + https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com + +Note that backporting this fix also requires the preceding patch. + +Reported-by: "Eric W. Biederman" +Suggested-by: Linus Torvalds +Cc: Michal Koutný +Cc: Oleg Nesterov +Reviewed-by: Michal Koutný +Reported-by: syzbot+50f5cf33a284ce738b62@syzkaller.appspotmail.com +Link: https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com +Fixes: 5136f6365ce3 ("cgroup: implement "nsdelegate" mount option") +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cgroup/cgroup-internal.h | 2 ++ + kernel/cgroup/cgroup.c | 28 +++++++++++++++++++--------- + 2 files changed, 21 insertions(+), 9 deletions(-) + +--- a/kernel/cgroup/cgroup-internal.h ++++ b/kernel/cgroup/cgroup-internal.h +@@ -68,6 +68,8 @@ static inline struct cgroup_fs_context * + struct cgroup_pidlist; + + struct cgroup_file_ctx { ++ struct cgroup_namespace *ns; ++ + struct { + void *trigger; + } psi; +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -3822,14 +3822,19 @@ static int cgroup_file_open(struct kernf + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; ++ ++ ctx->ns = current->nsproxy->cgroup_ns; ++ get_cgroup_ns(ctx->ns); + of->priv = ctx; + + if (!cft->open) + return 0; + + ret = cft->open(of); +- if (ret) ++ if (ret) { ++ put_cgroup_ns(ctx->ns); + kfree(ctx); ++ } + return ret; + } + +@@ -3840,13 +3845,14 @@ static void cgroup_file_release(struct k + + if (cft->release) + cft->release(of); ++ put_cgroup_ns(ctx->ns); + kfree(ctx); + } + + static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) + { +- struct cgroup_namespace *ns = 
current->nsproxy->cgroup_ns; ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *cgrp = of->kn->parent->priv; + struct cftype *cft = of_cft(of); + struct cgroup_subsys_state *css; +@@ -3863,7 +3869,7 @@ static ssize_t cgroup_file_write(struct + */ + if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && + !(cft->flags & CFTYPE_NS_DELEGATABLE) && +- ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) ++ ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp) + return -EPERM; + + if (cft->write) +@@ -4853,9 +4859,9 @@ static int cgroup_may_write(const struct + + static int cgroup_procs_write_permission(struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, +- struct super_block *sb) ++ struct super_block *sb, ++ struct cgroup_namespace *ns) + { +- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup *com_cgrp = src_cgrp; + int ret; + +@@ -4884,11 +4890,12 @@ static int cgroup_procs_write_permission + + static int cgroup_attach_permissions(struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, +- struct super_block *sb, bool threadgroup) ++ struct super_block *sb, bool threadgroup, ++ struct cgroup_namespace *ns) + { + int ret = 0; + +- ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb); ++ ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns); + if (ret) + return ret; + +@@ -4905,6 +4912,7 @@ static int cgroup_attach_permissions(str + static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, + bool threadgroup) + { ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *src_cgrp, *dst_cgrp; + struct task_struct *task; + const struct cred *saved_cred; +@@ -4932,7 +4940,8 @@ static ssize_t __cgroup_procs_write(stru + */ + saved_cred = override_creds(of->file->f_cred); + ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, +- of->file->f_path.dentry->d_sb, threadgroup); ++ of->file->f_path.dentry->d_sb, ++ threadgroup, ctx->ns); + revert_creds(saved_cred); + if (ret) + goto out_finish; +@@ 
-6149,7 +6158,8 @@ static int cgroup_css_set_fork(struct ke + goto err; + + ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, +- !(kargs->flags & CLONE_THREAD)); ++ !(kargs->flags & CLONE_THREAD), ++ current->nsproxy->cgroup_ns); + if (ret) + goto err; + diff --git a/queue-5.15/cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch b/queue-5.15/cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch new file mode 100644 index 00000000000..cca36d44a41 --- /dev/null +++ b/queue-5.15/cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch @@ -0,0 +1,78 @@ +From 1756d7994ad85c2479af6ae5a9750b92324685af Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 6 Jan 2022 11:02:28 -1000 +Subject: cgroup: Use open-time credentials for process migraton perm checks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Tejun Heo + +commit 1756d7994ad85c2479af6ae5a9750b92324685af upstream. + +cgroup process migration permission checks are performed at write time as +whether a given operation is allowed or not is dependent on the content of +the write - the PID. This currently uses current's credentials which is a +potential security weakness as it may allow scenarios where a less +privileged process tricks a more privileged one into writing into a fd that +it created. + +This patch makes both cgroup2 and cgroup1 process migration interfaces to +use the credentials saved at the time of open (file->f_cred) instead of +current's. + +Reported-by: "Eric W. 
Biederman" +Suggested-by: Linus Torvalds +Fixes: 187fe84067bd ("cgroup: require write perm on common ancestor when moving processes on the default hierarchy") +Reviewed-by: Michal Koutný +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cgroup/cgroup-v1.c | 7 ++++--- + kernel/cgroup/cgroup.c | 9 ++++++++- + 2 files changed, 12 insertions(+), 4 deletions(-) + +--- a/kernel/cgroup/cgroup-v1.c ++++ b/kernel/cgroup/cgroup-v1.c +@@ -507,10 +507,11 @@ static ssize_t __cgroup1_procs_write(str + goto out_unlock; + + /* +- * Even if we're attaching all tasks in the thread group, we only +- * need to check permissions on one of them. ++ * Even if we're attaching all tasks in the thread group, we only need ++ * to check permissions on one of them. Check permissions using the ++ * credentials from file open to protect against inherited fd attacks. + */ +- cred = current_cred(); ++ cred = of->file->f_cred; + tcred = get_task_cred(task); + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && + !uid_eq(cred->euid, tcred->uid) && +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -4892,6 +4892,7 @@ static ssize_t __cgroup_procs_write(stru + { + struct cgroup *src_cgrp, *dst_cgrp; + struct task_struct *task; ++ const struct cred *saved_cred; + ssize_t ret; + bool locked; + +@@ -4909,9 +4910,15 @@ static ssize_t __cgroup_procs_write(stru + src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); + spin_unlock_irq(&css_set_lock); + +- /* process and thread migrations follow same delegation rule */ ++ /* ++ * Process and thread migrations follow same delegation rule. Check ++ * permissions using the credentials from file open to protect against ++ * inherited fd attacks. 
++ */ ++ saved_cred = override_creds(of->file->f_cred); + ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, + of->file->f_path.dentry->d_sb, threadgroup); ++ revert_creds(saved_cred); + if (ret) + goto out_finish; + diff --git a/queue-5.15/series b/queue-5.15/series index f58f12e40ae..cd789cdd978 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -32,3 +32,6 @@ net-ena-fix-error-handling-when-calculating-max-io-queues-number.patch md-raid1-fix-missing-bitmap-update-w-o-writemostly-devices.patch edac-i10nm-release-mdev-mbase-when-failing-to-detect-hbm.patch kvm-x86-check-for-rmaps-allocation.patch +cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch +cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch +cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch