From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 8 Jan 2022 14:43:31 +0000 (+0100)
Subject: 5.15-stable patches
X-Git-Tag: v4.4.299~19
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a761d61ccea577c22200bb852ebf9a802a919d2a;p=thirdparty%2Fkernel%2Fstable-queue.git

5.15-stable patches

added patches:
	cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch
	cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch
	cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch
---

diff --git a/queue-5.15/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch b/queue-5.15/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch
new file mode 100644
index 00000000000..533b143299e
--- /dev/null
+++ b/queue-5.15/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch
@@ -0,0 +1,257 @@
+From 0d2b5955b36250a9428c832664f2079cbf723bec Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 6 Jan 2022 11:02:29 -1000
+Subject: cgroup: Allocate cgroup_file_ctx for kernfs_open_file->priv
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 0d2b5955b36250a9428c832664f2079cbf723bec upstream.
+
+of->priv is currently used by each interface file implementation to store
+private information. This patch collects the current two private data usages
+into struct cgroup_file_ctx which is allocated and freed by the common path.
+This allows generic private data which applies to multiple files, which will
+be used in the following patch.
+
+Note that cgroup_procs iterator is now embedded as procs.iter in the new
+cgroup_file_ctx so that it doesn't need to be allocated and freed
+separately.
+
+v2: union dropped from cgroup_file_ctx and the procs iterator is embedded in
+    cgroup_file_ctx as suggested by Linus.
+
+v3: Michal pointed out that cgroup1's procs pidlist uses of->priv too.
+    Converted. Didn't change to embedded allocation as cgroup1 pidlists get
+    stored for caching.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Reviewed-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cgroup-internal.h |   17 ++++++++++++
+ kernel/cgroup/cgroup-v1.c       |   26 ++++++++++---------
+ kernel/cgroup/cgroup.c          |   53 +++++++++++++++++++++++++---------------
+ 3 files changed, 65 insertions(+), 31 deletions(-)
+
+--- a/kernel/cgroup/cgroup-internal.h
++++ b/kernel/cgroup/cgroup-internal.h
+@@ -65,6 +65,23 @@ static inline struct cgroup_fs_context *
+ 	return container_of(kfc, struct cgroup_fs_context, kfc);
+ }
+ 
++struct cgroup_pidlist;
++
++struct cgroup_file_ctx {
++	struct {
++		void			*trigger;
++	} psi;
++
++	struct {
++		bool			started;
++		struct css_task_iter	iter;
++	} procs;
++
++	struct {
++		struct cgroup_pidlist	*pidlist;
++	} procs1;
++};
++
+ /*
+  * A cgroup can be associated with multiple css_sets as different tasks may
+  * belong to different cgroups on different hierarchies.  In the other
+--- a/kernel/cgroup/cgroup-v1.c
++++ b/kernel/cgroup/cgroup-v1.c
+@@ -397,6 +397,7 @@ static void *cgroup_pidlist_start(struct
+ 	 * next pid to display, if any
+ 	 */
+ 	struct kernfs_open_file *of = s->private;
++	struct cgroup_file_ctx *ctx = of->priv;
+ 	struct cgroup *cgrp = seq_css(s)->cgroup;
+ 	struct cgroup_pidlist *l;
+ 	enum cgroup_filetype type = seq_cft(s)->private;
+@@ -406,25 +407,24 @@ static void *cgroup_pidlist_start(struct
+ 	mutex_lock(&cgrp->pidlist_mutex);
+ 
+ 	/*
+-	 * !NULL @of->priv indicates that this isn't the first start()
+-	 * after open.  If the matching pidlist is around, we can use that.
+-	 * Look for it.  Note that @of->priv can't be used directly.  It
+-	 * could already have been destroyed.
++	 * !NULL @ctx->procs1.pidlist indicates that this isn't the first
++	 * start() after open. If the matching pidlist is around, we can use
++	 * that. Look for it. Note that @ctx->procs1.pidlist can't be used
++	 * directly. It could already have been destroyed.
+ 	 */
+-	if (of->priv)
+-		of->priv = cgroup_pidlist_find(cgrp, type);
++	if (ctx->procs1.pidlist)
++		ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type);
+ 
+ 	/*
+ 	 * Either this is the first start() after open or the matching
+ 	 * pidlist has been destroyed inbetween.  Create a new one.
+ 	 */
+-	if (!of->priv) {
+-		ret = pidlist_array_load(cgrp, type,
+-					 (struct cgroup_pidlist **)&of->priv);
++	if (!ctx->procs1.pidlist) {
++		ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist);
+ 		if (ret)
+ 			return ERR_PTR(ret);
+ 	}
+-	l = of->priv;
++	l = ctx->procs1.pidlist;
+ 
+ 	if (pid) {
+ 		int end = l->length;
+@@ -452,7 +452,8 @@ static void *cgroup_pidlist_start(struct
+ static void cgroup_pidlist_stop(struct seq_file *s, void *v)
+ {
+ 	struct kernfs_open_file *of = s->private;
+-	struct cgroup_pidlist *l = of->priv;
++	struct cgroup_file_ctx *ctx = of->priv;
++	struct cgroup_pidlist *l = ctx->procs1.pidlist;
+ 
+ 	if (l)
+ 		mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
+@@ -463,7 +464,8 @@ static void cgroup_pidlist_stop(struct s
+ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
+ {
+ 	struct kernfs_open_file *of = s->private;
+-	struct cgroup_pidlist *l = of->priv;
++	struct cgroup_file_ctx *ctx = of->priv;
++	struct cgroup_pidlist *l = ctx->procs1.pidlist;
+ 	pid_t *p = v;
+ 	pid_t *end = l->list + l->length;
+ 	/*
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -3630,6 +3630,7 @@ static int cgroup_cpu_pressure_show(stru
+ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
+ 					  size_t nbytes, enum psi_res res)
+ {
++	struct cgroup_file_ctx *ctx = of->priv;
+ 	struct psi_trigger *new;
+ 	struct cgroup *cgrp;
+ 	struct psi_group *psi;
+@@ -3648,7 +3649,7 @@ static ssize_t cgroup_pressure_write(str
+ 		return PTR_ERR(new);
+ 	}
+ 
+-	psi_trigger_replace(&of->priv, new);
++	psi_trigger_replace(&ctx->psi.trigger, new);
+ 
+ 	cgroup_put(cgrp);
+ 
+@@ -3679,12 +3680,16 @@ static ssize_t cgroup_cpu_pressure_write
+ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
+ 					  poll_table *pt)
+ {
+-	return psi_trigger_poll(&of->priv, of->file, pt);
++	struct cgroup_file_ctx *ctx = of->priv;
++
++	return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
+ }
+ 
+ static void cgroup_pressure_release(struct kernfs_open_file *of)
+ {
+-	psi_trigger_replace(&of->priv, NULL);
++	struct cgroup_file_ctx *ctx = of->priv;
++
++	psi_trigger_replace(&ctx->psi.trigger, NULL);
+ }
+ 
+ bool cgroup_psi_enabled(void)
+@@ -3811,18 +3816,31 @@ static ssize_t cgroup_kill_write(struct
+ static int cgroup_file_open(struct kernfs_open_file *of)
+ {
+ 	struct cftype *cft = of_cft(of);
++	struct cgroup_file_ctx *ctx;
++	int ret;
+ 
+-	if (cft->open)
+-		return cft->open(of);
+-	return 0;
++	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
++	if (!ctx)
++		return -ENOMEM;
++	of->priv = ctx;
++
++	if (!cft->open)
++		return 0;
++
++	ret = cft->open(of);
++	if (ret)
++		kfree(ctx);
++	return ret;
+ }
+ 
+ static void cgroup_file_release(struct kernfs_open_file *of)
+ {
+ 	struct cftype *cft = of_cft(of);
++	struct cgroup_file_ctx *ctx = of->priv;
+ 
+ 	if (cft->release)
+ 		cft->release(of);
++	kfree(ctx);
+ }
+ 
+ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
+@@ -4751,21 +4769,21 @@ void css_task_iter_end(struct css_task_i
+ 
+ static void cgroup_procs_release(struct kernfs_open_file *of)
+ {
+-	if (of->priv) {
+-		css_task_iter_end(of->priv);
+-		kfree(of->priv);
+-	}
++	struct cgroup_file_ctx *ctx = of->priv;
++
++	if (ctx->procs.started)
++		css_task_iter_end(&ctx->procs.iter);
+ }
+ 
+ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
+ {
+ 	struct kernfs_open_file *of = s->private;
+-	struct css_task_iter *it = of->priv;
++	struct cgroup_file_ctx *ctx = of->priv;
+ 
+ 	if (pos)
+ 		(*pos)++;
+ 
+-	return css_task_iter_next(it);
++	return css_task_iter_next(&ctx->procs.iter);
+ }
+ 
+ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
+@@ -4773,21 +4791,18 @@ static void *__cgroup_procs_start(struct
+ {
+ 	struct kernfs_open_file *of = s->private;
+ 	struct cgroup *cgrp = seq_css(s)->cgroup;
+-	struct css_task_iter *it = of->priv;
++	struct cgroup_file_ctx *ctx = of->priv;
++	struct css_task_iter *it = &ctx->procs.iter;
+ 
+ 	/*
+ 	 * When a seq_file is seeked, it's always traversed sequentially
+ 	 * from position 0, so we can simply keep iterating on !0 *pos.
+ 	 */
+-	if (!it) {
++	if (!ctx->procs.started) {
+ 		if (WARN_ON_ONCE((*pos)))
+ 			return ERR_PTR(-EINVAL);
+-
+-		it = kzalloc(sizeof(*it), GFP_KERNEL);
+-		if (!it)
+-			return ERR_PTR(-ENOMEM);
+-		of->priv = it;
+ 		css_task_iter_start(&cgrp->self, iter_flags, it);
++		ctx->procs.started = true;
+ 	} else if (!(*pos)) {
+ 		css_task_iter_end(it);
+ 		css_task_iter_start(&cgrp->self, iter_flags, it);
diff --git a/queue-5.15/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch b/queue-5.15/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch
new file mode 100644
index 00000000000..a4fde394ec2
--- /dev/null
+++ b/queue-5.15/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch
@@ -0,0 +1,158 @@
+From e57457641613fef0d147ede8bd6a3047df588b95 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 6 Jan 2022 11:02:29 -1000
+Subject: cgroup: Use open-time cgroup namespace for process migration perm checks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tejun Heo <tj@kernel.org>
+
+commit e57457641613fef0d147ede8bd6a3047df588b95 upstream.
+
+cgroup process migration permission checks are performed at write time as
+whether a given operation is allowed or not is dependent on the content of
+the write - the PID. This currently uses current's cgroup namespace which is
+a potential security weakness as it may allow scenarios where a less
+privileged process tricks a more privileged one into writing into a fd that
+it created.
+
+This patch makes cgroup remember the cgroup namespace at the time of open
+and uses it for migration permission checks instead of current's. Note that
+this only applies to cgroup2 as cgroup1 doesn't have namespace support.
+
+This also fixes a use-after-free bug on cgroupns reported in
+
+ https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com
+
+Note that backporting this fix also requires the preceding patch.
+
+Reported-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Suggested-by: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: Michal Koutný <mkoutny@suse.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Reviewed-by: Michal Koutný <mkoutny@suse.com>
+Reported-by: syzbot+50f5cf33a284ce738b62@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com
+Fixes: 5136f6365ce3 ("cgroup: implement "nsdelegate" mount option")
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cgroup-internal.h |    2 ++
+ kernel/cgroup/cgroup.c          |   28 +++++++++++++++++++---------
+ 2 files changed, 21 insertions(+), 9 deletions(-)
+
+--- a/kernel/cgroup/cgroup-internal.h
++++ b/kernel/cgroup/cgroup-internal.h
+@@ -68,6 +68,8 @@ static inline struct cgroup_fs_context *
+ struct cgroup_pidlist;
+ 
+ struct cgroup_file_ctx {
++	struct cgroup_namespace	*ns;
++
+ 	struct {
+ 		void			*trigger;
+ 	} psi;
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -3822,14 +3822,19 @@ static int cgroup_file_open(struct kernf
+ 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ 	if (!ctx)
+ 		return -ENOMEM;
++
++	ctx->ns = current->nsproxy->cgroup_ns;
++	get_cgroup_ns(ctx->ns);
+ 	of->priv = ctx;
+ 
+ 	if (!cft->open)
+ 		return 0;
+ 
+ 	ret = cft->open(of);
+-	if (ret)
++	if (ret) {
++		put_cgroup_ns(ctx->ns);
+ 		kfree(ctx);
++	}
+ 	return ret;
+ }
+ 
+@@ -3840,13 +3845,14 @@ static void cgroup_file_release(struct k
+ 
+ 	if (cft->release)
+ 		cft->release(of);
++	put_cgroup_ns(ctx->ns);
+ 	kfree(ctx);
+ }
+ 
+ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
+ 				 size_t nbytes, loff_t off)
+ {
+-	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
++	struct cgroup_file_ctx *ctx = of->priv;
+ 	struct cgroup *cgrp = of->kn->parent->priv;
+ 	struct cftype *cft = of_cft(of);
+ 	struct cgroup_subsys_state *css;
+@@ -3863,7 +3869,7 @@ static ssize_t cgroup_file_write(struct
+ 	 */
+ 	if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
+ 	    !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
+-	    ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
++	    ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp)
+ 		return -EPERM;
+ 
+ 	if (cft->write)
+@@ -4853,9 +4859,9 @@ static int cgroup_may_write(const struct
+ 
+ static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
+ 					 struct cgroup *dst_cgrp,
+-					 struct super_block *sb)
++					 struct super_block *sb,
++					 struct cgroup_namespace *ns)
+ {
+-	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
+ 	struct cgroup *com_cgrp = src_cgrp;
+ 	int ret;
+ 
+@@ -4884,11 +4890,12 @@ static int cgroup_procs_write_permission
+ 
+ static int cgroup_attach_permissions(struct cgroup *src_cgrp,
+ 				     struct cgroup *dst_cgrp,
+-				     struct super_block *sb, bool threadgroup)
++				     struct super_block *sb, bool threadgroup,
++				     struct cgroup_namespace *ns)
+ {
+ 	int ret = 0;
+ 
+-	ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb);
++	ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -4905,6 +4912,7 @@ static int cgroup_attach_permissions(str
+ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
+ 				    bool threadgroup)
+ {
++	struct cgroup_file_ctx *ctx = of->priv;
+ 	struct cgroup *src_cgrp, *dst_cgrp;
+ 	struct task_struct *task;
+ 	const struct cred *saved_cred;
+@@ -4932,7 +4940,8 @@ static ssize_t __cgroup_procs_write(stru
+ 	 */
+ 	saved_cred = override_creds(of->file->f_cred);
+ 	ret = cgroup_attach_permissions(src_cgrp, dst_cgrp,
+-					of->file->f_path.dentry->d_sb, threadgroup);
++					of->file->f_path.dentry->d_sb,
++					threadgroup, ctx->ns);
+ 	revert_creds(saved_cred);
+ 	if (ret)
+ 		goto out_finish;
+@@ -6149,7 +6158,8 @@ static int cgroup_css_set_fork(struct ke
+ 		goto err;
+ 
+ 	ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb,
+-					!(kargs->flags & CLONE_THREAD));
++					!(kargs->flags & CLONE_THREAD),
++					current->nsproxy->cgroup_ns);
+ 	if (ret)
+ 		goto err;
+ 
diff --git a/queue-5.15/cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch b/queue-5.15/cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch
new file mode 100644
index 00000000000..cca36d44a41
--- /dev/null
+++ b/queue-5.15/cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch
@@ -0,0 +1,78 @@
+From 1756d7994ad85c2479af6ae5a9750b92324685af Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 6 Jan 2022 11:02:28 -1000
+Subject: cgroup: Use open-time credentials for process migraton perm checks
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 1756d7994ad85c2479af6ae5a9750b92324685af upstream.
+
+cgroup process migration permission checks are performed at write time as
+whether a given operation is allowed or not is dependent on the content of
+the write - the PID. This currently uses current's credentials which is a
+potential security weakness as it may allow scenarios where a less
+privileged process tricks a more privileged one into writing into a fd that
+it created.
+
+This patch makes both cgroup2 and cgroup1 process migration interfaces
+use the credentials saved at the time of open (file->f_cred) instead of
+current's.
+
+Reported-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Suggested-by: Linus Torvalds <torvalds@linuxfoundation.org>
+Fixes: 187fe84067bd ("cgroup: require write perm on common ancestor when moving processes on the default hierarchy")
+Reviewed-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cgroup-v1.c |    7 ++++---
+ kernel/cgroup/cgroup.c    |    9 ++++++++-
+ 2 files changed, 12 insertions(+), 4 deletions(-)
+
+--- a/kernel/cgroup/cgroup-v1.c
++++ b/kernel/cgroup/cgroup-v1.c
+@@ -507,10 +507,11 @@ static ssize_t __cgroup1_procs_write(str
+ 		goto out_unlock;
+ 
+ 	/*
+-	 * Even if we're attaching all tasks in the thread group, we only
+-	 * need to check permissions on one of them.
++	 * Even if we're attaching all tasks in the thread group, we only need
++	 * to check permissions on one of them. Check permissions using the
++	 * credentials from file open to protect against inherited fd attacks.
+ 	 */
+-	cred = current_cred();
++	cred = of->file->f_cred;
+ 	tcred = get_task_cred(task);
+ 	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+ 	    !uid_eq(cred->euid, tcred->uid) &&
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -4892,6 +4892,7 @@ static ssize_t __cgroup_procs_write(stru
+ {
+ 	struct cgroup *src_cgrp, *dst_cgrp;
+ 	struct task_struct *task;
++	const struct cred *saved_cred;
+ 	ssize_t ret;
+ 	bool locked;
+ 
+@@ -4909,9 +4910,15 @@ static ssize_t __cgroup_procs_write(stru
+ 	src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
+ 	spin_unlock_irq(&css_set_lock);
+ 
+-	/* process and thread migrations follow same delegation rule */
++	/*
++	 * Process and thread migrations follow same delegation rule. Check
++	 * permissions using the credentials from file open to protect against
++	 * inherited fd attacks.
++	 */
++	saved_cred = override_creds(of->file->f_cred);
+ 	ret = cgroup_attach_permissions(src_cgrp, dst_cgrp,
+ 					of->file->f_path.dentry->d_sb, threadgroup);
++	revert_creds(saved_cred);
+ 	if (ret)
+ 		goto out_finish;
+ 
diff --git a/queue-5.15/series b/queue-5.15/series
index f58f12e40ae..cd789cdd978 100644
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -32,3 +32,6 @@ net-ena-fix-error-handling-when-calculating-max-io-queues-number.patch
 md-raid1-fix-missing-bitmap-update-w-o-writemostly-devices.patch
 edac-i10nm-release-mdev-mbase-when-failing-to-detect-hbm.patch
 kvm-x86-check-for-rmaps-allocation.patch
+cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch
+cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch
+cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch