--- /dev/null
+From foo@baz Thu Apr 14 12:33:33 PM CEST 2022
+From: Ovidiu Panait <ovidiu.panait@windriver.com>
+Date: Thu, 14 Apr 2022 12:06:56 +0300
+Subject: cgroup: Allocate cgroup_file_ctx for kernfs_open_file->priv
+To: stable@vger.kernel.org
+Cc: tj@kernel.org, mkoutny@suse.com
+Message-ID: <20220414090700.2729576-3-ovidiu.panait@windriver.com>
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 0d2b5955b36250a9428c832664f2079cbf723bec upstream.
+
+of->priv is currently used by each interface file implementation to store
+private information. This patch collects the current two private data usages
+into struct cgroup_file_ctx which is allocated and freed by the common path.
+This allows generic private data which applies to multiple files, which will
+be used to in the following patch.
+
+Note that cgroup_procs iterator is now embedded as procs.iter in the new
+cgroup_file_ctx so that it doesn't need to be allocated and freed
+separately.
+
+v2: union dropped from cgroup_file_ctx and the procs iterator is embedded in
+ cgroup_file_ctx as suggested by Linus.
+
+v3: Michal pointed out that cgroup1's procs pidlist uses of->priv too.
+ Converted. Didn't change to embedded allocation as cgroup1 pidlists get
+ stored for caching.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Reviewed-by: Michal Koutný <mkoutny@suse.com>
+[mkoutny: v5.10: modify cgroup.pressure handlers, adjust context]
+Signed-off-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[OP: backport to v4.19: drop changes to cgroup_pressure_*() functions]
+Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cgroup-internal.h | 17 ++++++++++++++++
+ kernel/cgroup/cgroup-v1.c | 26 +++++++++++++-----------
+ kernel/cgroup/cgroup.c | 42 ++++++++++++++++++++++++----------------
+ 3 files changed, 57 insertions(+), 28 deletions(-)
+
+--- a/kernel/cgroup/cgroup-internal.h
++++ b/kernel/cgroup/cgroup-internal.h
+@@ -34,6 +34,23 @@ extern char trace_cgroup_path[TRACE_CGRO
+ } \
+ } while (0)
+
++struct cgroup_pidlist;
++
++struct cgroup_file_ctx {
++ struct {
++ void *trigger;
++ } psi;
++
++ struct {
++ bool started;
++ struct css_task_iter iter;
++ } procs;
++
++ struct {
++ struct cgroup_pidlist *pidlist;
++ } procs1;
++};
++
+ /*
+ * A cgroup can be associated with multiple css_sets as different tasks may
+ * belong to different cgroups on different hierarchies. In the other
+--- a/kernel/cgroup/cgroup-v1.c
++++ b/kernel/cgroup/cgroup-v1.c
+@@ -426,6 +426,7 @@ static void *cgroup_pidlist_start(struct
+ * next pid to display, if any
+ */
+ struct kernfs_open_file *of = s->private;
++ struct cgroup_file_ctx *ctx = of->priv;
+ struct cgroup *cgrp = seq_css(s)->cgroup;
+ struct cgroup_pidlist *l;
+ enum cgroup_filetype type = seq_cft(s)->private;
+@@ -435,25 +436,24 @@ static void *cgroup_pidlist_start(struct
+ mutex_lock(&cgrp->pidlist_mutex);
+
+ /*
+- * !NULL @of->priv indicates that this isn't the first start()
+- * after open. If the matching pidlist is around, we can use that.
+- * Look for it. Note that @of->priv can't be used directly. It
+- * could already have been destroyed.
++ * !NULL @ctx->procs1.pidlist indicates that this isn't the first
++ * start() after open. If the matching pidlist is around, we can use
++ * that. Look for it. Note that @ctx->procs1.pidlist can't be used
++ * directly. It could already have been destroyed.
+ */
+- if (of->priv)
+- of->priv = cgroup_pidlist_find(cgrp, type);
++ if (ctx->procs1.pidlist)
++ ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type);
+
+ /*
+ * Either this is the first start() after open or the matching
+ * pidlist has been destroyed inbetween. Create a new one.
+ */
+- if (!of->priv) {
+- ret = pidlist_array_load(cgrp, type,
+- (struct cgroup_pidlist **)&of->priv);
++ if (!ctx->procs1.pidlist) {
++ ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+- l = of->priv;
++ l = ctx->procs1.pidlist;
+
+ if (pid) {
+ int end = l->length;
+@@ -481,7 +481,8 @@ static void *cgroup_pidlist_start(struct
+ static void cgroup_pidlist_stop(struct seq_file *s, void *v)
+ {
+ struct kernfs_open_file *of = s->private;
+- struct cgroup_pidlist *l = of->priv;
++ struct cgroup_file_ctx *ctx = of->priv;
++ struct cgroup_pidlist *l = ctx->procs1.pidlist;
+
+ if (l)
+ mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
+@@ -492,7 +493,8 @@ static void cgroup_pidlist_stop(struct s
+ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
+ {
+ struct kernfs_open_file *of = s->private;
+- struct cgroup_pidlist *l = of->priv;
++ struct cgroup_file_ctx *ctx = of->priv;
++ struct cgroup_pidlist *l = ctx->procs1.pidlist;
+ pid_t *p = v;
+ pid_t *end = l->list + l->length;
+ /*
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -3451,18 +3451,31 @@ static int cpu_stat_show(struct seq_file
+ static int cgroup_file_open(struct kernfs_open_file *of)
+ {
+ struct cftype *cft = of->kn->priv;
++ struct cgroup_file_ctx *ctx;
++ int ret;
+
+- if (cft->open)
+- return cft->open(of);
+- return 0;
++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
++ if (!ctx)
++ return -ENOMEM;
++ of->priv = ctx;
++
++ if (!cft->open)
++ return 0;
++
++ ret = cft->open(of);
++ if (ret)
++ kfree(ctx);
++ return ret;
+ }
+
+ static void cgroup_file_release(struct kernfs_open_file *of)
+ {
+ struct cftype *cft = of->kn->priv;
++ struct cgroup_file_ctx *ctx = of->priv;
+
+ if (cft->release)
+ cft->release(of);
++ kfree(ctx);
+ }
+
+ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
+@@ -4376,21 +4389,21 @@ void css_task_iter_end(struct css_task_i
+
+ static void cgroup_procs_release(struct kernfs_open_file *of)
+ {
+- if (of->priv) {
+- css_task_iter_end(of->priv);
+- kfree(of->priv);
+- }
++ struct cgroup_file_ctx *ctx = of->priv;
++
++ if (ctx->procs.started)
++ css_task_iter_end(&ctx->procs.iter);
+ }
+
+ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
+ {
+ struct kernfs_open_file *of = s->private;
+- struct css_task_iter *it = of->priv;
++ struct cgroup_file_ctx *ctx = of->priv;
+
+ if (pos)
+ (*pos)++;
+
+- return css_task_iter_next(it);
++ return css_task_iter_next(&ctx->procs.iter);
+ }
+
+ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
+@@ -4398,21 +4411,18 @@ static void *__cgroup_procs_start(struct
+ {
+ struct kernfs_open_file *of = s->private;
+ struct cgroup *cgrp = seq_css(s)->cgroup;
+- struct css_task_iter *it = of->priv;
++ struct cgroup_file_ctx *ctx = of->priv;
++ struct css_task_iter *it = &ctx->procs.iter;
+
+ /*
+ * When a seq_file is seeked, it's always traversed sequentially
+ * from position 0, so we can simply keep iterating on !0 *pos.
+ */
+- if (!it) {
++ if (!ctx->procs.started) {
+ if (WARN_ON_ONCE((*pos)))
+ return ERR_PTR(-EINVAL);
+-
+- it = kzalloc(sizeof(*it), GFP_KERNEL);
+- if (!it)
+- return ERR_PTR(-ENOMEM);
+- of->priv = it;
+ css_task_iter_start(&cgrp->self, iter_flags, it);
++ ctx->procs.started = true;
+ } else if (!(*pos)) {
+ css_task_iter_end(it);
+ css_task_iter_start(&cgrp->self, iter_flags, it);
--- /dev/null
+From foo@baz Thu Apr 14 12:33:33 PM CEST 2022
+From: Ovidiu Panait <ovidiu.panait@windriver.com>
+Date: Thu, 14 Apr 2022 12:06:57 +0300
+Subject: cgroup: Use open-time cgroup namespace for process migration perm checks
+To: stable@vger.kernel.org
+Cc: tj@kernel.org, mkoutny@suse.com
+Message-ID: <20220414090700.2729576-4-ovidiu.panait@windriver.com>
+
+From: Tejun Heo <tj@kernel.org>
+
+commit e57457641613fef0d147ede8bd6a3047df588b95 upstream.
+
+cgroup process migration permission checks are performed at write time as
+whether a given operation is allowed or not is dependent on the content of
+the write - the PID. This currently uses current's cgroup namespace which is
+a potential security weakness as it may allow scenarios where a less
+privileged process tricks a more privileged one into writing into a fd that
+it created.
+
+This patch makes cgroup remember the cgroup namespace at the time of open
+and uses it for migration permission checks instad of current's. Note that
+this only applies to cgroup2 as cgroup1 doesn't have namespace support.
+
+This also fixes a use-after-free bug on cgroupns reported in
+
+ https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com
+
+Note that backporting this fix also requires the preceding patch.
+
+Reported-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Suggested-by: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: Michal Koutný <mkoutny@suse.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Reviewed-by: Michal Koutný <mkoutny@suse.com>
+Reported-by: syzbot+50f5cf33a284ce738b62@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com
+Fixes: 5136f6365ce3 ("cgroup: implement "nsdelegate" mount option")
+Signed-off-by: Tejun Heo <tj@kernel.org>
+[mkoutny: v5.10: duplicate ns check in procs/threads write handler, adjust context]
+Signed-off-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[OP: backport to v4.19: drop changes to cgroup_attach_permissions() and
+cgroup_css_set_fork(), adjust cgroup_procs_write_permission() calls]
+Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cgroup-internal.h | 2 ++
+ kernel/cgroup/cgroup.c | 24 +++++++++++++++++-------
+ 2 files changed, 19 insertions(+), 7 deletions(-)
+
+--- a/kernel/cgroup/cgroup-internal.h
++++ b/kernel/cgroup/cgroup-internal.h
+@@ -37,6 +37,8 @@ extern char trace_cgroup_path[TRACE_CGRO
+ struct cgroup_pidlist;
+
+ struct cgroup_file_ctx {
++ struct cgroup_namespace *ns;
++
+ struct {
+ void *trigger;
+ } psi;
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -3457,14 +3457,19 @@ static int cgroup_file_open(struct kernf
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
++
++ ctx->ns = current->nsproxy->cgroup_ns;
++ get_cgroup_ns(ctx->ns);
+ of->priv = ctx;
+
+ if (!cft->open)
+ return 0;
+
+ ret = cft->open(of);
+- if (ret)
++ if (ret) {
++ put_cgroup_ns(ctx->ns);
+ kfree(ctx);
++ }
+ return ret;
+ }
+
+@@ -3475,13 +3480,14 @@ static void cgroup_file_release(struct k
+
+ if (cft->release)
+ cft->release(of);
++ put_cgroup_ns(ctx->ns);
+ kfree(ctx);
+ }
+
+ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+ {
+- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
++ struct cgroup_file_ctx *ctx = of->priv;
+ struct cgroup *cgrp = of->kn->parent->priv;
+ struct cftype *cft = of->kn->priv;
+ struct cgroup_subsys_state *css;
+@@ -3495,7 +3501,7 @@ static ssize_t cgroup_file_write(struct
+ */
+ if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
+ !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
+- ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
++ ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp)
+ return -EPERM;
+
+ if (cft->write)
+@@ -4457,9 +4463,9 @@ static int cgroup_procs_show(struct seq_
+
+ static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
+ struct cgroup *dst_cgrp,
+- struct super_block *sb)
++ struct super_block *sb,
++ struct cgroup_namespace *ns)
+ {
+- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
+ struct cgroup *com_cgrp = src_cgrp;
+ struct inode *inode;
+ int ret;
+@@ -4495,6 +4501,7 @@ static int cgroup_procs_write_permission
+ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+ {
++ struct cgroup_file_ctx *ctx = of->priv;
+ struct cgroup *src_cgrp, *dst_cgrp;
+ struct task_struct *task;
+ const struct cred *saved_cred;
+@@ -4521,7 +4528,8 @@ static ssize_t cgroup_procs_write(struct
+ */
+ saved_cred = override_creds(of->file->f_cred);
+ ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
+- of->file->f_path.dentry->d_sb);
++ of->file->f_path.dentry->d_sb,
++ ctx->ns);
+ revert_creds(saved_cred);
+ if (ret)
+ goto out_finish;
+@@ -4544,6 +4552,7 @@ static void *cgroup_threads_start(struct
+ static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+ {
++ struct cgroup_file_ctx *ctx = of->priv;
+ struct cgroup *src_cgrp, *dst_cgrp;
+ struct task_struct *task;
+ const struct cred *saved_cred;
+@@ -4572,7 +4581,8 @@ static ssize_t cgroup_threads_write(stru
+ */
+ saved_cred = override_creds(of->file->f_cred);
+ ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
+- of->file->f_path.dentry->d_sb);
++ of->file->f_path.dentry->d_sb,
++ ctx->ns);
+ revert_creds(saved_cred);
+ if (ret)
+ goto out_finish;
--- /dev/null
+From foo@baz Thu Apr 14 12:33:33 PM CEST 2022
+From: Ovidiu Panait <ovidiu.panait@windriver.com>
+Date: Thu, 14 Apr 2022 12:06:55 +0300
+Subject: cgroup: Use open-time credentials for process migraton perm checks
+To: stable@vger.kernel.org
+Cc: tj@kernel.org, mkoutny@suse.com
+Message-ID: <20220414090700.2729576-2-ovidiu.panait@windriver.com>
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 1756d7994ad85c2479af6ae5a9750b92324685af upstream.
+
+cgroup process migration permission checks are performed at write time as
+whether a given operation is allowed or not is dependent on the content of
+the write - the PID. This currently uses current's credentials which is a
+potential security weakness as it may allow scenarios where a less
+privileged process tricks a more privileged one into writing into a fd that
+it created.
+
+This patch makes both cgroup2 and cgroup1 process migration interfaces to
+use the credentials saved at the time of open (file->f_cred) instead of
+current's.
+
+Reported-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Suggested-by: Linus Torvalds <torvalds@linuxfoundation.org>
+Fixes: 187fe84067bd ("cgroup: require write perm on common ancestor when moving processes on the default hierarchy")
+Reviewed-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+[OP: backport to v4.19: apply original __cgroup_procs_write() changes to
+cgroup_threads_write() and cgroup_procs_write()]
+Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cgroup-v1.c | 7 ++++---
+ kernel/cgroup/cgroup.c | 17 ++++++++++++++++-
+ 2 files changed, 20 insertions(+), 4 deletions(-)
+
+--- a/kernel/cgroup/cgroup-v1.c
++++ b/kernel/cgroup/cgroup-v1.c
+@@ -535,10 +535,11 @@ static ssize_t __cgroup1_procs_write(str
+ goto out_unlock;
+
+ /*
+- * Even if we're attaching all tasks in the thread group, we only
+- * need to check permissions on one of them.
++ * Even if we're attaching all tasks in the thread group, we only need
++ * to check permissions on one of them. Check permissions using the
++ * credentials from file open to protect against inherited fd attacks.
+ */
+- cred = current_cred();
++ cred = of->file->f_cred;
+ tcred = get_task_cred(task);
+ if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+ !uid_eq(cred->euid, tcred->uid) &&
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -4487,6 +4487,7 @@ static ssize_t cgroup_procs_write(struct
+ {
+ struct cgroup *src_cgrp, *dst_cgrp;
+ struct task_struct *task;
++ const struct cred *saved_cred;
+ ssize_t ret;
+
+ dst_cgrp = cgroup_kn_lock_live(of->kn, false);
+@@ -4503,8 +4504,15 @@ static ssize_t cgroup_procs_write(struct
+ src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
+ spin_unlock_irq(&css_set_lock);
+
++ /*
++ * Process and thread migrations follow same delegation rule. Check
++ * permissions using the credentials from file open to protect against
++ * inherited fd attacks.
++ */
++ saved_cred = override_creds(of->file->f_cred);
+ ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
+ of->file->f_path.dentry->d_sb);
++ revert_creds(saved_cred);
+ if (ret)
+ goto out_finish;
+
+@@ -4528,6 +4536,7 @@ static ssize_t cgroup_threads_write(stru
+ {
+ struct cgroup *src_cgrp, *dst_cgrp;
+ struct task_struct *task;
++ const struct cred *saved_cred;
+ ssize_t ret;
+
+ buf = strstrip(buf);
+@@ -4546,9 +4555,15 @@ static ssize_t cgroup_threads_write(stru
+ src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
+ spin_unlock_irq(&css_set_lock);
+
+- /* thread migrations follow the cgroup.procs delegation rule */
++ /*
++ * Process and thread migrations follow same delegation rule. Check
++ * permissions using the credentials from file open to protect against
++ * inherited fd attacks.
++ */
++ saved_cred = override_creds(of->file->f_cred);
+ ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
+ of->file->f_path.dentry->d_sb);
++ revert_creds(saved_cred);
+ if (ret)
+ goto out_finish;
+
--- /dev/null
+From foo@baz Thu Apr 14 12:33:33 PM CEST 2022
+From: Ovidiu Panait <ovidiu.panait@windriver.com>
+Date: Thu, 14 Apr 2022 12:06:58 +0300
+Subject: selftests: cgroup: Make cg_create() use 0755 for permission instead of 0644
+To: stable@vger.kernel.org
+Cc: tj@kernel.org, mkoutny@suse.com
+Message-ID: <20220414090700.2729576-5-ovidiu.panait@windriver.com>
+
+From: Tejun Heo <tj@kernel.org>
+
+commit b09c2baa56347ae65795350dfcc633dedb1c2970 upstream.
+
+0644 is an odd perm to create a cgroup which is a directory. Use the regular
+0755 instead. This is necessary for euid switching test case.
+
+Reviewed-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+[OP: backport to 4.19: adjust context]
+Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/cgroup/cgroup_util.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/cgroup/cgroup_util.c
++++ b/tools/testing/selftests/cgroup/cgroup_util.c
+@@ -192,7 +192,7 @@ int cg_find_unified_root(char *root, siz
+
+ int cg_create(const char *cgroup)
+ {
+- return mkdir(cgroup, 0644);
++ return mkdir(cgroup, 0755);
+ }
+
+ static int cg_killall(const char *cgroup)
--- /dev/null
+From foo@baz Thu Apr 14 12:33:33 PM CEST 2022
+From: Ovidiu Panait <ovidiu.panait@windriver.com>
+Date: Thu, 14 Apr 2022 12:07:00 +0300
+Subject: selftests: cgroup: Test open-time cgroup namespace usage for migration checks
+To: stable@vger.kernel.org
+Cc: tj@kernel.org, mkoutny@suse.com
+Message-ID: <20220414090700.2729576-7-ovidiu.panait@windriver.com>
+
+From: Tejun Heo <tj@kernel.org>
+
+commit bf35a7879f1dfb0d050fe779168bcf25c7de66f5 upstream.
+
+When a task is writing to an fd opened by a different task, the perm check
+should use the cgroup namespace of the latter task. Add a test for it.
+
+Tested-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+[OP: backport to v4.19: adjust context, add wait.h and fcntl.h includes]
+Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/cgroup/test_core.c | 99 +++++++++++++++++++++++++++++
+ 1 file changed, 99 insertions(+)
+
+--- a/tools/testing/selftests/cgroup/test_core.c
++++ b/tools/testing/selftests/cgroup/test_core.c
+@@ -1,8 +1,13 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+
++#define _GNU_SOURCE
+ #include <linux/limits.h>
++#include <linux/sched.h>
+ #include <sys/types.h>
++#include <sys/wait.h>
+ #include <unistd.h>
++#include <fcntl.h>
++#include <sched.h>
+ #include <stdio.h>
+ #include <errno.h>
+
+@@ -421,6 +426,99 @@ cleanup:
+ return ret;
+ }
+
++struct lesser_ns_open_thread_arg {
++ const char *path;
++ int fd;
++ int err;
++};
++
++static int lesser_ns_open_thread_fn(void *arg)
++{
++ struct lesser_ns_open_thread_arg *targ = arg;
++
++ targ->fd = open(targ->path, O_RDWR);
++ targ->err = errno;
++ return 0;
++}
++
++/*
++ * cgroup migration permission check should be performed based on the cgroup
++ * namespace at the time of open instead of write.
++ */
++static int test_cgcore_lesser_ns_open(const char *root)
++{
++ static char stack[65536];
++ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
++ int ret = KSFT_FAIL;
++ char *cg_test_a = NULL, *cg_test_b = NULL;
++ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
++ int cg_test_b_procs_fd = -1;
++ struct lesser_ns_open_thread_arg targ = { .fd = -1 };
++ pid_t pid;
++ int status;
++
++ cg_test_a = cg_name(root, "cg_test_a");
++ cg_test_b = cg_name(root, "cg_test_b");
++
++ if (!cg_test_a || !cg_test_b)
++ goto cleanup;
++
++ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
++ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
++
++ if (!cg_test_a_procs || !cg_test_b_procs)
++ goto cleanup;
++
++ if (cg_create(cg_test_a) || cg_create(cg_test_b))
++ goto cleanup;
++
++ if (cg_enter_current(cg_test_b))
++ goto cleanup;
++
++ if (chown(cg_test_a_procs, test_euid, -1) ||
++ chown(cg_test_b_procs, test_euid, -1))
++ goto cleanup;
++
++ targ.path = cg_test_b_procs;
++ pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
++ CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
++ &targ);
++ if (pid < 0)
++ goto cleanup;
++
++ if (waitpid(pid, &status, 0) < 0)
++ goto cleanup;
++
++ if (!WIFEXITED(status))
++ goto cleanup;
++
++ cg_test_b_procs_fd = targ.fd;
++ if (cg_test_b_procs_fd < 0)
++ goto cleanup;
++
++ if (cg_enter_current(cg_test_a))
++ goto cleanup;
++
++ if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
++ goto cleanup;
++
++ ret = KSFT_PASS;
++
++cleanup:
++ cg_enter_current(root);
++ if (cg_test_b_procs_fd >= 0)
++ close(cg_test_b_procs_fd);
++ if (cg_test_b)
++ cg_destroy(cg_test_b);
++ if (cg_test_a)
++ cg_destroy(cg_test_a);
++ free(cg_test_b_procs);
++ free(cg_test_a_procs);
++ free(cg_test_b);
++ free(cg_test_a);
++ return ret;
++}
++
+ #define T(x) { x, #x }
+ struct corecg_test {
+ int (*fn)(const char *root);
+@@ -434,6 +532,7 @@ struct corecg_test {
+ T(test_cgcore_invalid_domain),
+ T(test_cgcore_populated),
+ T(test_cgcore_lesser_euid_open),
++ T(test_cgcore_lesser_ns_open),
+ };
+ #undef T
+
--- /dev/null
+From foo@baz Thu Apr 14 12:33:33 PM CEST 2022
+From: Ovidiu Panait <ovidiu.panait@windriver.com>
+Date: Thu, 14 Apr 2022 12:06:59 +0300
+Subject: selftests: cgroup: Test open-time credential usage for migration checks
+To: stable@vger.kernel.org
+Cc: tj@kernel.org, mkoutny@suse.com
+Message-ID: <20220414090700.2729576-6-ovidiu.panait@windriver.com>
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 613e040e4dc285367bff0f8f75ea59839bc10947 upstream.
+
+When a task is writing to an fd opened by a different task, the perm check
+should use the credentials of the latter task. Add a test for it.
+
+Tested-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+[OP: backport to v4.19: adjust context]
+Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/cgroup/test_core.c | 68 +++++++++++++++++++++++++++++
+ 1 file changed, 68 insertions(+)
+
+--- a/tools/testing/selftests/cgroup/test_core.c
++++ b/tools/testing/selftests/cgroup/test_core.c
+@@ -354,6 +354,73 @@ cleanup:
+ return ret;
+ }
+
++/*
++ * cgroup migration permission check should be performed based on the
++ * credentials at the time of open instead of write.
++ */
++static int test_cgcore_lesser_euid_open(const char *root)
++{
++ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
++ int ret = KSFT_FAIL;
++ char *cg_test_a = NULL, *cg_test_b = NULL;
++ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
++ int cg_test_b_procs_fd = -1;
++ uid_t saved_uid;
++
++ cg_test_a = cg_name(root, "cg_test_a");
++ cg_test_b = cg_name(root, "cg_test_b");
++
++ if (!cg_test_a || !cg_test_b)
++ goto cleanup;
++
++ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
++ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
++
++ if (!cg_test_a_procs || !cg_test_b_procs)
++ goto cleanup;
++
++ if (cg_create(cg_test_a) || cg_create(cg_test_b))
++ goto cleanup;
++
++ if (cg_enter_current(cg_test_a))
++ goto cleanup;
++
++ if (chown(cg_test_a_procs, test_euid, -1) ||
++ chown(cg_test_b_procs, test_euid, -1))
++ goto cleanup;
++
++ saved_uid = geteuid();
++ if (seteuid(test_euid))
++ goto cleanup;
++
++ cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
++
++ if (seteuid(saved_uid))
++ goto cleanup;
++
++ if (cg_test_b_procs_fd < 0)
++ goto cleanup;
++
++ if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
++ goto cleanup;
++
++ ret = KSFT_PASS;
++
++cleanup:
++ cg_enter_current(root);
++ if (cg_test_b_procs_fd >= 0)
++ close(cg_test_b_procs_fd);
++ if (cg_test_b)
++ cg_destroy(cg_test_b);
++ if (cg_test_a)
++ cg_destroy(cg_test_a);
++ free(cg_test_b_procs);
++ free(cg_test_a_procs);
++ free(cg_test_b);
++ free(cg_test_a);
++ return ret;
++}
++
+ #define T(x) { x, #x }
+ struct corecg_test {
+ int (*fn)(const char *root);
+@@ -366,6 +433,7 @@ struct corecg_test {
+ T(test_cgcore_parent_becomes_threaded),
+ T(test_cgcore_invalid_domain),
+ T(test_cgcore_populated),
++ T(test_cgcore_lesser_euid_open),
+ };
+ #undef T
+
mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch
arm64-module-remove-noload-from-linker-script.patch
mm-sparsemem-fix-mem_section-will-never-be-null-gcc-12-warning.patch
+cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch
+cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch
+cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch
+selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch
+selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch
+selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch