From: Greg Kroah-Hartman Date: Thu, 14 Apr 2022 11:04:04 +0000 (+0200) Subject: 5.4-stable patches X-Git-Tag: v4.19.238~18 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e2ba7d21fb9034727315552f877e34a967c12d1b;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: acpi-processor-idle-check-for-architectural-support-for-lpi.patch cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch cpuidle-psci-move-the-has_lpi-check-to-the-beginning-of-the-function.patch io_uring-fix-fs-users-overflow.patch selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch --- diff --git a/queue-5.4/acpi-processor-idle-check-for-architectural-support-for-lpi.patch b/queue-5.4/acpi-processor-idle-check-for-architectural-support-for-lpi.patch new file mode 100644 index 00000000000..f3e2ea52b00 --- /dev/null +++ b/queue-5.4/acpi-processor-idle-check-for-architectural-support-for-lpi.patch @@ -0,0 +1,68 @@ +From eb087f305919ee8169ad65665610313e74260463 Mon Sep 17 00:00:00 2001 +From: Mario Limonciello +Date: Fri, 25 Feb 2022 13:06:46 -0600 +Subject: ACPI: processor idle: Check for architectural support for LPI + +From: Mario Limonciello + +commit eb087f305919ee8169ad65665610313e74260463 upstream. + +When `osc_pc_lpi_support_confirmed` is set through `_OSC` and `_LPI` is +populated then the cpuidle driver assumes that LPI is fully functional. + +However currently the kernel only provides architectural support for LPI +on ARM. This leads to high power consumption on X86 platforms that +otherwise try to enable LPI. + +So probe whether or not LPI support is implemented before enabling LPI in +the kernel. 
This is done by overloading `acpi_processor_ffh_lpi_probe` to +check whether it returns `-EOPNOTSUPP`. It also means that all future +implementations of `acpi_processor_ffh_lpi_probe` will need to follow +these semantics as well. + +Reviewed-by: Sudeep Holla +Signed-off-by: Mario Limonciello +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/processor_idle.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/drivers/acpi/processor_idle.c ++++ b/drivers/acpi/processor_idle.c +@@ -1201,6 +1201,11 @@ static int flatten_lpi_states(struct acp + return 0; + } + ++int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu) ++{ ++ return -EOPNOTSUPP; ++} ++ + static int acpi_processor_get_lpi_info(struct acpi_processor *pr) + { + int ret, i; +@@ -1209,6 +1214,11 @@ static int acpi_processor_get_lpi_info(s + struct acpi_device *d = NULL; + struct acpi_lpi_states_array info[2], *tmp, *prev, *curr; + ++ /* make sure our architecture has support */ ++ ret = acpi_processor_ffh_lpi_probe(pr->id); ++ if (ret == -EOPNOTSUPP) ++ return ret; ++ + if (!osc_pc_lpi_support_confirmed) + return -EOPNOTSUPP; + +@@ -1260,11 +1270,6 @@ static int acpi_processor_get_lpi_info(s + return 0; + } + +-int __weak acpi_processor_ffh_lpi_probe(unsigned int cpu) +-{ +- return -ENODEV; +-} +- + int __weak acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) + { + return -ENODEV; diff --git a/queue-5.4/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch b/queue-5.4/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch new file mode 100644 index 00000000000..132f553e187 --- /dev/null +++ b/queue-5.4/cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch @@ -0,0 +1,269 @@ +From foo@baz Thu Apr 14 12:32:11 PM CEST 2022 +From: Ovidiu Panait +Date: Thu, 14 Apr 2022 11:44:46 +0300 +Subject: cgroup: Allocate cgroup_file_ctx for kernfs_open_file->priv +To: stable@vger.kernel.org +Cc: tj@kernel.org, 
mkoutny@suse.com +Message-ID: <20220414084450.2728917-3-ovidiu.panait@windriver.com> + +From: Tejun Heo + +commit 0d2b5955b36250a9428c832664f2079cbf723bec upstream. + +of->priv is currently used by each interface file implementation to store +private information. This patch collects the current two private data usages +into struct cgroup_file_ctx which is allocated and freed by the common path. +This allows generic private data which applies to multiple files, which will +be used in the following patch. + +Note that cgroup_procs iterator is now embedded as procs.iter in the new +cgroup_file_ctx so that it doesn't need to be allocated and freed +separately. + +v2: union dropped from cgroup_file_ctx and the procs iterator is embedded in + cgroup_file_ctx as suggested by Linus. + +v3: Michal pointed out that cgroup1's procs pidlist uses of->priv too. + Converted. Didn't change to embedded allocation as cgroup1 pidlists get + stored for caching. + +Signed-off-by: Tejun Heo +Cc: Linus Torvalds +Reviewed-by: Michal Koutný +[mkoutny: v5.10: modify cgroup.pressure handlers, adjust context] +Signed-off-by: Michal Koutný +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cgroup/cgroup-internal.h | 17 ++++++++++++ + kernel/cgroup/cgroup-v1.c | 26 ++++++++++--------- + kernel/cgroup/cgroup.c | 54 +++++++++++++++++++++++++--------------- + 3 files changed, 65 insertions(+), 32 deletions(-) + +--- a/kernel/cgroup/cgroup-internal.h ++++ b/kernel/cgroup/cgroup-internal.h +@@ -65,6 +65,23 @@ static inline struct cgroup_fs_context * + return container_of(kfc, struct cgroup_fs_context, kfc); + } + ++struct cgroup_pidlist; ++ ++struct cgroup_file_ctx { ++ struct { ++ void *trigger; ++ } psi; ++ ++ struct { ++ bool started; ++ struct css_task_iter iter; ++ } procs; ++ ++ struct { ++ struct cgroup_pidlist *pidlist; ++ } procs1; ++}; ++ + /* + * A cgroup can be associated with multiple css_sets as different tasks may + * 
belong to different cgroups on different hierarchies. In the other +--- a/kernel/cgroup/cgroup-v1.c ++++ b/kernel/cgroup/cgroup-v1.c +@@ -398,6 +398,7 @@ static void *cgroup_pidlist_start(struct + * next pid to display, if any + */ + struct kernfs_open_file *of = s->private; ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *cgrp = seq_css(s)->cgroup; + struct cgroup_pidlist *l; + enum cgroup_filetype type = seq_cft(s)->private; +@@ -407,25 +408,24 @@ static void *cgroup_pidlist_start(struct + mutex_lock(&cgrp->pidlist_mutex); + + /* +- * !NULL @of->priv indicates that this isn't the first start() +- * after open. If the matching pidlist is around, we can use that. +- * Look for it. Note that @of->priv can't be used directly. It +- * could already have been destroyed. ++ * !NULL @ctx->procs1.pidlist indicates that this isn't the first ++ * start() after open. If the matching pidlist is around, we can use ++ * that. Look for it. Note that @ctx->procs1.pidlist can't be used ++ * directly. It could already have been destroyed. + */ +- if (of->priv) +- of->priv = cgroup_pidlist_find(cgrp, type); ++ if (ctx->procs1.pidlist) ++ ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type); + + /* + * Either this is the first start() after open or the matching + * pidlist has been destroyed inbetween. Create a new one. 
+ */ +- if (!of->priv) { +- ret = pidlist_array_load(cgrp, type, +- (struct cgroup_pidlist **)&of->priv); ++ if (!ctx->procs1.pidlist) { ++ ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist); + if (ret) + return ERR_PTR(ret); + } +- l = of->priv; ++ l = ctx->procs1.pidlist; + + if (pid) { + int end = l->length; +@@ -453,7 +453,8 @@ static void *cgroup_pidlist_start(struct + static void cgroup_pidlist_stop(struct seq_file *s, void *v) + { + struct kernfs_open_file *of = s->private; +- struct cgroup_pidlist *l = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; ++ struct cgroup_pidlist *l = ctx->procs1.pidlist; + + if (l) + mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, +@@ -464,7 +465,8 @@ static void cgroup_pidlist_stop(struct s + static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) + { + struct kernfs_open_file *of = s->private; +- struct cgroup_pidlist *l = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; ++ struct cgroup_pidlist *l = ctx->procs1.pidlist; + pid_t *p = v; + pid_t *end = l->list + l->length; + /* +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -3648,6 +3648,7 @@ static int cgroup_cpu_pressure_show(stru + static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, enum psi_res res) + { ++ struct cgroup_file_ctx *ctx = of->priv; + struct psi_trigger *new; + struct cgroup *cgrp; + struct psi_group *psi; +@@ -3660,7 +3661,7 @@ static ssize_t cgroup_pressure_write(str + cgroup_kn_unlock(of->kn); + + /* Allow only one trigger per file descriptor */ +- if (of->priv) { ++ if (ctx->psi.trigger) { + cgroup_put(cgrp); + return -EBUSY; + } +@@ -3672,7 +3673,7 @@ static ssize_t cgroup_pressure_write(str + return PTR_ERR(new); + } + +- smp_store_release(&of->priv, new); ++ smp_store_release(&ctx->psi.trigger, new); + cgroup_put(cgrp); + + return nbytes; +@@ -3702,12 +3703,15 @@ static ssize_t cgroup_cpu_pressure_write + static __poll_t 
cgroup_pressure_poll(struct kernfs_open_file *of, + poll_table *pt) + { +- return psi_trigger_poll(&of->priv, of->file, pt); ++ struct cgroup_file_ctx *ctx = of->priv; ++ return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); + } + + static void cgroup_pressure_release(struct kernfs_open_file *of) + { +- psi_trigger_destroy(of->priv); ++ struct cgroup_file_ctx *ctx = of->priv; ++ ++ psi_trigger_destroy(ctx->psi.trigger); + } + #endif /* CONFIG_PSI */ + +@@ -3748,18 +3752,31 @@ static ssize_t cgroup_freeze_write(struc + static int cgroup_file_open(struct kernfs_open_file *of) + { + struct cftype *cft = of->kn->priv; ++ struct cgroup_file_ctx *ctx; ++ int ret; + +- if (cft->open) +- return cft->open(of); +- return 0; ++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); ++ if (!ctx) ++ return -ENOMEM; ++ of->priv = ctx; ++ ++ if (!cft->open) ++ return 0; ++ ++ ret = cft->open(of); ++ if (ret) ++ kfree(ctx); ++ return ret; + } + + static void cgroup_file_release(struct kernfs_open_file *of) + { + struct cftype *cft = of->kn->priv; ++ struct cgroup_file_ctx *ctx = of->priv; + + if (cft->release) + cft->release(of); ++ kfree(ctx); + } + + static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, +@@ -4687,21 +4704,21 @@ void css_task_iter_end(struct css_task_i + + static void cgroup_procs_release(struct kernfs_open_file *of) + { +- if (of->priv) { +- css_task_iter_end(of->priv); +- kfree(of->priv); +- } ++ struct cgroup_file_ctx *ctx = of->priv; ++ ++ if (ctx->procs.started) ++ css_task_iter_end(&ctx->procs.iter); + } + + static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) + { + struct kernfs_open_file *of = s->private; +- struct css_task_iter *it = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; + + if (pos) + (*pos)++; + +- return css_task_iter_next(it); ++ return css_task_iter_next(&ctx->procs.iter); + } + + static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, +@@ -4709,21 +4726,18 @@ static void 
*__cgroup_procs_start(struct + { + struct kernfs_open_file *of = s->private; + struct cgroup *cgrp = seq_css(s)->cgroup; +- struct css_task_iter *it = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; ++ struct css_task_iter *it = &ctx->procs.iter; + + /* + * When a seq_file is seeked, it's always traversed sequentially + * from position 0, so we can simply keep iterating on !0 *pos. + */ +- if (!it) { ++ if (!ctx->procs.started) { + if (WARN_ON_ONCE((*pos))) + return ERR_PTR(-EINVAL); +- +- it = kzalloc(sizeof(*it), GFP_KERNEL); +- if (!it) +- return ERR_PTR(-ENOMEM); +- of->priv = it; + css_task_iter_start(&cgrp->self, iter_flags, it); ++ ctx->procs.started = true; + } else if (!(*pos)) { + css_task_iter_end(it); + css_task_iter_start(&cgrp->self, iter_flags, it); diff --git a/queue-5.4/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch b/queue-5.4/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch new file mode 100644 index 00000000000..3f2c7645026 --- /dev/null +++ b/queue-5.4/cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch @@ -0,0 +1,157 @@ +From foo@baz Thu Apr 14 12:32:11 PM CEST 2022 +From: Ovidiu Panait +Date: Thu, 14 Apr 2022 11:44:47 +0300 +Subject: cgroup: Use open-time cgroup namespace for process migration perm checks +To: stable@vger.kernel.org +Cc: tj@kernel.org, mkoutny@suse.com +Message-ID: <20220414084450.2728917-4-ovidiu.panait@windriver.com> + +From: Tejun Heo + +commit e57457641613fef0d147ede8bd6a3047df588b95 upstream. + +cgroup process migration permission checks are performed at write time as +whether a given operation is allowed or not is dependent on the content of +the write - the PID. This currently uses current's cgroup namespace which is +a potential security weakness as it may allow scenarios where a less +privileged process tricks a more privileged one into writing into a fd that +it created. 
+ +This patch makes cgroup remember the cgroup namespace at the time of open +and uses it for migration permission checks instead of current's. Note that +this only applies to cgroup2 as cgroup1 doesn't have namespace support. + +This also fixes a use-after-free bug on cgroupns reported in + + https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com + +Note that backporting this fix also requires the preceding patch. + +Reported-by: "Eric W. Biederman" +Suggested-by: Linus Torvalds +Cc: Michal Koutný +Cc: Oleg Nesterov +Reviewed-by: Michal Koutný +Reported-by: syzbot+50f5cf33a284ce738b62@syzkaller.appspotmail.com +Link: https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com +Fixes: 5136f6365ce3 ("cgroup: implement "nsdelegate" mount option") +Signed-off-by: Tejun Heo +[mkoutny: v5.10: duplicate ns check in procs/threads write handler, adjust context] +Signed-off-by: Michal Koutný +Signed-off-by: Greg Kroah-Hartman +[OP: backport to v5.4: drop changes to cgroup_attach_permissions() and +cgroup_css_set_fork(), adjust cgroup_procs_write_permission() calls] +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cgroup/cgroup-internal.h | 2 ++ + kernel/cgroup/cgroup.c | 24 +++++++++++++++++------- + 2 files changed, 19 insertions(+), 7 deletions(-) + +--- a/kernel/cgroup/cgroup-internal.h ++++ b/kernel/cgroup/cgroup-internal.h +@@ -68,6 +68,8 @@ static inline struct cgroup_fs_context * + struct cgroup_pidlist; + + struct cgroup_file_ctx { ++ struct cgroup_namespace *ns; ++ + struct { + void *trigger; + } psi; +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -3758,14 +3758,19 @@ static int cgroup_file_open(struct kernf + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; ++ ++ ctx->ns = current->nsproxy->cgroup_ns; ++ get_cgroup_ns(ctx->ns); + of->priv = ctx; + + if (!cft->open) + return 0; + + ret = cft->open(of); +- if (ret) ++ if (ret) { ++ put_cgroup_ns(ctx->ns); + kfree(ctx); ++ } + return 
ret; + } + +@@ -3776,13 +3781,14 @@ static void cgroup_file_release(struct k + + if (cft->release) + cft->release(of); ++ put_cgroup_ns(ctx->ns); + kfree(ctx); + } + + static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) + { +- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *cgrp = of->kn->parent->priv; + struct cftype *cft = of->kn->priv; + struct cgroup_subsys_state *css; +@@ -3796,7 +3802,7 @@ static ssize_t cgroup_file_write(struct + */ + if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && + !(cft->flags & CFTYPE_NS_DELEGATABLE) && +- ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) ++ ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp) + return -EPERM; + + if (cft->write) +@@ -4772,9 +4778,9 @@ static int cgroup_procs_show(struct seq_ + + static int cgroup_procs_write_permission(struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, +- struct super_block *sb) ++ struct super_block *sb, ++ struct cgroup_namespace *ns) + { +- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup *com_cgrp = src_cgrp; + struct inode *inode; + int ret; +@@ -4810,6 +4816,7 @@ static int cgroup_procs_write_permission + static ssize_t cgroup_procs_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) + { ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *src_cgrp, *dst_cgrp; + struct task_struct *task; + const struct cred *saved_cred; +@@ -4836,7 +4843,8 @@ static ssize_t cgroup_procs_write(struct + */ + saved_cred = override_creds(of->file->f_cred); + ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, +- of->file->f_path.dentry->d_sb); ++ of->file->f_path.dentry->d_sb, ++ ctx->ns); + revert_creds(saved_cred); + if (ret) + goto out_finish; +@@ -4859,6 +4867,7 @@ static void *cgroup_threads_start(struct + static ssize_t cgroup_threads_write(struct kernfs_open_file *of, + char *buf, size_t 
nbytes, loff_t off) + { ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *src_cgrp, *dst_cgrp; + struct task_struct *task; + const struct cred *saved_cred; +@@ -4887,7 +4896,8 @@ static ssize_t cgroup_threads_write(stru + */ + saved_cred = override_creds(of->file->f_cred); + ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, +- of->file->f_path.dentry->d_sb); ++ of->file->f_path.dentry->d_sb, ++ ctx->ns); + revert_creds(saved_cred); + if (ret) + goto out_finish; diff --git a/queue-5.4/cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch b/queue-5.4/cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch new file mode 100644 index 00000000000..d054ca3512a --- /dev/null +++ b/queue-5.4/cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch @@ -0,0 +1,105 @@ +From foo@baz Thu Apr 14 12:32:11 PM CEST 2022 +From: Ovidiu Panait +Date: Thu, 14 Apr 2022 11:44:45 +0300 +Subject: cgroup: Use open-time credentials for process migraton perm checks +To: stable@vger.kernel.org +Cc: tj@kernel.org, mkoutny@suse.com +Message-ID: <20220414084450.2728917-2-ovidiu.panait@windriver.com> + +From: Tejun Heo + +commit 1756d7994ad85c2479af6ae5a9750b92324685af upstream. + +cgroup process migration permission checks are performed at write time as +whether a given operation is allowed or not is dependent on the content of +the write - the PID. This currently uses current's credentials which is a +potential security weakness as it may allow scenarios where a less +privileged process tricks a more privileged one into writing into a fd that +it created. + +This patch makes both cgroup2 and cgroup1 process migration interfaces to +use the credentials saved at the time of open (file->f_cred) instead of +current's. + +Reported-by: "Eric W. 
Biederman" +Suggested-by: Linus Torvalds +Fixes: 187fe84067bd ("cgroup: require write perm on common ancestor when moving processes on the default hierarchy") +Reviewed-by: Michal Koutný +Signed-off-by: Tejun Heo +[OP: backport to 5.4: apply original __cgroup_procs_write() changes to +cgroup_threads_write() and cgroup_procs_write()] +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cgroup/cgroup-v1.c | 7 ++++--- + kernel/cgroup/cgroup.c | 17 ++++++++++++++++- + 2 files changed, 20 insertions(+), 4 deletions(-) + +--- a/kernel/cgroup/cgroup-v1.c ++++ b/kernel/cgroup/cgroup-v1.c +@@ -507,10 +507,11 @@ static ssize_t __cgroup1_procs_write(str + goto out_unlock; + + /* +- * Even if we're attaching all tasks in the thread group, we only +- * need to check permissions on one of them. ++ * Even if we're attaching all tasks in the thread group, we only need ++ * to check permissions on one of them. Check permissions using the ++ * credentials from file open to protect against inherited fd attacks. + */ +- cred = current_cred(); ++ cred = of->file->f_cred; + tcred = get_task_cred(task); + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && + !uid_eq(cred->euid, tcred->uid) && +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -4798,6 +4798,7 @@ static ssize_t cgroup_procs_write(struct + { + struct cgroup *src_cgrp, *dst_cgrp; + struct task_struct *task; ++ const struct cred *saved_cred; + ssize_t ret; + + dst_cgrp = cgroup_kn_lock_live(of->kn, false); +@@ -4814,8 +4815,15 @@ static ssize_t cgroup_procs_write(struct + src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); + spin_unlock_irq(&css_set_lock); + ++ /* ++ * Process and thread migrations follow same delegation rule. Check ++ * permissions using the credentials from file open to protect against ++ * inherited fd attacks. 
++ */ ++ saved_cred = override_creds(of->file->f_cred); + ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, + of->file->f_path.dentry->d_sb); ++ revert_creds(saved_cred); + if (ret) + goto out_finish; + +@@ -4839,6 +4847,7 @@ static ssize_t cgroup_threads_write(stru + { + struct cgroup *src_cgrp, *dst_cgrp; + struct task_struct *task; ++ const struct cred *saved_cred; + ssize_t ret; + + buf = strstrip(buf); +@@ -4857,9 +4866,15 @@ static ssize_t cgroup_threads_write(stru + src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); + spin_unlock_irq(&css_set_lock); + +- /* thread migrations follow the cgroup.procs delegation rule */ ++ /* ++ * Process and thread migrations follow same delegation rule. Check ++ * permissions using the credentials from file open to protect against ++ * inherited fd attacks. ++ */ ++ saved_cred = override_creds(of->file->f_cred); + ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, + of->file->f_path.dentry->d_sb); ++ revert_creds(saved_cred); + if (ret) + goto out_finish; + diff --git a/queue-5.4/cpuidle-psci-move-the-has_lpi-check-to-the-beginning-of-the-function.patch b/queue-5.4/cpuidle-psci-move-the-has_lpi-check-to-the-beginning-of-the-function.patch new file mode 100644 index 00000000000..287e9d97953 --- /dev/null +++ b/queue-5.4/cpuidle-psci-move-the-has_lpi-check-to-the-beginning-of-the-function.patch @@ -0,0 +1,46 @@ +From 01f6c7338ce267959975da65d86ba34f44d54220 Mon Sep 17 00:00:00 2001 +From: Mario Limonciello +Date: Fri, 25 Feb 2022 13:06:45 -0600 +Subject: cpuidle: PSCI: Move the `has_lpi` check to the beginning of the function + +From: Mario Limonciello + +commit 01f6c7338ce267959975da65d86ba34f44d54220 upstream. + +Currently the first thing checked is whether the PSCI cpu_suspend function +has been initialized. + +Another change will be overloading `acpi_processor_ffh_lpi_probe` and +calling it sooner. So make the `has_lpi` check the first thing checked +to prepare for that change. 
+ +Reviewed-by: Sudeep Holla +Signed-off-by: Mario Limonciello +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kernel/cpuidle.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/arm64/kernel/cpuidle.c ++++ b/arch/arm64/kernel/cpuidle.c +@@ -53,6 +53,9 @@ static int psci_acpi_cpu_init_idle(unsig + struct acpi_lpi_state *lpi; + struct acpi_processor *pr = per_cpu(processors, cpu); + ++ if (unlikely(!pr || !pr->flags.has_lpi)) ++ return -EINVAL; ++ + /* + * If the PSCI cpu_suspend function hook has not been initialized + * idle states must not be enabled, so bail out +@@ -60,9 +63,6 @@ static int psci_acpi_cpu_init_idle(unsig + if (!psci_ops.cpu_suspend) + return -EOPNOTSUPP; + +- if (unlikely(!pr || !pr->flags.has_lpi)) +- return -EINVAL; +- + count = pr->power.count - 1; + if (count <= 0) + return -ENODEV; diff --git a/queue-5.4/io_uring-fix-fs-users-overflow.patch b/queue-5.4/io_uring-fix-fs-users-overflow.patch new file mode 100644 index 00000000000..7224d39b8a4 --- /dev/null +++ b/queue-5.4/io_uring-fix-fs-users-overflow.patch @@ -0,0 +1,72 @@ +From asml.silence@gmail.com Thu Apr 14 12:29:47 2022 +From: Pavel Begunkov +Date: Thu, 14 Apr 2022 08:50:50 +0100 +Subject: io_uring: fix fs->users overflow +To: Greg KH + +From: Pavel Begunkov + +There is a bunch of cases where we can grab req->fs but not put it, this +can be used to cause a controllable overflow with further implications. +Release req->fs in the request free path and make sure we zero the field +to be sure we don't do it twice. 
+ +Fixes: cac68d12c531 ("io_uring: grab ->fs as part of async offload") +Reported-by: Bing-Jhong Billy Jheng +Signed-off-by: Pavel Begunkov +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 28 ++++++++++++++++++---------- + 1 file changed, 18 insertions(+), 10 deletions(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -438,6 +438,22 @@ static struct io_ring_ctx *io_ring_ctx_a + return ctx; + } + ++static void io_req_put_fs(struct io_kiocb *req) ++{ ++ struct fs_struct *fs = req->fs; ++ ++ if (!fs) ++ return; ++ ++ spin_lock(&req->fs->lock); ++ if (--fs->users) ++ fs = NULL; ++ spin_unlock(&req->fs->lock); ++ if (fs) ++ free_fs_struct(fs); ++ req->fs = NULL; ++} ++ + static inline bool __io_sequence_defer(struct io_ring_ctx *ctx, + struct io_kiocb *req) + { +@@ -695,6 +711,7 @@ static void io_free_req_many(struct io_r + + static void __io_free_req(struct io_kiocb *req) + { ++ io_req_put_fs(req); + if (req->file && !(req->flags & REQ_F_FIXED_FILE)) + fput(req->file); + percpu_ref_put(&req->ctx->refs); +@@ -1701,16 +1718,7 @@ static int io_send_recvmsg(struct io_kio + ret = -EINTR; + } + +- if (req->fs) { +- struct fs_struct *fs = req->fs; +- +- spin_lock(&req->fs->lock); +- if (--fs->users) +- fs = NULL; +- spin_unlock(&req->fs->lock); +- if (fs) +- free_fs_struct(fs); +- } ++ io_req_put_fs(req); + io_cqring_add_event(req->ctx, sqe->user_data, ret); + io_put_req(req); + return 0; diff --git a/queue-5.4/selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch b/queue-5.4/selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch new file mode 100644 index 00000000000..2bccf7cd1e5 --- /dev/null +++ b/queue-5.4/selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch @@ -0,0 +1,34 @@ +From foo@baz Thu Apr 14 12:32:11 PM CEST 2022 +From: Ovidiu Panait +Date: Thu, 14 Apr 2022 11:44:48 +0300 +Subject: selftests: cgroup: Make cg_create() use 0755 for permission instead of 0644 +To: stable@vger.kernel.org 
+Cc: tj@kernel.org, mkoutny@suse.com +Message-ID: <20220414084450.2728917-5-ovidiu.panait@windriver.com> + +From: Tejun Heo + +commit b09c2baa56347ae65795350dfcc633dedb1c2970 upstream. + +0644 is an odd perm to create a cgroup which is a directory. Use the regular +0755 instead. This is necessary for euid switching test case. + +Reviewed-by: Michal Koutný +Signed-off-by: Tejun Heo +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/cgroup/cgroup_util.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/cgroup/cgroup_util.c ++++ b/tools/testing/selftests/cgroup/cgroup_util.c +@@ -202,7 +202,7 @@ int cg_find_unified_root(char *root, siz + + int cg_create(const char *cgroup) + { +- return mkdir(cgroup, 0644); ++ return mkdir(cgroup, 0755); + } + + int cg_wait_for_proc_count(const char *cgroup, int count) diff --git a/queue-5.4/selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch b/queue-5.4/selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch new file mode 100644 index 00000000000..212c6eb803b --- /dev/null +++ b/queue-5.4/selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch @@ -0,0 +1,148 @@ +From foo@baz Thu Apr 14 12:32:11 PM CEST 2022 +From: Ovidiu Panait +Date: Thu, 14 Apr 2022 11:44:50 +0300 +Subject: selftests: cgroup: Test open-time cgroup namespace usage for migration checks +To: stable@vger.kernel.org +Cc: tj@kernel.org, mkoutny@suse.com +Message-ID: <20220414084450.2728917-7-ovidiu.panait@windriver.com> + +From: Tejun Heo + +commit bf35a7879f1dfb0d050fe779168bcf25c7de66f5 upstream. + +When a task is writing to an fd opened by a different task, the perm check +should use the cgroup namespace of the latter task. Add a test for it. 
+ +Tested-by: Michal Koutný +Signed-off-by: Tejun Heo +[OP: backport to v5.4: adjust context, add wait.h and fcntl.h includes] +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/cgroup/test_core.c | 99 +++++++++++++++++++++++++++++ + 1 file changed, 99 insertions(+) + +--- a/tools/testing/selftests/cgroup/test_core.c ++++ b/tools/testing/selftests/cgroup/test_core.c +@@ -1,8 +1,13 @@ + /* SPDX-License-Identifier: GPL-2.0 */ + ++#define _GNU_SOURCE + #include ++#include + #include ++#include + #include ++#include ++#include + #include + #include + +@@ -421,6 +426,99 @@ cleanup: + return ret; + } + ++struct lesser_ns_open_thread_arg { ++ const char *path; ++ int fd; ++ int err; ++}; ++ ++static int lesser_ns_open_thread_fn(void *arg) ++{ ++ struct lesser_ns_open_thread_arg *targ = arg; ++ ++ targ->fd = open(targ->path, O_RDWR); ++ targ->err = errno; ++ return 0; ++} ++ ++/* ++ * cgroup migration permission check should be performed based on the cgroup ++ * namespace at the time of open instead of write. 
++ */ ++static int test_cgcore_lesser_ns_open(const char *root) ++{ ++ static char stack[65536]; ++ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ ++ int ret = KSFT_FAIL; ++ char *cg_test_a = NULL, *cg_test_b = NULL; ++ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; ++ int cg_test_b_procs_fd = -1; ++ struct lesser_ns_open_thread_arg targ = { .fd = -1 }; ++ pid_t pid; ++ int status; ++ ++ cg_test_a = cg_name(root, "cg_test_a"); ++ cg_test_b = cg_name(root, "cg_test_b"); ++ ++ if (!cg_test_a || !cg_test_b) ++ goto cleanup; ++ ++ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); ++ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); ++ ++ if (!cg_test_a_procs || !cg_test_b_procs) ++ goto cleanup; ++ ++ if (cg_create(cg_test_a) || cg_create(cg_test_b)) ++ goto cleanup; ++ ++ if (cg_enter_current(cg_test_b)) ++ goto cleanup; ++ ++ if (chown(cg_test_a_procs, test_euid, -1) || ++ chown(cg_test_b_procs, test_euid, -1)) ++ goto cleanup; ++ ++ targ.path = cg_test_b_procs; ++ pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack), ++ CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD, ++ &targ); ++ if (pid < 0) ++ goto cleanup; ++ ++ if (waitpid(pid, &status, 0) < 0) ++ goto cleanup; ++ ++ if (!WIFEXITED(status)) ++ goto cleanup; ++ ++ cg_test_b_procs_fd = targ.fd; ++ if (cg_test_b_procs_fd < 0) ++ goto cleanup; ++ ++ if (cg_enter_current(cg_test_a)) ++ goto cleanup; ++ ++ if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT) ++ goto cleanup; ++ ++ ret = KSFT_PASS; ++ ++cleanup: ++ cg_enter_current(root); ++ if (cg_test_b_procs_fd >= 0) ++ close(cg_test_b_procs_fd); ++ if (cg_test_b) ++ cg_destroy(cg_test_b); ++ if (cg_test_a) ++ cg_destroy(cg_test_a); ++ free(cg_test_b_procs); ++ free(cg_test_a_procs); ++ free(cg_test_b); ++ free(cg_test_a); ++ return ret; ++} ++ + #define T(x) { x, #x } + struct corecg_test { + int (*fn)(const char *root); +@@ -434,6 +532,7 @@ struct corecg_test { + 
T(test_cgcore_invalid_domain), + T(test_cgcore_populated), + T(test_cgcore_lesser_euid_open), ++ T(test_cgcore_lesser_ns_open), + }; + #undef T + diff --git a/queue-5.4/selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch b/queue-5.4/selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch new file mode 100644 index 00000000000..8030bf1eab9 --- /dev/null +++ b/queue-5.4/selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch @@ -0,0 +1,108 @@ +From foo@baz Thu Apr 14 12:32:11 PM CEST 2022 +From: Ovidiu Panait +Date: Thu, 14 Apr 2022 11:44:49 +0300 +Subject: selftests: cgroup: Test open-time credential usage for migration checks +To: stable@vger.kernel.org +Cc: tj@kernel.org, mkoutny@suse.com +Message-ID: <20220414084450.2728917-6-ovidiu.panait@windriver.com> + +From: Tejun Heo + +commit 613e040e4dc285367bff0f8f75ea59839bc10947 upstream. + +When a task is writing to an fd opened by a different task, the perm check +should use the credentials of the latter task. Add a test for it. + +Tested-by: Michal Koutný +Signed-off-by: Tejun Heo +[OP: backport to v5.4: adjust context] +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/cgroup/test_core.c | 68 +++++++++++++++++++++++++++++ + 1 file changed, 68 insertions(+) + +--- a/tools/testing/selftests/cgroup/test_core.c ++++ b/tools/testing/selftests/cgroup/test_core.c +@@ -354,6 +354,73 @@ cleanup: + return ret; + } + ++/* ++ * cgroup migration permission check should be performed based on the ++ * credentials at the time of open instead of write. 
++ */ ++static int test_cgcore_lesser_euid_open(const char *root) ++{ ++ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ ++ int ret = KSFT_FAIL; ++ char *cg_test_a = NULL, *cg_test_b = NULL; ++ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; ++ int cg_test_b_procs_fd = -1; ++ uid_t saved_uid; ++ ++ cg_test_a = cg_name(root, "cg_test_a"); ++ cg_test_b = cg_name(root, "cg_test_b"); ++ ++ if (!cg_test_a || !cg_test_b) ++ goto cleanup; ++ ++ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); ++ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); ++ ++ if (!cg_test_a_procs || !cg_test_b_procs) ++ goto cleanup; ++ ++ if (cg_create(cg_test_a) || cg_create(cg_test_b)) ++ goto cleanup; ++ ++ if (cg_enter_current(cg_test_a)) ++ goto cleanup; ++ ++ if (chown(cg_test_a_procs, test_euid, -1) || ++ chown(cg_test_b_procs, test_euid, -1)) ++ goto cleanup; ++ ++ saved_uid = geteuid(); ++ if (seteuid(test_euid)) ++ goto cleanup; ++ ++ cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR); ++ ++ if (seteuid(saved_uid)) ++ goto cleanup; ++ ++ if (cg_test_b_procs_fd < 0) ++ goto cleanup; ++ ++ if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES) ++ goto cleanup; ++ ++ ret = KSFT_PASS; ++ ++cleanup: ++ cg_enter_current(root); ++ if (cg_test_b_procs_fd >= 0) ++ close(cg_test_b_procs_fd); ++ if (cg_test_b) ++ cg_destroy(cg_test_b); ++ if (cg_test_a) ++ cg_destroy(cg_test_a); ++ free(cg_test_b_procs); ++ free(cg_test_a_procs); ++ free(cg_test_b); ++ free(cg_test_a); ++ return ret; ++} ++ + #define T(x) { x, #x } + struct corecg_test { + int (*fn)(const char *root); +@@ -366,6 +433,7 @@ struct corecg_test { + T(test_cgcore_parent_becomes_threaded), + T(test_cgcore_invalid_domain), + T(test_cgcore_populated), ++ T(test_cgcore_lesser_euid_open), + }; + #undef T + diff --git a/queue-5.4/series b/queue-5.4/series index 855bc233c27..dc146541239 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -464,3 +464,12 @@ 
arm64-module-remove-noload-from-linker-script.patch mm-sparsemem-fix-mem_section-will-never-be-null-gcc-12-warning.patch drm-amdkfd-add-missing-void-argument-to-function-kgd2kfd_init.patch drm-amdkfd-fix-wstrict-prototypes-from-amdgpu_amdkfd_gfx_10_0_get_functions.patch +io_uring-fix-fs-users-overflow.patch +cgroup-use-open-time-credentials-for-process-migraton-perm-checks.patch +cgroup-allocate-cgroup_file_ctx-for-kernfs_open_file-priv.patch +cgroup-use-open-time-cgroup-namespace-for-process-migration-perm-checks.patch +selftests-cgroup-make-cg_create-use-0755-for-permission-instead-of-0644.patch +selftests-cgroup-test-open-time-credential-usage-for-migration-checks.patch +selftests-cgroup-test-open-time-cgroup-namespace-usage-for-migration-checks.patch +cpuidle-psci-move-the-has_lpi-check-to-the-beginning-of-the-function.patch +acpi-processor-idle-check-for-architectural-support-for-lpi.patch