--- /dev/null
+From eee87853794187f6adbe19533ed79c8b44b36a91 Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Tue, 11 Apr 2023 09:35:59 -0400
+Subject: cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Waiman Long <longman@redhat.com>
+
+commit eee87853794187f6adbe19533ed79c8b44b36a91 upstream.
+
+In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept
+new tasks. It is too late to check that in cpuset_fork(). So we need
+to add the cpuset_can_fork() and cpuset_cancel_fork() methods to
+pre-check it before we can allow attachment to a different cpuset.
+
+We also need to set the attach_in_progress flag to alert other code
+that a new task is going to be added to the cpuset.
+
+Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
+Suggested-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cpuset.c | 97 +++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 86 insertions(+), 11 deletions(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -2453,6 +2453,20 @@ static int fmeter_getrate(struct fmeter
+
+ static struct cpuset *cpuset_attach_old_cs;
+
++/*
++ * Check to see if a cpuset can accept a new task
++ * For v1, cpus_allowed and mems_allowed can't be empty.
++ * For v2, effective_cpus can't be empty.
++ * Note that in v1, effective_cpus = cpus_allowed.
++ */
++static int cpuset_can_attach_check(struct cpuset *cs)
++{
++ if (cpumask_empty(cs->effective_cpus) ||
++ (!is_in_v2_mode() && nodes_empty(cs->mems_allowed)))
++ return -ENOSPC;
++ return 0;
++}
++
+ /* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
+ static int cpuset_can_attach(struct cgroup_taskset *tset)
+ {
+@@ -2467,16 +2481,9 @@ static int cpuset_can_attach(struct cgro
+
+ percpu_down_write(&cpuset_rwsem);
+
+- /* allow moving tasks into an empty cpuset if on default hierarchy */
+- ret = -ENOSPC;
+- if (!is_in_v2_mode() &&
+- (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
+- goto out_unlock;
+-
+- /*
+- * Task cannot be moved to a cpuset with empty effective cpus.
+- */
+- if (cpumask_empty(cs->effective_cpus))
++ /* Check to see if task is allowed in the cpuset */
++ ret = cpuset_can_attach_check(cs);
++ if (ret)
+ goto out_unlock;
+
+ cgroup_taskset_for_each(task, css, tset) {
+@@ -2493,7 +2500,6 @@ static int cpuset_can_attach(struct cgro
+ * changes which zero cpus/mems_allowed.
+ */
+ cs->attach_in_progress++;
+- ret = 0;
+ out_unlock:
+ percpu_up_write(&cpuset_rwsem);
+ return ret;
+@@ -3265,6 +3271,68 @@ static void cpuset_bind(struct cgroup_su
+ }
+
+ /*
++ * In case the child is cloned into a cpuset different from its parent,
++ * additional checks are done to see if the move is allowed.
++ */
++static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
++{
++ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
++ bool same_cs;
++ int ret;
++
++ rcu_read_lock();
++ same_cs = (cs == task_cs(current));
++ rcu_read_unlock();
++
++ if (same_cs)
++ return 0;
++
++ lockdep_assert_held(&cgroup_mutex);
++ percpu_down_write(&cpuset_rwsem);
++
++ /* Check to see if task is allowed in the cpuset */
++ ret = cpuset_can_attach_check(cs);
++ if (ret)
++ goto out_unlock;
++
++ ret = task_can_attach(task, cs->effective_cpus);
++ if (ret)
++ goto out_unlock;
++
++ ret = security_task_setscheduler(task);
++ if (ret)
++ goto out_unlock;
++
++ /*
++ * Mark attach is in progress. This makes validate_change() fail
++ * changes which zero cpus/mems_allowed.
++ */
++ cs->attach_in_progress++;
++out_unlock:
++ percpu_up_write(&cpuset_rwsem);
++ return ret;
++}
++
++static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
++{
++ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
++ bool same_cs;
++
++ rcu_read_lock();
++ same_cs = (cs == task_cs(current));
++ rcu_read_unlock();
++
++ if (same_cs)
++ return;
++
++ percpu_down_write(&cpuset_rwsem);
++ cs->attach_in_progress--;
++ if (!cs->attach_in_progress)
++ wake_up(&cpuset_attach_wq);
++ percpu_up_write(&cpuset_rwsem);
++}
++
++/*
+ * Make sure the new task conform to the current state of its parent,
+ * which could have been changed by cpuset just after it inherits the
+ * state from the parent and before it sits on the cgroup's task list.
+@@ -3292,6 +3360,11 @@ static void cpuset_fork(struct task_stru
+ percpu_down_write(&cpuset_rwsem);
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+ cpuset_attach_task(cs, task);
++
++ cs->attach_in_progress--;
++ if (!cs->attach_in_progress)
++ wake_up(&cpuset_attach_wq);
++
+ percpu_up_write(&cpuset_rwsem);
+ }
+
+@@ -3305,6 +3378,8 @@ struct cgroup_subsys cpuset_cgrp_subsys
+ .attach = cpuset_attach,
+ .post_attach = cpuset_post_attach,
+ .bind = cpuset_bind,
++ .can_fork = cpuset_can_fork,
++ .cancel_fork = cpuset_cancel_fork,
+ .fork = cpuset_fork,
+ .legacy_cftypes = legacy_files,
+ .dfl_cftypes = dfl_files,
--- /dev/null
+From 292fd843de26c551856e66faf134512c52dd78b4 Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Fri, 17 Mar 2023 11:15:05 -0400
+Subject: cgroup/cpuset: Fix partition root's cpuset.cpus update bug
+
+From: Waiman Long <longman@redhat.com>
+
+commit 292fd843de26c551856e66faf134512c52dd78b4 upstream.
+
+It was found that commit 7a2127e66a00 ("cpuset: Call
+set_cpus_allowed_ptr() with appropriate mask for task") introduced a bug
+that corrupted "cpuset.cpus" of a partition root when it was updated.
+
+It is because the tmp->new_cpus field of the passed tmp parameter
+of update_parent_subparts_cpumask() should not be used at all as
+it contains important cpumask data that should not be overwritten.
+Fix it by using tmp->addmask instead.
+
+Also update update_cpumask() to make sure that trialcs->cpus_allowed
+will not be corrupted until it is no longer needed.
+
+Fixes: 7a2127e66a00 ("cpuset: Call set_cpus_allowed_ptr() with appropriate mask for task")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Cc: stable@vger.kernel.org # v6.2+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cpuset.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1513,7 +1513,7 @@ static int update_parent_subparts_cpumas
+ spin_unlock_irq(&callback_lock);
+
+ if (adding || deleting)
+- update_tasks_cpumask(parent, tmp->new_cpus);
++ update_tasks_cpumask(parent, tmp->addmask);
+
+ /*
+ * Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary.
+@@ -1770,10 +1770,13 @@ static int update_cpumask(struct cpuset
+ /*
+ * Use the cpumasks in trialcs for tmpmasks when they are pointers
+ * to allocated cpumasks.
++ *
++ * Note that update_parent_subparts_cpumask() uses only addmask &
++ * delmask, but not new_cpus.
+ */
+ tmp.addmask = trialcs->subparts_cpus;
+ tmp.delmask = trialcs->effective_cpus;
+- tmp.new_cpus = trialcs->cpus_allowed;
++ tmp.new_cpus = NULL;
+ #endif
+
+ retval = validate_change(cs, trialcs);
+@@ -1838,6 +1841,11 @@ static int update_cpumask(struct cpuset
+ }
+ spin_unlock_irq(&callback_lock);
+
++#ifdef CONFIG_CPUMASK_OFFSTACK
++ /* Now trialcs->cpus_allowed is available */
++ tmp.new_cpus = trialcs->cpus_allowed;
++#endif
++
+ /* effective_cpus will be updated here */
+ update_cpumasks_hier(cs, &tmp, false);
+
--- /dev/null
+From 42a11bf5c5436e91b040aeb04063be1710bb9f9c Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Tue, 11 Apr 2023 09:35:58 -0400
+Subject: cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly
+
+From: Waiman Long <longman@redhat.com>
+
+commit 42a11bf5c5436e91b040aeb04063be1710bb9f9c upstream.
+
+By default, the clone(2) syscall spawns a child process into the same
+cgroup as its parent. With the use of the CLONE_INTO_CGROUP flag
+introduced by commit ef2c41cf38a7 ("clone3: allow spawning processes
+into cgroups"), the child will be spawned into a different cgroup which
+is somewhat similar to writing the child's tid into "cgroup.threads".
+
+The current cpuset_fork() method does not properly handle the
+CLONE_INTO_CGROUP case where the cpuset of the child may be different
+from that of its parent. Update the cpuset_fork() method to treat the
+CLONE_INTO_CGROUP case similar to cpuset_attach().
+
+Since the newly cloned task has not been running yet, its actual
+memory usage isn't known. So it is not necessary to make change to mm
+in cpuset_fork().
+
+Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
+Reported-by: Giuseppe Scrivano <gscrivan@redhat.com>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cpuset.c | 62 +++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 42 insertions(+), 20 deletions(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -2515,16 +2515,33 @@ static void cpuset_cancel_attach(struct
+ }
+
+ /*
+- * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
++ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
+ * but we can't allocate it dynamically there. Define it global and
+ * allocate from cpuset_init().
+ */
+ static cpumask_var_t cpus_attach;
++static nodemask_t cpuset_attach_nodemask_to;
++
++static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
++{
++ percpu_rwsem_assert_held(&cpuset_rwsem);
++
++ if (cs != &top_cpuset)
++ guarantee_online_cpus(task, cpus_attach);
++ else
++ cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
++ /*
++ * can_attach beforehand should guarantee that this doesn't
++ * fail. TODO: have a better way to handle failure here
++ */
++ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
++
++ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
++ cpuset_update_task_spread_flags(cs, task);
++}
+
+ static void cpuset_attach(struct cgroup_taskset *tset)
+ {
+- /* static buf protected by cpuset_rwsem */
+- static nodemask_t cpuset_attach_nodemask_to;
+ struct task_struct *task;
+ struct task_struct *leader;
+ struct cgroup_subsys_state *css;
+@@ -2555,20 +2572,8 @@ static void cpuset_attach(struct cgroup_
+
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+
+- cgroup_taskset_for_each(task, css, tset) {
+- if (cs != &top_cpuset)
+- guarantee_online_cpus(task, cpus_attach);
+- else
+- cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
+- /*
+- * can_attach beforehand should guarantee that this doesn't
+- * fail. TODO: have a better way to handle failure here
+- */
+- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+-
+- cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+- cpuset_update_task_spread_flags(cs, task);
+- }
++ cgroup_taskset_for_each(task, css, tset)
++ cpuset_attach_task(cs, task);
+
+ /*
+ * Change mm for all threadgroup leaders. This is expensive and may
+@@ -3266,11 +3271,28 @@ static void cpuset_bind(struct cgroup_su
+ */
+ static void cpuset_fork(struct task_struct *task)
+ {
+- if (task_css_is_root(task, cpuset_cgrp_id))
++ struct cpuset *cs;
++ bool same_cs;
++
++ rcu_read_lock();
++ cs = task_cs(task);
++ same_cs = (cs == task_cs(current));
++ rcu_read_unlock();
++
++ if (same_cs) {
++ if (cs == &top_cpuset)
++ return;
++
++ set_cpus_allowed_ptr(task, current->cpus_ptr);
++ task->mems_allowed = current->mems_allowed;
+ return;
++ }
+
+- set_cpus_allowed_ptr(task, current->cpus_ptr);
+- task->mems_allowed = current->mems_allowed;
++ /* CLONE_INTO_CGROUP */
++ percpu_down_write(&cpuset_rwsem);
++ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
++ cpuset_attach_task(cs, task);
++ percpu_up_write(&cpuset_rwsem);
+ }
+
+ struct cgroup_subsys cpuset_cgrp_subsys = {
--- /dev/null
+From ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2 Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Tue, 11 Apr 2023 09:35:57 -0400
+Subject: cgroup/cpuset: Wake up cpuset_attach_wq tasks in cpuset_cancel_attach()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Waiman Long <longman@redhat.com>
+
+commit ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2 upstream.
+
+After a successful cpuset_can_attach() call which increments the
+attach_in_progress flag, either cpuset_cancel_attach() or cpuset_attach()
+will be called later. In cpuset_attach(), tasks in cpuset_attach_wq,
+if present, will be woken up at the end. That is not the case in
+cpuset_cancel_attach(). So missed wakeup is possible if the attach
+operation is somehow cancelled. Fix that by doing the wakeup in
+cpuset_cancel_attach() as well.
+
+Fixes: e44193d39e8d ("cpuset: let hotplug propagation work wait for task attaching")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Reviewed-by: Michal Koutný <mkoutny@suse.com>
+Cc: stable@vger.kernel.org # v3.11+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cpuset.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -2502,11 +2502,15 @@ out_unlock:
+ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
+ {
+ struct cgroup_subsys_state *css;
++ struct cpuset *cs;
+
+ cgroup_taskset_first(tset, &css);
++ cs = css_cs(css);
+
+ percpu_down_write(&cpuset_rwsem);
+- css_cs(css)->attach_in_progress--;
++ cs->attach_in_progress--;
++ if (!cs->attach_in_progress)
++ wake_up(&cpuset_attach_wq);
+ percpu_up_write(&cpuset_rwsem);
+ }
+
--- /dev/null
+From fcdb1eda5302599045bb366e679cccb4216f3873 Mon Sep 17 00:00:00 2001
+From: Josh Don <joshdon@google.com>
+Date: Wed, 15 Mar 2023 14:40:29 -0700
+Subject: cgroup: fix display of forceidle time at root
+
+From: Josh Don <joshdon@google.com>
+
+commit fcdb1eda5302599045bb366e679cccb4216f3873 upstream.
+
+We need to reset forceidle_sum to 0 when reading from root, since the
+bstat we accumulate into is stack allocated.
+
+To make this more robust, just replace the existing cputime reset with a
+memset of the overall bstat.
+
+Signed-off-by: Josh Don <joshdon@google.com>
+Fixes: 1fcf54deb767 ("sched/core: add forced idle accounting for cgroups")
+Cc: stable@vger.kernel.org # v6.0+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/rstat.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/kernel/cgroup/rstat.c
++++ b/kernel/cgroup/rstat.c
+@@ -457,9 +457,7 @@ static void root_cgroup_cputime(struct c
+ struct task_cputime *cputime = &bstat->cputime;
+ int i;
+
+- cputime->stime = 0;
+- cputime->utime = 0;
+- cputime->sum_exec_runtime = 0;
++ memset(bstat, 0, sizeof(*bstat));
+ for_each_possible_cpu(i) {
+ struct kernel_cpustat kcpustat;
+ u64 *cpustat = kcpustat.cpustat;
--- /dev/null
+From 85e0689eb6b10cd3b2fb455d1b3f4d4d0b13ff78 Mon Sep 17 00:00:00 2001
+From: Horatio Zhang <Hongkun.Zhang@amd.com>
+Date: Thu, 6 Apr 2023 13:32:14 +0800
+Subject: drm/amd/pm: correct SMU13.0.7 max shader clock reporting
+
+From: Horatio Zhang <Hongkun.Zhang@amd.com>
+
+commit 85e0689eb6b10cd3b2fb455d1b3f4d4d0b13ff78 upstream.
+
+Correct the max shader clock reporting on SMU
+13.0.7.
+
+Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>
+Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.1.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 61 ++++++++++++++++++-
+ 1 file changed, 60 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+@@ -575,6 +575,14 @@ static int smu_v13_0_7_set_default_dpm_t
+ dpm_table);
+ if (ret)
+ return ret;
++
++ if (skutable->DriverReportedClocks.GameClockAc &&
++ (dpm_table->dpm_levels[dpm_table->count - 1].value >
++ skutable->DriverReportedClocks.GameClockAc)) {
++ dpm_table->dpm_levels[dpm_table->count - 1].value =
++ skutable->DriverReportedClocks.GameClockAc;
++ dpm_table->max = skutable->DriverReportedClocks.GameClockAc;
++ }
+ } else {
+ dpm_table->count = 1;
+ dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100;
+@@ -828,6 +836,57 @@ static int smu_v13_0_7_get_smu_metrics_d
+ return ret;
+ }
+
++static int smu_v13_0_7_get_dpm_ultimate_freq(struct smu_context *smu,
++ enum smu_clk_type clk_type,
++ uint32_t *min,
++ uint32_t *max)
++{
++ struct smu_13_0_dpm_context *dpm_context =
++ smu->smu_dpm.dpm_context;
++ struct smu_13_0_dpm_table *dpm_table;
++
++ switch (clk_type) {
++ case SMU_MCLK:
++ case SMU_UCLK:
++ /* uclk dpm table */
++ dpm_table = &dpm_context->dpm_tables.uclk_table;
++ break;
++ case SMU_GFXCLK:
++ case SMU_SCLK:
++ /* gfxclk dpm table */
++ dpm_table = &dpm_context->dpm_tables.gfx_table;
++ break;
++ case SMU_SOCCLK:
++ /* socclk dpm table */
++ dpm_table = &dpm_context->dpm_tables.soc_table;
++ break;
++ case SMU_FCLK:
++ /* fclk dpm table */
++ dpm_table = &dpm_context->dpm_tables.fclk_table;
++ break;
++ case SMU_VCLK:
++ case SMU_VCLK1:
++ /* vclk dpm table */
++ dpm_table = &dpm_context->dpm_tables.vclk_table;
++ break;
++ case SMU_DCLK:
++ case SMU_DCLK1:
++ /* dclk dpm table */
++ dpm_table = &dpm_context->dpm_tables.dclk_table;
++ break;
++ default:
++ dev_err(smu->adev->dev, "Unsupported clock type!\n");
++ return -EINVAL;
++ }
++
++ if (min)
++ *min = dpm_table->min;
++ if (max)
++ *max = dpm_table->max;
++
++ return 0;
++}
++
+ static int smu_v13_0_7_read_sensor(struct smu_context *smu,
+ enum amd_pp_sensors sensor,
+ void *data,
+@@ -1684,7 +1743,7 @@ static const struct pptable_funcs smu_v1
+ .dpm_set_jpeg_enable = smu_v13_0_set_jpeg_enable,
+ .init_pptable_microcode = smu_v13_0_init_pptable_microcode,
+ .populate_umd_state_clk = smu_v13_0_7_populate_umd_state_clk,
+- .get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq,
++ .get_dpm_ultimate_freq = smu_v13_0_7_get_dpm_ultimate_freq,
+ .get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values,
+ .read_sensor = smu_v13_0_7_read_sensor,
+ .feature_is_enabled = smu_cmn_feature_is_enabled,
--- /dev/null
+From f06b8887e3ef4f50098d3a949aef392c529c831a Mon Sep 17 00:00:00 2001
+From: Horatio Zhang <Hongkun.Zhang@amd.com>
+Date: Thu, 6 Apr 2023 11:17:38 +0800
+Subject: drm/amd/pm: correct SMU13.0.7 pstate profiling clock settings
+
+From: Horatio Zhang <Hongkun.Zhang@amd.com>
+
+commit f06b8887e3ef4f50098d3a949aef392c529c831a upstream.
+
+Correct the pstate standard/peak profiling mode clock
+settings for SMU13.0.7.
+
+Signed-off-by: Horatio Zhang <Hongkun.Zhang@amd.com>
+Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.1.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 22 ++++++++++++-------
+ 1 file changed, 15 insertions(+), 7 deletions(-)
+
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+@@ -1329,9 +1329,17 @@ static int smu_v13_0_7_populate_umd_stat
+ &dpm_context->dpm_tables.fclk_table;
+ struct smu_umd_pstate_table *pstate_table =
+ &smu->pstate_table;
++ struct smu_table_context *table_context = &smu->smu_table;
++ PPTable_t *pptable = table_context->driver_pptable;
++ DriverReportedClocks_t driver_clocks =
++ pptable->SkuTable.DriverReportedClocks;
+
+ pstate_table->gfxclk_pstate.min = gfx_table->min;
+- pstate_table->gfxclk_pstate.peak = gfx_table->max;
++ if (driver_clocks.GameClockAc &&
++ (driver_clocks.GameClockAc < gfx_table->max))
++ pstate_table->gfxclk_pstate.peak = driver_clocks.GameClockAc;
++ else
++ pstate_table->gfxclk_pstate.peak = gfx_table->max;
+
+ pstate_table->uclk_pstate.min = mem_table->min;
+ pstate_table->uclk_pstate.peak = mem_table->max;
+@@ -1348,12 +1356,12 @@ static int smu_v13_0_7_populate_umd_stat
+ pstate_table->fclk_pstate.min = fclk_table->min;
+ pstate_table->fclk_pstate.peak = fclk_table->max;
+
+- /*
+- * For now, just use the mininum clock frequency.
+- * TODO: update them when the real pstate settings available
+- */
+- pstate_table->gfxclk_pstate.standard = gfx_table->min;
+- pstate_table->uclk_pstate.standard = mem_table->min;
++ if (driver_clocks.BaseClockAc &&
++ driver_clocks.BaseClockAc < gfx_table->max)
++ pstate_table->gfxclk_pstate.standard = driver_clocks.BaseClockAc;
++ else
++ pstate_table->gfxclk_pstate.standard = gfx_table->max;
++ pstate_table->uclk_pstate.standard = mem_table->max;
+ pstate_table->socclk_pstate.standard = soc_table->min;
+ pstate_table->vclk_pstate.standard = vclk_table->min;
+ pstate_table->dclk_pstate.standard = dclk_table->min;
--- /dev/null
+From b9a24d8bd51e2db425602fa82d7f4c06aa3db852 Mon Sep 17 00:00:00 2001
+From: Evan Quan <evan.quan@amd.com>
+Date: Fri, 7 Apr 2023 17:12:15 +0800
+Subject: drm/amd/pm: correct the pcie link state check for SMU13
+
+From: Evan Quan <evan.quan@amd.com>
+
+commit b9a24d8bd51e2db425602fa82d7f4c06aa3db852 upstream.
+
+Update the driver implementations to fit those data exposed
+by PMFW.
+
+Signed-off-by: Evan Quan <evan.quan@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.1.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 6 ++++++
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 4 ++--
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 4 ++--
+ 3 files changed, 10 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+@@ -61,6 +61,12 @@
+ #define CTF_OFFSET_HOTSPOT 5
+ #define CTF_OFFSET_MEM 5
+
++static const int pmfw_decoded_link_speed[5] = {1, 2, 3, 4, 5};
++static const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16};
++
++#define DECODE_GEN_SPEED(gen_speed_idx) (pmfw_decoded_link_speed[gen_speed_idx])
++#define DECODE_LANE_WIDTH(lane_width_idx) (pmfw_decoded_link_width[lane_width_idx])
++
+ struct smu_13_0_max_sustainable_clocks {
+ uint32_t display_clock;
+ uint32_t phy_clock;
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+@@ -1125,8 +1125,8 @@ static int smu_v13_0_0_print_clk_levels(
+ (pcie_table->pcie_lane[i] == 5) ? "x12" :
+ (pcie_table->pcie_lane[i] == 6) ? "x16" : "",
+ pcie_table->clk_freq[i],
+- ((gen_speed - 1) == pcie_table->pcie_gen[i]) &&
+- (lane_width == link_width[pcie_table->pcie_lane[i]]) ?
++ (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) &&
++ (lane_width == DECODE_LANE_WIDTH(link_width[pcie_table->pcie_lane[i]])) ?
+ "*" : "");
+ break;
+
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+@@ -1074,8 +1074,8 @@ static int smu_v13_0_7_print_clk_levels(
+ (pcie_table->pcie_lane[i] == 5) ? "x12" :
+ (pcie_table->pcie_lane[i] == 6) ? "x16" : "",
+ pcie_table->clk_freq[i],
+- (gen_speed == pcie_table->pcie_gen[i]) &&
+- (lane_width == pcie_table->pcie_lane[i]) ?
++ (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) &&
++ (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ?
+ "*" : "");
+ break;
+
--- /dev/null
+From c0ff6f6da66a7791a32c0234388b1bdc00244917 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Tue, 11 Apr 2023 22:42:11 +0200
+Subject: mptcp: fix NULL pointer dereference on fastopen early fallback
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit c0ff6f6da66a7791a32c0234388b1bdc00244917 upstream.
+
+In case of early fallback to TCP, subflow_syn_recv_sock() deletes
+the subflow context before returning the newly allocated sock to
+the caller.
+
+The fastopen path does not cope with the above unconditionally
+dereferencing the subflow context.
+
+Fixes: 36b122baf6a8 ("mptcp: add subflow_v(4,6)_send_synack()")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/fastopen.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/net/mptcp/fastopen.c
++++ b/net/mptcp/fastopen.c
+@@ -9,11 +9,18 @@
+ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
+ struct request_sock *req)
+ {
+- struct sock *ssk = subflow->tcp_sock;
+- struct sock *sk = subflow->conn;
++ struct sock *sk, *ssk;
+ struct sk_buff *skb;
+ struct tcp_sock *tp;
+
++ /* on early fallback the subflow context is deleted by
++ * subflow_syn_recv_sock()
++ */
++ if (!subflow)
++ return;
++
++ ssk = subflow->tcp_sock;
++ sk = subflow->conn;
+ tp = tcp_sk(ssk);
+
+ subflow->is_mptfo = 1;
--- /dev/null
+From d6a0443733434408f2cbd4c53fea6910599bab9e Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Tue, 11 Apr 2023 22:42:10 +0200
+Subject: mptcp: stricter state check in mptcp_worker
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit d6a0443733434408f2cbd4c53fea6910599bab9e upstream.
+
+As reported by Christoph, the mptcp protocol can run the
+worker when the relevant msk socket is in an unexpected state:
+
+connect()
+// incoming reset + fastclose
+// the mptcp worker is scheduled
+mptcp_disconnect()
+// msk is now CLOSED
+listen()
+mptcp_worker()
+
+Leading to the following splat:
+
+divide error: 0000 [#1] PREEMPT SMP
+CPU: 1 PID: 21 Comm: kworker/1:0 Not tainted 6.3.0-rc1-gde5e8fd0123c #11
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+Workqueue: events mptcp_worker
+RIP: 0010:__tcp_select_window+0x22c/0x4b0 net/ipv4/tcp_output.c:3018
+RSP: 0018:ffffc900000b3c98 EFLAGS: 00010293
+RAX: 000000000000ffd7 RBX: 000000000000ffd7 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: ffffffff8214ce97 RDI: 0000000000000004
+RBP: 000000000000ffd7 R08: 0000000000000004 R09: 0000000000010000
+R10: 000000000000ffd7 R11: ffff888005afa148 R12: 000000000000ffd7
+R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+FS: 0000000000000000(0000) GS:ffff88803ed00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000405270 CR3: 000000003011e006 CR4: 0000000000370ee0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ tcp_select_window net/ipv4/tcp_output.c:262 [inline]
+ __tcp_transmit_skb+0x356/0x1280 net/ipv4/tcp_output.c:1345
+ tcp_transmit_skb net/ipv4/tcp_output.c:1417 [inline]
+ tcp_send_active_reset+0x13e/0x320 net/ipv4/tcp_output.c:3459
+ mptcp_check_fastclose net/mptcp/protocol.c:2530 [inline]
+ mptcp_worker+0x6c7/0x800 net/mptcp/protocol.c:2705
+ process_one_work+0x3bd/0x950 kernel/workqueue.c:2390
+ worker_thread+0x5b/0x610 kernel/workqueue.c:2537
+ kthread+0x138/0x170 kernel/kthread.c:376
+ ret_from_fork+0x2c/0x50 arch/x86/entry/entry_64.S:308
+ </TASK>
+
+This change addresses the issue explicitly checking for bad states
+before running the mptcp worker.
+
+Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close")
+Cc: stable@vger.kernel.org
+Reported-by: Christoph Paasch <cpaasch@apple.com>
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/374
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Tested-by: Christoph Paasch <cpaasch@apple.com>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2627,7 +2627,7 @@ static void mptcp_worker(struct work_str
+
+ lock_sock(sk);
+ state = sk->sk_state;
+- if (unlikely(state == TCP_CLOSE))
++ if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
+ goto unlock;
+
+ mptcp_check_data_fin_ack(sk);
--- /dev/null
+From a5cb752b125766524c921faab1a45cc96065b0a7 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Tue, 11 Apr 2023 22:42:09 +0200
+Subject: mptcp: use mptcp_schedule_work instead of open-coding it
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit a5cb752b125766524c921faab1a45cc96065b0a7 upstream.
+
+Beyond reducing code duplication this also avoids scheduling
+the mptcp_worker on a closed socket on some edge scenarios.
+
+The addressed issue is actually older than the blamed commit
+below, but this fix needs it as a pre-requisite.
+
+Fixes: ba8f48f7a4d7 ("mptcp: introduce mptcp_schedule_work")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/options.c | 5 ++---
+ net/mptcp/subflow.c | 18 ++++++------------
+ 2 files changed, 8 insertions(+), 15 deletions(-)
+
+--- a/net/mptcp/options.c
++++ b/net/mptcp/options.c
+@@ -1192,9 +1192,8 @@ bool mptcp_incoming_options(struct sock
+ */
+ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
+ if (mp_opt.data_fin && mp_opt.data_len == 1 &&
+- mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) &&
+- schedule_work(&msk->work))
+- sock_hold(subflow->conn);
++ mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64))
++ mptcp_schedule_work((struct sock *)msk);
+
+ return true;
+ }
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -407,9 +407,8 @@ void mptcp_subflow_reset(struct sock *ss
+
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ tcp_done(ssk);
+- if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
+- schedule_work(&mptcp_sk(sk)->work))
+- return; /* worker will put sk for us */
++ if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags))
++ mptcp_schedule_work(sk);
+
+ sock_put(sk);
+ }
+@@ -1117,8 +1116,8 @@ static enum mapping_status get_mapping_s
+ skb_ext_del(skb, SKB_EXT_MPTCP);
+ return MAPPING_OK;
+ } else {
+- if (updated && schedule_work(&msk->work))
+- sock_hold((struct sock *)msk);
++ if (updated)
++ mptcp_schedule_work((struct sock *)msk);
+
+ return MAPPING_DATA_FIN;
+ }
+@@ -1221,17 +1220,12 @@ static void mptcp_subflow_discard_data(s
+ /* sched mptcp worker to remove the subflow if no more data is pending */
+ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
+ {
+- struct sock *sk = (struct sock *)msk;
+-
+ if (likely(ssk->sk_state != TCP_CLOSE))
+ return;
+
+ if (skb_queue_empty(&ssk->sk_receive_queue) &&
+- !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) {
+- sock_hold(sk);
+- if (!schedule_work(&msk->work))
+- sock_put(sk);
+- }
++ !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
++ mptcp_schedule_work((struct sock *)msk);
+ }
+
+ static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
--- /dev/null
+From 30ba2d09edb5ea857a1473ae3d820911347ada62 Mon Sep 17 00:00:00 2001
+From: Rob Herring <robh@kernel.org>
+Date: Wed, 29 Mar 2023 07:38:35 -0500
+Subject: PCI: Fix use-after-free in pci_bus_release_domain_nr()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Rob Herring <robh@kernel.org>
+
+commit 30ba2d09edb5ea857a1473ae3d820911347ada62 upstream.
+
+Commit c14f7ccc9f5d ("PCI: Assign PCI domain IDs by ida_alloc()")
+introduced a use-after-free bug in the bus removal cleanup. The issue was
+found with kfence:
+
+ [ 19.293351] BUG: KFENCE: use-after-free read in pci_bus_release_domain_nr+0x10/0x70
+
+ [ 19.302817] Use-after-free read at 0x000000007f3b80eb (in kfence-#115):
+ [ 19.309677] pci_bus_release_domain_nr+0x10/0x70
+ [ 19.309691] dw_pcie_host_deinit+0x28/0x78
+ [ 19.309702] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
+ [ 19.309734] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
+ [ 19.309752] platform_probe+0x90/0xd8
+ ...
+
+ [ 19.311457] kfence-#115: 0x00000000063a155a-0x00000000ba698da8, size=1072, cache=kmalloc-2k
+
+ [ 19.311469] allocated by task 96 on cpu 10 at 19.279323s:
+ [ 19.311562] __kmem_cache_alloc_node+0x260/0x278
+ [ 19.311571] kmalloc_trace+0x24/0x30
+ [ 19.311580] pci_alloc_bus+0x24/0xa0
+ [ 19.311590] pci_register_host_bridge+0x48/0x4b8
+ [ 19.311601] pci_scan_root_bus_bridge+0xc0/0xe8
+ [ 19.311613] pci_host_probe+0x18/0xc0
+ [ 19.311623] dw_pcie_host_init+0x2c0/0x568
+ [ 19.311630] tegra_pcie_dw_probe+0x610/0xb28 [pcie_tegra194]
+ [ 19.311647] platform_probe+0x90/0xd8
+ ...
+
+ [ 19.311782] freed by task 96 on cpu 10 at 19.285833s:
+ [ 19.311799] release_pcibus_dev+0x30/0x40
+ [ 19.311808] device_release+0x30/0x90
+ [ 19.311814] kobject_put+0xa8/0x120
+ [ 19.311832] device_unregister+0x20/0x30
+ [ 19.311839] pci_remove_bus+0x78/0x88
+ [ 19.311850] pci_remove_root_bus+0x5c/0x98
+ [ 19.311860] dw_pcie_host_deinit+0x28/0x78
+ [ 19.311866] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
+ [ 19.311883] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
+ [ 19.311900] platform_probe+0x90/0xd8
+ ...
+
+ [ 19.313579] CPU: 10 PID: 96 Comm: kworker/u24:2 Not tainted 6.2.0 #4
+ [ 19.320171] Hardware name: /, BIOS 1.0-d7fb19b 08/10/2022
+ [ 19.325852] Workqueue: events_unbound deferred_probe_work_func
+
+The stack trace is a bit misleading as dw_pcie_host_deinit() doesn't
+directly call pci_bus_release_domain_nr(). The issue turns out to be in
+pci_remove_root_bus() which first calls pci_remove_bus() which frees the
+struct pci_bus when its struct device is released. Then
+pci_bus_release_domain_nr() is called and accesses the freed struct
+pci_bus. Reordering these fixes the issue.
+
+Fixes: c14f7ccc9f5d ("PCI: Assign PCI domain IDs by ida_alloc()")
+Link: https://lore.kernel.org/r/20230329123835.2724518-1-robh@kernel.org
+Link: https://lore.kernel.org/r/b529cb69-0602-9eed-fc02-2f068707a006@nvidia.com
+Reported-by: Jon Hunter <jonathanh@nvidia.com>
+Tested-by: Jon Hunter <jonathanh@nvidia.com>
+Signed-off-by: Rob Herring <robh@kernel.org>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Cc: stable@vger.kernel.org # v6.2+
+Cc: Pali Rohár <pali@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/remove.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/pci/remove.c
++++ b/drivers/pci/remove.c
+@@ -157,8 +157,6 @@ void pci_remove_root_bus(struct pci_bus
+ list_for_each_entry_safe(child, tmp,
+ &bus->devices, bus_list)
+ pci_remove_bus_device(child);
+- pci_remove_bus(bus);
+- host_bridge->bus = NULL;
+
+ #ifdef CONFIG_PCI_DOMAINS_GENERIC
+ /* Release domain_nr if it was dynamically allocated */
+@@ -166,6 +164,9 @@ void pci_remove_root_bus(struct pci_bus
+ pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
+ #endif
+
++ pci_remove_bus(bus);
++ host_bridge->bus = NULL;
++
+ /* remove the host bridge */
+ device_del(&host_bridge->dev);
+ }
--- /dev/null
+From 195d8e5da3acb17c5357526494f818a21e97cd10 Mon Sep 17 00:00:00 2001
+From: Reinette Chatre <reinette.chatre@intel.com>
+Date: Wed, 29 Mar 2023 13:13:11 -0700
+Subject: PCI/MSI: Provide missing stub for pci_msix_can_alloc_dyn()
+
+From: Reinette Chatre <reinette.chatre@intel.com>
+
+commit 195d8e5da3acb17c5357526494f818a21e97cd10 upstream.
+
+pci_msix_can_alloc_dyn() is not declared when CONFIG_PCI_MSI is disabled.
+
+There is no existing user of pci_msix_can_alloc_dyn() but work is in
+progress to change this. This work encounters the following error when
+CONFIG_PCI_MSI is disabled:
+
+ drivers/vfio/pci/vfio_pci_intrs.c:427:21: error: implicit declaration of function 'pci_msix_can_alloc_dyn' [-Werror=implicit-function-declaration]
+
+Provide definition for pci_msix_can_alloc_dyn() in preparation for users
+that need to compile when CONFIG_PCI_MSI is disabled.
+
+[bhelgaas: Also reported by Arnd Bergmann <arnd@kernel.org> in
+drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c; added his Fixes: line]
+
+Fixes: fb0a6a268dcd ("net/mlx5: Provide external API for allocating vectors")
+Fixes: 34026364df8e ("PCI/MSI: Provide post-enable dynamic allocation interfaces for MSI-X")
+Link: https://lore.kernel.org/oe-kbuild-all/202303291000.PWFqGCxH-lkp@intel.com/
+Link: https://lore.kernel.org/r/310ecc4815dae4174031062f525245f0755c70e2.1680119924.git.reinette.chatre@intel.com
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+Cc: stable@vger.kernel.org # v6.2+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/pci.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -1623,6 +1623,8 @@ pci_alloc_irq_vectors(struct pci_dev *de
+ flags, NULL);
+ }
+
++static inline bool pci_msix_can_alloc_dyn(struct pci_dev *dev)
++{ return false; }
+ static inline struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index,
+ const struct irq_affinity_desc *affdesc)
+ {
--- /dev/null
+From c8e22b7a1694bb8d025ea636816472739d859145 Mon Sep 17 00:00:00 2001
+From: Jiri Kosina <jkosina@suse.cz>
+Date: Tue, 4 Apr 2023 21:23:42 +0200
+Subject: scsi: ses: Handle enclosure with just a primary component gracefully
+
+From: Jiri Kosina <jkosina@suse.cz>
+
+commit c8e22b7a1694bb8d025ea636816472739d859145 upstream.
+
+This reverts commit 3fe97ff3d949 ("scsi: ses: Don't attach if enclosure
+has no components") and introduces proper handling of case where there are
+no detected secondary components, but primary component (enumerated in
+num_enclosures) does exist. That fix was originally proposed by Ding Hui
+<dinghui@sangfor.com.cn>.
+
+Completely ignoring devices that have one primary enclosure and no
+secondary one results in ses_intf_add() bailing completely
+
+ scsi 2:0:0:254: enclosure has no enumerated components
+ scsi 2:0:0:254: Failed to bind enclosure -12
+
+even on valid configurations with 1 primary and 0 secondary enclosures as
+below:
+
+ # sg_ses /dev/sg0
+ 3PARdata SES 3321
+ Supported diagnostic pages:
+ Supported Diagnostic Pages [sdp] [0x0]
+ Configuration (SES) [cf] [0x1]
+ Short Enclosure Status (SES) [ses] [0x8]
+ # sg_ses -p cf /dev/sg0
+ 3PARdata SES 3321
+ Configuration diagnostic page:
+ number of secondary subenclosures: 0
+ generation code: 0x0
+ enclosure descriptor list
+ Subenclosure identifier: 0 [primary]
+ relative ES process id: 0, number of ES processes: 1
+ number of type descriptor headers: 1
+ enclosure logical identifier (hex): 20000002ac02068d
+ enclosure vendor: 3PARdata product: VV rev: 3321
+ type descriptor header and text list
+ Element type: Unspecified, subenclosure id: 0
+ number of possible elements: 1
+
+The changelog for the original fix follows
+
+=====
+We can get a crash when disconnecting the iSCSI session,
+the call trace like this:
+
+ [ffff00002a00fb70] kfree at ffff00000830e224
+ [ffff00002a00fba0] ses_intf_remove at ffff000001f200e4
+ [ffff00002a00fbd0] device_del at ffff0000086b6a98
+ [ffff00002a00fc50] device_unregister at ffff0000086b6d58
+ [ffff00002a00fc70] __scsi_remove_device at ffff00000870608c
+ [ffff00002a00fca0] scsi_remove_device at ffff000008706134
+ [ffff00002a00fcc0] __scsi_remove_target at ffff0000087062e4
+ [ffff00002a00fd10] scsi_remove_target at ffff0000087064c0
+ [ffff00002a00fd70] __iscsi_unbind_session at ffff000001c872c4
+ [ffff00002a00fdb0] process_one_work at ffff00000810f35c
+ [ffff00002a00fe00] worker_thread at ffff00000810f648
+ [ffff00002a00fe70] kthread at ffff000008116e98
+
+In ses_intf_add, components count could be 0, and kcalloc 0 size scomp,
+but not saved in edev->component[i].scratch
+
+In this situation, edev->component[0].scratch is an invalid pointer,
+when kfree it in ses_intf_remove_enclosure, a crash like above would happen
+The call trace also could be other random cases when kfree cannot catch
+the invalid pointer
+
+We should not use edev->component[] array when the components count is 0
+We also need check index when use edev->component[] array in
+ses_enclosure_data_process
+=====
+
+Reported-by: Michal Kolar <mich.k@seznam.cz>
+Originally-by: Ding Hui <dinghui@sangfor.com.cn>
+Cc: stable@vger.kernel.org
+Fixes: 3fe97ff3d949 ("scsi: ses: Don't attach if enclosure has no components")
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Link: https://lore.kernel.org/r/nycvar.YFH.7.76.2304042122270.29760@cbobk.fhfr.pm
+Tested-by: Michal Kolar <mich.k@seznam.cz>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ses.c | 20 ++++++++------------
+ 1 file changed, 8 insertions(+), 12 deletions(-)
+
+--- a/drivers/scsi/ses.c
++++ b/drivers/scsi/ses.c
+@@ -503,9 +503,6 @@ static int ses_enclosure_find_by_addr(st
+ int i;
+ struct ses_component *scomp;
+
+- if (!edev->component[0].scratch)
+- return 0;
+-
+ for (i = 0; i < edev->components; i++) {
+ scomp = edev->component[i].scratch;
+ if (scomp->addr != efd->addr)
+@@ -596,8 +593,10 @@ static void ses_enclosure_data_process(s
+ components++,
+ type_ptr[0],
+ name);
+- else
++ else if (components < edev->components)
+ ecomp = &edev->component[components++];
++ else
++ ecomp = ERR_PTR(-EINVAL);
+
+ if (!IS_ERR(ecomp)) {
+ if (addl_desc_ptr) {
+@@ -728,11 +727,6 @@ static int ses_intf_add(struct device *c
+ components += type_ptr[1];
+ }
+
+- if (components == 0) {
+- sdev_printk(KERN_WARNING, sdev, "enclosure has no enumerated components\n");
+- goto err_free;
+- }
+-
+ ses_dev->page1 = buf;
+ ses_dev->page1_len = len;
+ buf = NULL;
+@@ -774,9 +768,11 @@ static int ses_intf_add(struct device *c
+ buf = NULL;
+ }
+ page2_not_supported:
+- scomp = kcalloc(components, sizeof(struct ses_component), GFP_KERNEL);
+- if (!scomp)
+- goto err_free;
++ if (components > 0) {
++ scomp = kcalloc(components, sizeof(struct ses_component), GFP_KERNEL);
++ if (!scomp)
++ goto err_free;
++ }
+
+ edev = enclosure_register(cdev->parent, dev_name(&sdev->sdev_gendev),
+ components, &ses_enclosure_callbacks);
--- /dev/null
+From 711ae788cbbb82818531b55e32b09518ee09a11a Mon Sep 17 00:00:00 2001
+From: Matthieu Baerts <matthieu.baerts@tessares.net>
+Date: Tue, 11 Apr 2023 22:42:12 +0200
+Subject: selftests: mptcp: userspace pm: uniform verify events
+
+From: Matthieu Baerts <matthieu.baerts@tessares.net>
+
+commit 711ae788cbbb82818531b55e32b09518ee09a11a upstream.
+
+Simply adding a "sleep" before checking something is usually not a good
+idea because the chosen delay may be too short or too long. The best
+is to wait for events with a timeout.
+
+In this selftest, 'sleep 0.5' is used more than 40 times. It is always
+used before calling a 'verify_*' function except for this
+verify_listener_events which has been added later.
+
+At the end, using all these 'sleep 0.5' seems to work: the slow CIs
+don't complain so far. Also because it doesn't take too much time, we
+can just add two more 'sleep 0.5' to uniform what is done before calling
+a 'verify_*' function. For the same reasons, we can also delay a bigger
+refactoring to replace all these 'sleep 0.5' by functions waiting for
+events instead of waiting for a fix time and hope for the best.
+
+Fixes: 6c73008aa301 ("selftests: mptcp: listener test for userspace PM")
+Cc: stable@vger.kernel.org
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
++++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
+@@ -884,6 +884,7 @@ test_listener()
+ $client4_port > /dev/null 2>&1 &
+ local listener_pid=$!
+
++ sleep 0.5
+ verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port
+
+ # ADD_ADDR from client to server machine reusing the subflow port
+@@ -899,6 +900,7 @@ test_listener()
+ # Delete the listener from the client ns, if one was created
+ kill_wait $listener_pid
+
++ sleep 0.5
+ verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
+ }
+
net-sfp-initialize-sfp-i2c_block_size-at-sfp-allocation.patch
net-phy-nxp-c45-tja11xx-add-remove-callback.patch
net-phy-nxp-c45-tja11xx-fix-unsigned-long-multiplication-overflow.patch
+scsi-ses-handle-enclosure-with-just-a-primary-component-gracefully.patch
+thermal-intel-avoid-updating-unsupported-therm_status_clear-mask-bits.patch
+drm-amd-pm-correct-the-pcie-link-state-check-for-smu13.patch
+pci-fix-use-after-free-in-pci_bus_release_domain_nr.patch
+pci-msi-provide-missing-stub-for-pci_msix_can_alloc_dyn.patch
+x86-pci-add-quirk-for-amd-xhci-controller-that-loses-msi-x-state-in-d3hot.patch
+cgroup-fix-display-of-forceidle-time-at-root.patch
+cgroup-cpuset-fix-partition-root-s-cpuset.cpus-update-bug.patch
+cgroup-cpuset-wake-up-cpuset_attach_wq-tasks-in-cpuset_cancel_attach.patch
+cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgroup-properly.patch
+cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_fork-methods.patch
+drm-amd-pm-correct-smu13.0.7-pstate-profiling-clock-settings.patch
+drm-amd-pm-correct-smu13.0.7-max-shader-clock-reporting.patch
+mptcp-use-mptcp_schedule_work-instead-of-open-coding-it.patch
+mptcp-stricter-state-check-in-mptcp_worker.patch
+mptcp-fix-null-pointer-dereference-on-fastopen-early-fallback.patch
+selftests-mptcp-userspace-pm-uniform-verify-events.patch
--- /dev/null
+From 117e4e5bd9d47b89777dbf6b37a709dcfe59520f Mon Sep 17 00:00:00 2001
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Date: Mon, 10 Apr 2023 10:35:01 -0700
+Subject: thermal: intel: Avoid updating unsupported THERM_STATUS_CLEAR mask bits
+
+From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+commit 117e4e5bd9d47b89777dbf6b37a709dcfe59520f upstream.
+
+Some older processors don't allow BIT(13) and BIT(15) in the current
+mask set by "THERM_STATUS_CLEAR_CORE_MASK". This results in:
+
+unchecked MSR access error: WRMSR to 0x19c (tried to
+write 0x000000000000aaa8) at rIP: 0xffffffff816f66a6
+(throttle_active_work+0xa6/0x1d0)
+
+To avoid unchecked MSR issues, check CPUID for each relevant feature and
+use that information to set the supported feature bits only in the
+"clear" mask for cores. Do the same for the analogous package mask set
+by "THERM_STATUS_CLEAR_PKG_MASK".
+
+Introduce functions thermal_intr_init_core_clear_mask() and
+thermal_intr_init_pkg_clear_mask() to set core and package mask bits,
+respectively. These functions are called during initialization.
+
+Fixes: 6fe1e64b6026 ("thermal: intel: Prevent accidental clearing of HFI status")
+Reported-by: Rui Salvaterra <rsalvaterra@gmail.com>
+Link: https://lore.kernel.org/lkml/cdf43fb423368ee3994124a9e8c9b4f8d00712c6.camel@linux.intel.com/T/
+Tested-by: Rui Salvaterra <rsalvaterra@gmail.com>
+Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Cc: 6.2+ <stable@kernel.org> # 6.2+
+[ rjw: Renamed 2 functions and 2 static variables, edited subject and
+ changelog ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/thermal/intel/therm_throt.c | 73 ++++++++++++++++++++++++++++++++----
+ 1 file changed, 66 insertions(+), 7 deletions(-)
+
+--- a/drivers/thermal/intel/therm_throt.c
++++ b/drivers/thermal/intel/therm_throt.c
+@@ -193,8 +193,67 @@ static const struct attribute_group ther
+ #define THERM_THROT_POLL_INTERVAL HZ
+ #define THERM_STATUS_PROCHOT_LOG BIT(1)
+
+-#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
+-#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))
++static u64 therm_intr_core_clear_mask;
++static u64 therm_intr_pkg_clear_mask;
++
++static void thermal_intr_init_core_clear_mask(void)
++{
++ if (therm_intr_core_clear_mask)
++ return;
++
++ /*
++ * Reference: Intel SDM Volume 4
++ * "Table 2-2. IA-32 Architectural MSRs", MSR 0x19C
++ * IA32_THERM_STATUS.
++ */
++
++ /*
++ * Bit 1, 3, 5: CPUID.01H:EDX[22] = 1. This driver will not
++ * enable interrupts, when 0 as it checks for X86_FEATURE_ACPI.
++ */
++ therm_intr_core_clear_mask = (BIT(1) | BIT(3) | BIT(5));
++
++ /*
++ * Bit 7 and 9: Thermal Threshold #1 and #2 log
++ * If CPUID.01H:ECX[8] = 1
++ */
++ if (boot_cpu_has(X86_FEATURE_TM2))
++ therm_intr_core_clear_mask |= (BIT(7) | BIT(9));
++
++ /* Bit 11: Power Limitation log (R/WC0) If CPUID.06H:EAX[4] = 1 */
++ if (boot_cpu_has(X86_FEATURE_PLN))
++ therm_intr_core_clear_mask |= BIT(11);
++
++ /*
++ * Bit 13: Current Limit log (R/WC0) If CPUID.06H:EAX[7] = 1
++ * Bit 15: Cross Domain Limit log (R/WC0) If CPUID.06H:EAX[7] = 1
++ */
++ if (boot_cpu_has(X86_FEATURE_HWP))
++ therm_intr_core_clear_mask |= (BIT(13) | BIT(15));
++}
++
++static void thermal_intr_init_pkg_clear_mask(void)
++{
++ if (therm_intr_pkg_clear_mask)
++ return;
++
++ /*
++ * Reference: Intel SDM Volume 4
++ * "Table 2-2. IA-32 Architectural MSRs", MSR 0x1B1
++ * IA32_PACKAGE_THERM_STATUS.
++ */
++
++ /* All bits except BIT 26 depend on CPUID.06H: EAX[6] = 1 */
++ if (boot_cpu_has(X86_FEATURE_PTS))
++ therm_intr_pkg_clear_mask = (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11));
++
++ /*
++ * Intel SDM Volume 2A: Thermal and Power Management Leaf
++ * Bit 26: CPUID.06H: EAX[19] = 1
++ */
++ if (boot_cpu_has(X86_FEATURE_HFI))
++ therm_intr_pkg_clear_mask |= BIT(26);
++}
+
+ /*
+ * Clear the bits in package thermal status register for bit = 1
+@@ -207,13 +266,10 @@ void thermal_clear_package_intr_status(i
+
+ if (level == CORE_LEVEL) {
+ msr = MSR_IA32_THERM_STATUS;
+- msr_val = THERM_STATUS_CLEAR_CORE_MASK;
++ msr_val = therm_intr_core_clear_mask;
+ } else {
+ msr = MSR_IA32_PACKAGE_THERM_STATUS;
+- msr_val = THERM_STATUS_CLEAR_PKG_MASK;
+- if (boot_cpu_has(X86_FEATURE_HFI))
+- msr_val |= BIT(26);
+-
++ msr_val = therm_intr_pkg_clear_mask;
+ }
+
+ msr_val &= ~bit_mask;
+@@ -708,6 +764,9 @@ void intel_init_thermal(struct cpuinfo_x
+ h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
+ apic_write(APIC_LVTTHMR, h);
+
++ thermal_intr_init_core_clear_mask();
++ thermal_intr_init_pkg_clear_mask();
++
+ rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+ if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
+ wrmsr(MSR_IA32_THERM_INTERRUPT,
--- /dev/null
+From f195fc1e9715ba826c3b62d58038f760f66a4fe9 Mon Sep 17 00:00:00 2001
+From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+Date: Wed, 29 Mar 2023 22:58:59 +0530
+Subject: x86/PCI: Add quirk for AMD XHCI controller that loses MSI-X state in D3hot
+
+From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+
+commit f195fc1e9715ba826c3b62d58038f760f66a4fe9 upstream.
+
+The AMD [1022:15b8] USB controller loses some internal functional MSI-X
+context when transitioning from D0 to D3hot. BIOS normally traps D0->D3hot
+and D3hot->D0 transitions so it can save and restore that internal context,
+but some firmware in the field can't do this because it fails to clear the
+AMD_15B8_RCC_DEV2_EPF0_STRAP2 NO_SOFT_RESET bit.
+
+Clear AMD_15B8_RCC_DEV2_EPF0_STRAP2 NO_SOFT_RESET bit before USB controller
+initialization during boot.
+
+Link: https://lore.kernel.org/linux-usb/Y%2Fz9GdHjPyF2rNG3@glanzmann.de/T/#u
+Link: https://lore.kernel.org/r/20230329172859.699743-1-Basavaraj.Natikar@amd.com
+Reported-by: Thomas Glanzmann <thomas@glanzmann.de>
+Tested-by: Thomas Glanzmann <thomas@glanzmann.de>
+Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/pci/fixup.c | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+--- a/arch/x86/pci/fixup.c
++++ b/arch/x86/pci/fixup.c
+@@ -7,6 +7,7 @@
+ #include <linux/dmi.h>
+ #include <linux/pci.h>
+ #include <linux/vgaarb.h>
++#include <asm/amd_nb.h>
+ #include <asm/hpet.h>
+ #include <asm/pci_x86.h>
+
+@@ -824,3 +825,23 @@ static void rs690_fix_64bit_dma(struct p
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
+
+ #endif
++
++#ifdef CONFIG_AMD_NB
++
++#define AMD_15B8_RCC_DEV2_EPF0_STRAP2 0x10136008
++#define AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK 0x00000080L
++
++static void quirk_clear_strap_no_soft_reset_dev2_f0(struct pci_dev *dev)
++{
++ u32 data;
++
++ if (!amd_smn_read(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, &data)) {
++ data &= ~AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK;
++ if (amd_smn_write(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, data))
++ pci_err(dev, "Failed to write data 0x%x\n", data);
++ } else {
++ pci_err(dev, "Failed to read data\n");
++ }
++}
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b8, quirk_clear_strap_no_soft_reset_dev2_f0);
++#endif